# Imports

In [175]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import kagglehub
from kagglehub import KaggleDatasetAdapter
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning) # TODO: Actually optimize the source of this warning

# Import Dataframe

In [176]:
# Set the path to the file you'd like to load
file_path = "aidedd_blocks2.csv"

# Load the latest version
df_original = kagglehub.dataset_load(
  KaggleDatasetAdapter.PANDAS,
  "travistyler/dnd-5e-monster-manual-stats",
  file_path,
  # Provide any additional arguments like
  # sql_query or pandas_kwargs. See the
  # documenation for more information:
  # https://github.com/Kaggle/kagglehub/blob/main/README.md#kaggledatasetadapterpandas
)

df = df_original.copy()
# Create dataframe to keep track of the original values of each categorical data
original_categorical_vals = pd.DataFrame()

def update_ocv(col, unique):
  global original_categorical_vals
  original_categorical_vals = pd.concat([original_categorical_vals, pd.DataFrame({col:unique})], axis=1)

def __reclassify_categorical__(df, col):
  df_copy = df.copy()
  unique = df_copy[col].unique()
  update_ocv(col, unique)
  for i in range(0,len(unique)):
    df_copy = df_copy.replace({col: unique[i]}, i)
  return df_copy

def view_categorical_legend(col):
  return original_categorical_vals[col].dropna()

def __reclassify_list__(df, col, delimiter):
  df_copy = df.copy()
  column = df_copy[col]
  for i in range(0,len(column)):
    num = 0
    item = column[i]
    vals = item.split(delimiter)
    for each in vals:
      each = each.lower()
      if "two" in each: num = num + 2
      elif "three" in each: num = num + 3
      elif "four" in each: num = num + 4
      elif "five" in each: num = num + 5
      else: num = num + 1
    df_copy.at[i,col] = num
  return df_copy

# # print(df.dtypes)
# TODO: Play with category types??
# df_test = df['size'].astype('category')


# df = reclassify_categorical(df, 'size')
# df = reclassify_categorical(df, 'alignment')
# df = reclassify_categorical(df, 'type')
df = __reclassify_list__(df, 'languages', ", ")
df = __reclassify_list__(df, 'senses', ", ")
# print(df['attributes'][2].split(" | "))
# print(df['actions'][2].split(" | "))
# print(df['legendary_actions'][2].split(" | "))
# view_categorical_legend('alignment')

# temporary removing of string values so I can work only on num values
df = df.drop(['attributes','actions','legendary_actions'],axis=1)
# remove name and source because these don't contribute anything
df = df.drop(['name','source'],axis=1)
# fix numeric values into floats
# for each in df.columns:
#   if each not in original_categorical_vals.columns:
#     df[each] = df[each].astype(np.float32)
#   else:
#     # df[each] = df[each].astype(int) # apparently it doesn't like integers
#     df[each] = df[each].astype(np.float32)

# # categorical_cols = original_categorical_vals.columns
# target_col = 'cr'
# categorical_cols = ['size','alignment','type']
# numerical_cols = df.drop(categorical_cols,axis=1).columns
# numerical_cols = df.drop([target_col],axis=1).columns
# cat_maps = {}

# for col in categorical_cols:
#     unique_vals = df[col].unique()
#     cat_maps[col] = {val: i for i, val in enumerate(unique_vals)}

# for col in categorical_cols:
#     df[col] = df[col].map(cat_maps[col]).astype(int)

# items = ['numerical','categorical','cr']
# data_list = [] # this is the data that we are going to be using

# for _, row in df.iterrows():
#     data_list.append({
#         "numerical": row[numerical_cols].astype(float).tolist(),
#         "categorical": {col: int(row[col]) for col in categorical_cols},
#         "cr": float(row[target_col])
#     })

# def show_monster(monster):
#   print(f'Numerical:\t{data_list[monster]['numerical']}')
#   print(f'Categorical:\t{data_list[monster]['categorical']}')
#   print(f'CR:\t\t{data_list[monster]['cr']}')

# show_monster(1)
# show_monster(2)
# show_monster(3)

# Pytorch Model

In [231]:
# Device configuration, this is to check if GPU is available and run on GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
input_size = len(df.columns) # 48
hidden_size = 100 # number of nodes in hidden layer
num_classes = 33 # number of classes, 0, 1/4, 1/2, 1-30
num_epochs = 2 # number of times we go through the entire dataset
batch_size = 100 # number of samples in one forward/backward pass
learning_rate = 0.001 # learning rate


class MonsterDataset(Dataset):

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Arguments:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.CAT_COLS = ['size','alignment','type','legendary']
        self.NONNUMERIC_COLS = ['size','alignment','type','legendary','name','attributes','actions','legendary_actions']
        self.__parsecsv__(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    # def __parsecsvUNUSED__(self, csv_file):
    #     self.df_original = pd.read_csv(csv_file)
    #     self.df = self.df_original.copy()
    #     # temporary removing of string values so I can work only on num values
    #     self.df = self.df.drop(['attributes','actions','legendary_actions'],axis=1)
    #     # remove source because these don't contribute anything
    #     self.df = self.df.drop(['source'],axis=1)
    #     target_col = 'cr'
    #     categorical_cols = ['size','alignment','type']
    #     numerical_cols = self.df.drop(categorical_cols,axis=1).columns
    #     numerical_cols = self.df.drop([target_col,'name'],axis=1).columns
    #     self.__reclassify_list__('languages', ", ")
    #     self.__reclassify_list__('senses', ", ")
    #     self.cat_maps = {}

    #     for col in categorical_cols:
    #         unique_vals = self.df[col].unique()
    #         self.cat_maps[col] = {val: i for i, val in enumerate(unique_vals)}

    #     for col in categorical_cols:
    #         self.df[col] = self.df[col].map(self.cat_maps[col]).astype(int)

    #     items = ['numerical','categorical','cr']
    #     data_list = [] # this is the data that we are going to be using

    #     for _, row in self.df.iterrows():
    #         data_list.append({
    #             "numerical": row[numerical_cols].astype(float).tolist(),
    #             "categorical": {col: int(row[col]) for col in categorical_cols},
    #             "cr": float(row[target_col])
    #         })
        
    #     self.data = data_list
    
    def __parsecsv__(self, csv_file):
        self.df_original = pd.read_csv(csv_file)
        self.df = self.df_original.copy()
        self.original_categorical_vals = pd.DataFrame()
        
        self.__reclassify_categorical__('size')
        self.__reclassify_categorical__('alignment')
        self.__reclassify_categorical__('type')
        self.__reclassify_categorical__('legendary')
        self.__reclassify_list__('languages', ", ")
        self.__reclassify_list__('senses', ", ")

        # temporary removing of string values so I can work only on num values
        self.df = self.df.drop(['attributes','actions','legendary_actions'],axis=1)
        # remove source because these don't contribute anything
        self.df = self.df.drop(['source'],axis=1)
        
        self.__redefine_datatypes__()
    
    def __update_ocv__(self, col, unique):
        self.original_categorical_vals = pd.concat([self.original_categorical_vals, pd.DataFrame({col:unique})], axis=1)

    def __redefine_datatypes__(self):
        for each in self.df.columns:
            if each not in self.NONNUMERIC_COLS:
                self.df[each] = pd.to_numeric(self.df[each], errors='coerce').astype(np.float32)
            elif each == 'name':
                pass
            else:
                df[each] = df[each].astype(int) # apparently it doesn't like integers
                # self.df[each] = self.df[each].astype(np.float32)
    
    def __reclassify_categorical__(self, col):
        df_copy = self.df.copy()
        unique = df_copy[col].unique()
        self.__update_ocv__(col, unique)

        df_copy[col] = pd.Categorical(df_copy[col], categories=unique)
        df_copy[col+"_encoded"] = df_copy[col].cat.codes
        # df_copy[col+"_encoded"] = df_copy[col].map(self.original_categorical_vals[col])
        # for i in range(0,len(unique)):
        #   df_copy = df_copy.replace({col: unique[i]}, i)
        self.df = df_copy
    
    def __reclassify_list__(self, col, delimiter):
        df_copy = self.df.copy()
        column = df_copy[col]
        for i in range(0,len(column)):
            num = 0
            item = column[i]
            vals = item.split(delimiter)
            for each in vals:
                each = each.lower()
                if "two" in each: num = num + 2
                elif "three" in each: num = num + 3
                elif "four" in each: num = num + 4
                elif "five" in each: num = num + 5
                else: num = num + 1
            df_copy.at[i,col] = num
        self.df = df_copy
    
    def __len__(self):
        return len(self.df)
    
    # def __getitemUNUSED__(self, idx):
    #     if torch.is_tensor(idx):
    #         idx = idx.tolist()
        
    #     sample = self.data[idx]
    #     if self.transform:
    #         sample = self.transform(sample)

    #     return sample
    
    def getocv(self):
        return self.original_categorical_vals
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        item = self.df.iloc[idx]
        
        monster_name = item['name']
        numerical_data = item.drop(self.CAT_COLS)
        numerical_data = numerical_data.drop(['cr','name'])
        categorical_data = item[self.CAT_COLS]
        target_data = item['cr']
        sample = {'name':monster_name,'numerical':numerical_data,'categorical':categorical_data,'cr':target_data}
        
        if self.transform:
            sample = self.transform(sample)

        return sample
    
    def getdf(self):
        return self.df

class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""
    def __call__(self, sample):
        numerical_data, categorical_data, target_data = sample['numerical'], sample['categorical'], sample['cr']
        return {'numerical': torch.tensor(numerical_data.values, dtype=torch.float32),
                'categorical': {col: torch.tensor(categorical_data[col].values, dtype=torch.long) for col in categorical_data},
                'cr': target_data}

# mds = MonsterDataset("aidedd_blocks2.csv","",transform=transforms.Compose([ToTensor()]))
mds = MonsterDataset("aidedd_blocks2.csv","",transform=transforms.Compose([ToTensor()]))
ocv = mds.getocv()

print(mds.__getitem__(1))

# for i, sample in enumerate(mds):
#     print(i, sample['numerical'], sample['categorical'])

#     if i == 3:
#         break


dataloader = DataLoader(mds, batch_size=batch_size, shuffle=True, num_workers=0)

# print(mds.__getitem__(2))




train_dataset = None
test_dataset = None
train_loader = None
test_loader = None

ocv
# ['medium', 'large', 'huge', 'gargantuan', 'small', 'tiny']
# ['tiny', 'small', 'medium', 'large', 'huge', 'gargantuan']

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.

# Test Code Clipboard