# Imports

In [49]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
import scipy
from scipy.stats import zscore
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import kagglehub
from kagglehub import KaggleDatasetAdapter
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning) # TODO: Actually optimize the source of this warning

# Import Dataframe

In [16]:
# # Set the path to the file you'd like to load
# file_path = "aidedd_blocks2.csv"

# # Load the latest version
# df_original = kagglehub.dataset_load(
#   KaggleDatasetAdapter.PANDAS,
#   "travistyler/dnd-5e-monster-manual-stats",
#   file_path,
#   # Provide any additional arguments like
#   # sql_query or pandas_kwargs. See the
#   # documenation for more information:
#   # https://github.com/Kaggle/kagglehub/blob/main/README.md#kaggledatasetadapterpandas
# )

# df = df_original.copy()
# # Create dataframe to keep track of the original values of each categorical data
# original_categorical_vals = pd.DataFrame()

# def update_ocv(col, unique):
#   global original_categorical_vals
#   original_categorical_vals = pd.concat([original_categorical_vals, pd.DataFrame({col:unique})], axis=1)

# def __reclassify_categorical__(df, col):
#   df_copy = df.copy()
#   unique = df_copy[col].unique()
#   update_ocv(col, unique)
#   for i in range(0,len(unique)):
#     df_copy = df_copy.replace({col: unique[i]}, i)
#   return df_copy

# def view_categorical_legend(col):
#   return original_categorical_vals[col].dropna()

# def __reclassify_list__(df, col, delimiter):
#   df_copy = df.copy()
#   column = df_copy[col]
#   for i in range(0,len(column)):
#     num = 0
#     item = column[i]
#     vals = item.split(delimiter)
#     for each in vals:
#       each = each.lower()
#       if "two" in each: num = num + 2
#       elif "three" in each: num = num + 3
#       elif "four" in each: num = num + 4
#       elif "five" in each: num = num + 5
#       else: num = num + 1
#     df_copy.at[i,col] = num
#   return df_copy

# # # print(df.dtypes)
# # TODO: Play with category types??
# # df_test = df['size'].astype('category')


# # df = reclassify_categorical(df, 'size')
# # df = reclassify_categorical(df, 'alignment')
# # df = reclassify_categorical(df, 'type')
# df = __reclassify_list__(df, 'languages', ", ")
# df = __reclassify_list__(df, 'senses', ", ")
# # print(df['attributes'][2].split(" | "))
# # print(df['actions'][2].split(" | "))
# # print(df['legendary_actions'][2].split(" | "))
# # view_categorical_legend('alignment')

# # temporary removing of string values so I can work only on num values
# df = df.drop(['attributes','actions','legendary_actions'],axis=1)
# # remove name and source because these don't contribute anything
# df = df.drop(['name','source'],axis=1)
# # fix numeric values into floats
# # for each in df.columns:
# #   if each not in original_categorical_vals.columns:
# #     df[each] = df[each].astype(np.float32)
# #   else:
# #     # df[each] = df[each].astype(int) # apparently it doesn't like integers
# #     df[each] = df[each].astype(np.float32)

# # # categorical_cols = original_categorical_vals.columns
# # target_col = 'cr'
# # categorical_cols = ['size','alignment','type']
# # numerical_cols = df.drop(categorical_cols,axis=1).columns
# # numerical_cols = df.drop([target_col],axis=1).columns
# # cat_maps = {}

# # for col in categorical_cols:
# #     unique_vals = df[col].unique()
# #     cat_maps[col] = {val: i for i, val in enumerate(unique_vals)}

# # for col in categorical_cols:
# #     df[col] = df[col].map(cat_maps[col]).astype(int)

# # items = ['numerical','categorical','cr']
# # data_list = [] # this is the data that we are going to be using

# # for _, row in df.iterrows():
# #     data_list.append({
# #         "numerical": row[numerical_cols].astype(float).tolist(),
# #         "categorical": {col: int(row[col]) for col in categorical_cols},
# #         "cr": float(row[target_col])
# #     })

# # def show_monster(monster):
# #   print(f'Numerical:\t{data_list[monster]['numerical']}')
# #   print(f'Categorical:\t{data_list[monster]['categorical']}')
# #   print(f'CR:\t\t{data_list[monster]['cr']}')

# # show_monster(1)
# # show_monster(2)
# # show_monster(3)

# Pytorch Model

In [244]:
# Device configuration, this is to check if GPU is available and run on GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
# input_size = len(df.columns) # 48
input_size = 48
hidden_size = 100 # number of nodes in hidden layer
num_classes = 33 # number of classes, 0, 1/4, 1/2, 1-30
num_epochs = 2 # number of times we go through the entire dataset
batch_size = 100 # number of samples in one forward/backward pass
learning_rate = 0.001 # learning rate


class MonsterDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        """
        Arguments:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.CAT_COLS = ['size','alignment','type','legendary']
        self.NONNUMERIC_COLS = ['size','alignment','type','legendary','name','attributes','actions','legendary_actions']
        self.__parsecsv__(csv_file)
        self.root_dir = root_dir
        self.transform = transform
    
    def __parsecsv__(self, csv_file):
        self.df_original = pd.read_csv(csv_file)
        self.df = self.df_original.copy()
        self.original_categorical_vals = pd.DataFrame()

        self.__reclassify_categorical__('size')
        self.__reclassify_categorical__('alignment')
        self.__reclassify_categorical__('type')
        self.__reclassify_categorical__('legendary')
        self.__reclassify_list__('languages', ", ")
        self.__reclassify_list__('senses', ", ")

        # temporary removing of string values so I can work only on num values
        self.df = self.df.drop(['attributes','actions','legendary_actions'],axis=1)
        # remove source because these don't contribute anything
        self.df = self.df.drop(['source'],axis=1)
        
        self.__redefine_datatypes__()

        for col in self.CAT_COLS:
            self.dummify_cat_values(col)

        # z-score normalization (if we need it)
        # num_cols = self.df.select_dtypes('float32').columns
        # for each in num_cols:
        #     if each != 'cr':
        #         self.normalize_zscore(each)
        
    def dummify_cat_values(self, col):
        df_copy = self.df.copy()
        dummies = pd.get_dummies(df_copy[col],prefix=col).astype('float32')
        df_copy = pd.concat([df_copy,dummies],axis=1)
        df_copy = df_copy.drop([col],axis=1)
        self.df = df_copy
    
    def __update_ocv__(self, df, col, unique):
        self.original_categorical_vals = pd.concat([self.original_categorical_vals, pd.DataFrame({col:unique})], axis=1)

    def __redefine_datatypes__(self):
        df_copy = self.df.copy()
        for each in df_copy.columns:
            if each in self.CAT_COLS:
                df_copy[each] = df_copy[each].astype('category')
            elif each == 'name':
                pass
            else:
                df_copy[each] = pd.to_numeric(df_copy[each], errors='coerce').astype(np.float32)
        self.df = df_copy
    
    def __reclassify_categorical__(self, col):
        df_copy = self.df.copy()
        if col == 'type':
            for i,each in enumerate(df_copy[col]):
                if "(" in each:
                    df_copy.at[i,col] = each[:(each.find("(")-1)]
        elif col == 'alignment': # TODO: reduce dimensionality for alignment
            pass
        unique = df_copy[col].unique()
        self.__update_ocv__(df_copy, col, unique)
        self.df = df_copy
    
    def __reclassify_list__(self, col, delimiter):
        df_copy = self.df.copy()
        column = df_copy[col]
        for i in range(0,len(column)):
            num = 0
            item = column[i]
            vals = item.split(delimiter)
            for each in vals:
                each = each.lower()
                if "two" in each: num = num + 2
                elif "three" in each: num = num + 3
                elif "four" in each: num = num + 4
                elif "five" in each: num = num + 5
                else: num = num + 1
            df_copy.at[i,col] = num
        self.df = df_copy
    
    def __len__(self):
        return len(self.df)
    
    # def normalize_zscore(self, col):
    #     df_copy = self.df.copy()
    #     df_copy[col] = zscore(df_copy[col])
    #     self.df = df_copy
    
    def getocv(self):
        return self.original_categorical_vals
    
    def create_subdf(self,substring):
        df_copy = self.df.copy()
        subdf = pd.DataFrame()
        for each in df_copy:
            if substring in each:
                subdf = pd.concat([subdf,df_copy[each]],axis=1)
        return subdf
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        df_copy = self.df.copy()
        
        monster_name = df_copy['name']
        cat_size = self.create_subdf("size")
        cat_type = self.create_subdf("type")
        cat_alignment = self.create_subdf("alignment")
        cat_legendary = self.create_subdf("legendary")
        numeric = df_copy.copy()
        numeric = numeric.drop(cat_size.columns,axis=1)
        numeric = numeric.drop(cat_type.columns,axis=1)
        numeric = numeric.drop(cat_alignment.columns,axis=1)
        numeric = numeric.drop(cat_legendary.columns,axis=1)
        numeric = numeric.drop(['name','cr'],axis=1)
        target_value = df_copy['cr']

        monster_name = monster_name[idx]
        cat_size = cat_size.iloc[idx]
        cat_type = cat_type.iloc[idx]
        cat_alignment = cat_alignment.iloc[idx]
        cat_legendary = cat_legendary.iloc[idx]
        numeric = numeric.iloc[idx]
        target_value = target_value[idx]

        cat_size = torch.tensor(cat_size, dtype=torch.long)
        cat_type = torch.tensor(cat_type, dtype=torch.long)
        cat_alignment = torch.tensor(cat_alignment, dtype=torch.long)
        cat_legendary = torch.tensor(cat_legendary, dtype=torch.long)
        numeric = torch.tensor(numeric, dtype=torch.float32)

        return monster_name,numeric,cat_size,cat_type,cat_alignment,cat_legendary,target_value
    
    def getdf(self):
        return self.df

train_dataset = MonsterDataset("aidedd_blocks2.csv","")
test_dataset = MonsterDataset("aidedd_blocks2.csv","")
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

train_df = train_dataset.getdf()

input_size = len(train_df.columns)


# train_dataset.__getitem__(2)
# print(train_dataset.__getitem__(2))

# for i, sample in enumerate(train_dataset):
#     print(i, numerical, categorical, target)

#     if i == 3:
#         break

# examples = iter(test_loader) # create iterable object
# samples, labels = next(examples)  # unpack the batch
# print(f'Shape of samples: {samples.shape}, shape of labels: {labels.shape}')

ocv = train_dataset.getocv()
for each in ocv:
    print(each, len(ocv[each].dropna()))
# ocv
# train_dataset.getdf()
# ['medium', 'large', 'huge', 'gargantuan', 'small', 'tiny']
# ['tiny', 'small', 'medium', 'large', 'huge', 'gargantuan']

size 6
alignment 17
type 15
legendary 2


In [None]:
class CRPredictor(nn.Module):
    def __init__(self, input_size, size_cats, type_cats, alignment_cats, legendary_cats):
        super(CRPredictor,self).__init__()# Process numeric features
        self.numeric_net = nn.Sequential(
            nn.Linear(numeric_input_size, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.3)
        )
        
        # Process categorical features
        cat_input_size = size_cats + type_cats + alignment_cats + legendary_cats
        self.categorical_net = nn.Sequential(
            nn.Linear(cat_input_size, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(0.3)
        )
        
        # Combined network
        combined_size = 128 + 64  # outputs from numeric and categorical nets
        self.combined_net = nn.Sequential(
            nn.Linear(combined_size, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)  # Single output for CR prediction
        )

        # self.emb_size = nn.Embedding(7,2)
        # self.emb_alignment = nn.Embedding(18,2)
        # self.emb_type = nn.Embedding(16,2)
        # self.emb_legendary = nn.Embedding(2,2)
        # self.n_emb = 7+18+16+2 # total = 43
        
        # self.l1 = nn.Linear(input_size-4+self.n_emb,hidden_size) # first layer
        # self.relu = nn.ReLU() # activation function
        # self.l2 = nn.Linear(hidden_size,num_classes) # second layer
    
    def forward(self, numeric, cat_size,cat_type,cat_alignment,cat_legendary):
        
        cat_size = cat_size.float()
        cat_type = cat_type.float()
        cat_alignment = cat_alignment.float()
        cat_legendary = cat_legendary.float()
        
        # Process numeric features
        numeric_out = self.numeric_net(numeric)
        
        # Concatenate all categorical features
        categorical = torch.cat([cat_size, cat_type, cat_alignment, cat_legendary], dim=1)
        categorical_out = self.categorical_net(categorical)
        
        # Combine both pathways
        combined = torch.cat([numeric_out, categorical_out], dim=1)
        output = self.combined_net(combined)

        return output.squeeze()
        # emb1 = self.emb_size(cat1)
        # emb2 = self.emb_alignment(cat2)
        # emb3 = self.emb_type(cat3)
        # emb4 = self.emb_legendary(cat4)
        # # numerical = self.emb_numeric(numerical)

        

        # x_emb = torch.cat([emb1,emb2,emb3,emb4],dim=1)
        # # print("em1size",x_emb.size())
        # # print(x_emb)
        # print("em1size",emb1.size())
        # print("em2size",emb2.size())
        # print("em3size",emb3.size())
        # print("em4size",emb4.size())
        # print("numsize",numerical.size())
        # x = torch.cat([x_emb,numerical],dim=1)
        # x = self.l1(x)
        # x = self.relu(x)
        # x = self.l2(x)
        # # no activation function and no softmax at the end
        # return x

model = CRPredictor(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

KeyError: 1

In [247]:
n_total_steps = len(train_loader)

for epoch in range(num_epochs):
    for i, (name,numerical,cat1,cat2,cat3,cat4,target) in enumerate(train_loader):
        
        outputs = model(numerical,cat1,cat2,cat3,cat4)
        loss = criterion(outputs,target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print(f'epoch {epoch+1}/{num_epochs}, step {i+1}/{n_total_steps}, loss = {loss.item():.4f}')
            # print(f'epoch {epoch+1}/{num_epochs}, step {i+1}/{n_total_steps}, loss = {0:.4f}')

print("Finished training.")

  cat_size = torch.tensor(cat_size, dtype=torch.long)
  cat_type = torch.tensor(cat_type, dtype=torch.long)
  cat_alignment = torch.tensor(cat_alignment, dtype=torch.long)
  cat_legendary = torch.tensor(cat_legendary, dtype=torch.long)
  numeric = torch.tensor(numeric, dtype=torch.float32)
  cat_size = torch.tensor(cat_size, dtype=torch.long)
  cat_type = torch.tensor(cat_type, dtype=torch.long)
  cat_alignment = torch.tensor(cat_alignment, dtype=torch.long)
  cat_legendary = torch.tensor(cat_legendary, dtype=torch.long)
  numeric = torch.tensor(numeric, dtype=torch.float32)
  cat_size = torch.tensor(cat_size, dtype=torch.long)
  cat_type = torch.tensor(cat_type, dtype=torch.long)
  cat_alignment = torch.tensor(cat_alignment, dtype=torch.long)
  cat_legendary = torch.tensor(cat_legendary, dtype=torch.long)
  numeric = torch.tensor(numeric, dtype=torch.float32)
  cat_size = torch.tensor(cat_size, dtype=torch.long)
  cat_type = torch.tensor(cat_type, dtype=torch.long)
  cat_alignment = t

AttributeError: 'CRPredictor' object has no attribute 'numeric_net'

# Test Code Clipboard