# Dungeons & Dragons Classification Project

## Loading the data

In [7]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV

def load_and_encode_data(test_size=0.2, random_state=42):
    """Load every creature-type sample file, encode the labels, and split.

    For each creature type, ``<type>_array.json`` is read from the current
    working directory. The file is parsed as JSON and each top-level entry is
    converted to a NumPy array and treated as one sample of that type.
    (The structure of an individual sample is not visible here -- presumably a
    numeric feature vector; confirm against the code that writes the files.)

    Args:
        test_size: Fraction of samples held out for testing (default 0.2).
        random_state: Seed passed to ``train_test_split`` (default 42).

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as produced by
        ``train_test_split``, stratified on the encoded labels.

    Side effects:
        Rebinds the module-level ``label_encoder`` to the fitted
        ``LabelEncoder`` so later cells can map encoded labels back to names.

    Raises:
        FileNotFoundError: If any of the expected JSON files is missing.
    """
    global label_encoder

    # File-name stems, in the order the samples are concatenated. The order is
    # significant: it fixes the sample order fed to the seeded split.
    # 'monstrosoties' is spelled this way on purpose -- it is the on-disk file
    # name and the class label that appears in the report.
    creature_types = [
        'aberrations', 'beasts', 'celestials', 'constructs', 'dragons',
        'elementals', 'fey', 'fiends', 'giants', 'monstrosoties', 'oozes',
        'plants', 'undead',
    ]

    data = []
    labels = []
    for creature_type in creature_types:
        with open(f'{creature_type}_array.json', 'r') as f:
            samples = [np.array(arr) for arr in json.load(f)]
        data.extend(samples)
        # Class label is the capitalized stem, e.g. 'fey' -> 'Fey'.
        labels.extend([creature_type.capitalize()] * len(samples))

    # Encode labels to numerical format
    label_encoder = LabelEncoder()
    encoded_labels = label_encoder.fit_transform(labels)

    # Split the data into training and testing sets, keeping class proportions
    X_train, X_test, y_train, y_test = train_test_split(
        data,
        encoded_labels,
        stratify=encoded_labels,
        test_size=test_size,
        random_state=random_state,
    )
    return X_train, X_test, y_train, y_test

# Build the stratified train/test split used by the cells below.
# Note: this call also sets the module-level `label_encoder` (declared global
# inside load_and_encode_data), which the evaluation cell needs for class names.
X_train, X_test, y_train, y_test = load_and_encode_data()

## Creating the model

In [8]:
# Cross-validation scheme: 5 stratified folds repeated 3 times,
# i.e. 15 train/validation splits per hyperparameter candidate.
cv = RepeatedStratifiedKFold(n_repeats=3, n_splits=5, random_state=42)

# Hyperparameter search space: 3 * 2 * 4 * 2 * 3 * 3 * 2 = 864 candidates,
# so the search below performs 864 * 15 = 12,960 fits.
# NOTE(review): the recorded output under this cell reports 48 candidates and
# 5 folds, which does not match this grid -- re-run the cell to refresh it.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_features=['sqrt', 'log2'],
    max_depth=[None, 10, 20, 30],
    criterion=['gini', 'entropy'],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    bootstrap=[True, False],
)

# Base estimator. The n_estimators value given here is replaced by each
# candidate's value from param_grid during the search.
model = RandomForestClassifier(
    class_weight='balanced',
    n_estimators=200,
    random_state=81,
)

# Exhaustive search over param_grid using all available cores
grid_search = GridSearchCV(model, param_grid, cv=cv, n_jobs=-1, verbose=2)

# Run the search on the training split
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits


## Evaluating the model

In [9]:
# Get the best model found by the grid search
best_model = grid_search.best_estimator_

# Evaluate the best model on the held-out test split
best_y_pred = best_model.predict(X_test)
best_accuracy = accuracy_score(y_test, best_y_pred)
# zero_division=0: some classes receive no predictions at all, which makes
# their precision undefined. Reporting 0 explicitly keeps the same numbers in
# the table while avoiding an UndefinedMetricWarning for each affected metric.
best_report = classification_report(
    y_test,
    best_y_pred,
    target_names=label_encoder.classes_,
    zero_division=0,
)

print(f'Best Model Accuracy: {best_accuracy}')
print('Best Model Classification Report:')
print(best_report)

Best Model Accuracy: 0.3723404255319149
Best Model Classification Report:
               precision    recall  f1-score   support

  Aberrations       0.50      0.25      0.33         8
       Beasts       0.25      0.50      0.33        14
   Celestials       0.00      0.00      0.00         2
   Constructs       0.62      0.83      0.71        12
      Dragons       0.00      0.00      0.00         3
   Elementals       0.33      0.17      0.22         6
          Fey       0.75      0.60      0.67        10
       Fiends       0.33      0.29      0.31         7
       Giants       0.00      0.00      0.00         3
Monstrosoties       0.31      0.36      0.33        11
        Oozes       0.00      0.00      0.00         3
       Plants       0.12      0.12      0.12         8
       Undead       0.33      0.29      0.31         7

     accuracy                           0.37        94
    macro avg       0.27      0.26      0.26        94
 weighted avg       0.36      0.37      0.35        94

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
