In [None]:
import pandas as pd
import os
os.environ["PATH"] += os.pathsep + r"C:\\Program Files\\Graphviz\bin"

from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz
import subprocess

In [35]:
# Load the monster table, keeping only `cr` and the stat columns used below.
used_columns = ['cr', 'ac', 'hp', 'str', 'dex', 'con', 'int', 'wis', 'cha']
df = pd.read_csv("dnd_monsters.csv")[used_columns]

# Per-column count of missing values, taken before any rows are dropped.
nans = {column: int(df[column].isna().sum()) for column in used_columns}

# Rows with a missing `cr` are dropped.
df = df[df['cr'].notna()]


def cr_standardiser(x) -> str:
    """Return `x` as the string form of a float.

    Anything `float()` can parse is returned as `str(float(x))`. The three
    fraction spellings '1/8', '1/4' and '1/2' are mapped to their decimal
    strings instead.

    Raises:
        ValueError: if `x` is neither parseable by `float()` nor one of the
            three known fraction spellings.
    """
    known_fractions = (('1/8', '0.125'), ('1/4', '0.25'), ('1/2', '0.5'))
    try:
        as_float = float(x)
    except ValueError:
        # Not a plain number: fall back to the known fraction spellings.
        for fraction_text, decimal_text in known_fractions:
            if x == fraction_text:
                return decimal_text
        raise ValueError(f'unexpected value: {x}')
    return str(as_float)


# Normalise every `cr` to a decimal string. `assign` builds a new frame, so
# nothing is written into a filtered slice of an earlier frame (the pattern
# that triggers pandas' SettingWithCopyWarning).
df = df.assign(cr=df['cr'].apply(cr_standardiser))

# Rows with a missing `str` score are dropped.
df = df[df['str'].notna()]
df

Unnamed: 0,cr,ac,hp,str,dex,con,int,wis,cha
0,0.25,12,13,10.0,14.0,10.0,11.0,12.0,11.0
2,10.0,17,135,21.0,9.0,15.0,18.0,15.0,18.0
5,0.25,10,9,10.0,10.0,10.0,10.0,14.0,11.0
6,14.0,19,195,23.0,14.0,21.0,14.0,13.0,17.0
8,16.0,19,225,25.0,10.0,23.0,16.0,15.0,19.0
...,...,...,...,...,...,...,...,...,...
753,3.0,12,66,16.0,14.0,13.0,14.0,12.0,16.0
755,4.0,14,71,16.0,14.0,13.0,14.0,12.0,16.0
757,1.0,11,40,11.0,12.0,11.0,13.0,12.0,14.0
759,26.0,21,580,27.0,24.0,28.0,26.0,27.0,30.0


In [None]:
# Features are every remaining column except `cr`; `cr` is the class label.
X = df.drop(columns=['cr'])
y = df['cr']

# Fixed seed: the tree permutes features at each split, so without
# `random_state` the fitted tree can differ from one run to the next.
clf = DecisionTreeClassifier(random_state=0)
# `fit` returns the estimator itself, so `model` and `clf` are the same object.
model = clf.fit(X, y)

In [None]:
# Class labels, as strings, in the order the fitted classifier stores them.
list_o_cr = [str(c) for c in clf.classes_]

# Write the fitted tree to a Graphviz DOT file.
export_graphviz(
    clf,
    # A plain list: some scikit-learn releases reject a pandas Index here.
    feature_names=list(X.columns),
    class_names=list_o_cr,
    filled=True, rounded=True, node_ids=True, out_file='tree.dot'
)

# Convert DOT to SVG. check_call returns 0 on success and raises
# CalledProcessError when `dot` exits with a non-zero status, so a failed
# render cannot go unnoticed.
subprocess.check_call(['dot', '-Tsvg', 'tree.dot', '-o', 'tree.svg'])

0

I learned that training a decision tree classifier on 30 classes results in an incredibly complex output (honestly, I should've realized this in advance).

While checking through a few branches of the tree, I saw that health (hp) is the major factor in the decision tree, along with AC, which was expected.

However, I would have expected one or two ability scores to also dominate the higher levels of the tree, which was not the case. This suggests that, in general, all the scores are quite important, although this is not a definitive finding, since it is based on a single tree and some looking around.