In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
# from sklearn.metrics import accuracy_score
# from sklearn.ensemble import RandomForestClassifier
# import tensorflow as tf

In [2]:
# Import Data
# mstats_df = pd.read_csv('AMAZON AWS URL')
mstats_df = pd.read_csv('cleaned_dnd_monsters.csv')
mstats_df.head(5)

Unnamed: 0,name,url,cr,size,ac,hp,legendary,str,dex,con,int,wis,cha,fly,swim,type
0,aarakocra,https://www.aidedd.org/dnd/monstres.php?vo=aar...,0.25,Medium,12,13,,10.0,14.0,10.0,11.0,12.0,11.0,Yes,No,humanoid
1,aboleth,https://www.aidedd.org/dnd/monstres.php?vo=abo...,10.0,Large,17,135,Legendary,21.0,9.0,15.0,18.0,15.0,18.0,No,No,aberration
2,acolyte,https://www.aidedd.org/dnd/monstres.php?vo=aco...,0.25,Medium,10,9,,10.0,10.0,10.0,10.0,14.0,11.0,No,No,humanoid
3,adult-black-dragon,https://www.aidedd.org/dnd/monstres.php?vo=adu...,14.0,Huge,19,195,Legendary,23.0,14.0,21.0,14.0,13.0,17.0,Yes,Yes,dragon
4,adult-blue-dragon,https://www.aidedd.org/dnd/monstres.php?vo=adu...,16.0,Huge,19,225,Legendary,25.0,10.0,23.0,16.0,15.0,19.0,Yes,No,dragon


In [3]:
# Remove Name & URL
mstats_df = mstats_df.drop(['name','url'], axis=1)
# Change Size to numerical data (Tiny=1,Small=2,Medium=3,Large=4,Huge=5,Gargantuan=6)
mstats_df['size'] = mstats_df['size'].map( {'Tiny': 1, 'Small': 2, 'Medium':3, 'Large': 4, 'Huge':5, 'Gargantuan':6})
mstats_df['size'].astype(int)
# Generate our categorical variable lists
mstats_cat = mstats_df.dtypes[mstats_df.dtypes == "object"].index.tolist()
# Check the number of unique values in each column
mstats_df[mstats_cat].nunique()

legendary     1
fly           2
swim          2
type         19
dtype: int64

In [4]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)
# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(mstats_df[mstats_cat]))
# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names(mstats_cat)
encode_df.head()



Unnamed: 0,legendary_Legendary,legendary_nan,fly_No,fly_Yes,swim_No,swim_Yes,type_aberration,type_beast,type_celestial,type_construct,...,type_giant,type_giant.1,type_humanoid,type_monstrosity,type_monstrosity.1,type_ooze,type_plant,type_swarm of Tiny beasts,type_undead,type_undead.1
0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Merge one-hot encoded features and drop the originals
mstats_df = mstats_df.merge(encode_df,left_index=True, right_index=True)
mstats_df = mstats_df.drop(mstats_cat,1)
mstats_df.columns

  This is separate from the ipykernel package so we can avoid doing imports until


Index(['cr', 'size', 'ac', 'hp', 'str', 'dex', 'con', 'int', 'wis', 'cha',
       'legendary_Legendary', 'legendary_nan', 'fly_No', 'fly_Yes', 'swim_No',
       'swim_Yes', 'type_aberration', 'type_beast', 'type_celestial',
       'type_construct', 'type_dragon', 'type_elemental', 'type_fey',
       'type_fiend', 'type_fiend ', 'type_giant', 'type_giant ',
       'type_humanoid ', 'type_monstrosity', 'type_monstrosity ', 'type_ooze',
       'type_plant', 'type_swarm of Tiny beasts', 'type_undead',
       'type_undead '],
      dtype='object')

In [6]:
# Split our preprocessed data into our features and target arrays
y = mstats_df["cr"].values
X = mstats_df.drop(['cr'],axis=1).values
# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

In [7]:
# Preprocess numerical data
# Create a StandardScaler instances
scaler = StandardScaler()
# Fit the StandardScaler
X_scaler = scaler.fit(X_train)
# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [9]:
model = LinearRegression()
model.fit(X_train_scaled, y_train)
training_score = model.score(X_train_scaled, y_train)
testing_score = model.score(X_test_scaled, y_test)

print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")

Training Score: 0.9461787086880084
Testing Score: 0.8849831652851377


In [14]:
# # Define the deep learning model
# nn_model = tf.keras.models.Sequential()
# nn_model.add(tf.keras.layers.Dense(units=20, activation="relu", input_dim=39))
# nn_model.add(tf.keras.layers.Dense(units=10, activation="relu"))
# nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
# # Compile the Sequential model together and customize metrics
# nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
# # Train the model
# fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50)
# # Evaluate the model using the test data
# model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
# print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
2/2 - 0s - loss: -7.7848e+03 - accuracy: 0.0984 - 351ms/epoch - 175ms/step
Loss: -7784.80615234375, Accuracy: 0.09836065769195557
