## Database import & cleaning

In [1]:
#imports
from fractions import Fraction

import pandas as pd

### 1.1 Database used

First, let's import a database of monsters. Luckily for us, someone on Kaggle has already scraped a simple list, available at:

https://www.kaggle.com/datasets/travistyler/dnd-5e-monster-manual-stats?resource=download&select=cleaned_monsters_basic.csv

In [2]:
# Kaggle page that hosts the monster dataset.
file_url = (
    "https://www.kaggle.com/datasets/travistyler/dnd-5e-monster-manual-stats"
    "?resource=download&select=cleaned_monsters_basic.csv"
)
# Name of the CSV file that is read with pandas.
file = "cleaned_monsters_basic.csv"

In [3]:
# This database has many columns we don't need: rename them all, then load only the useful ones.

# New names for every column of the file; passed as `names`, they replace the CSV's own header row.
columns = ['0', 'name', 'size', 'monster_type', 'alignment', 'ac', 'hp',
       'str', 'str_mod', 'dex', 'dex_mod', 'con', 'con_mod', 'int',
       'int_mod', 'wis', 'wis_mod', 'cha', 'cha_mod', 'senses', 'languages',
       'cr', 'str_save', 'dex_save', 'con_save', 'int_save', 'wis_save',
       'cha_save', 'speed', 'swim', 'fly', 'climb', 'burrow',
       'number_legendary_actions', 'history', 'perception', 'stealth',
       'persuasion', 'insight', 'deception', 'arcana', 'religion',
       'acrobatics', 'athletics', 'intimidation']

# Subset of the renamed columns that is actually loaded into the frame.
useful_columns  = ['name', 'cr', 'ac', 'hp',
                  'str', 'str_mod', 'dex', 'dex_mod', 'con', 'con_mod',
                  'int','int_mod', 'wis', 'wis_mod', 'cha', 'cha_mod',
                  'str_save', 'dex_save', 'con_save', 'int_save', 'wis_save', 'cha_save',
                   'speed', 'size', 'monster_type', 'swim', 'fly', 'climb' ]

df = pd.read_csv(
    file,
    header=0,            # row 0 is the original header; it is replaced by `names`
    names=columns,
    usecols=useful_columns,
)

df = df.reset_index(drop=True)

# Add a numeric version of the cr level for convenience.
# The cr values are presumably text such as "10" or "1/4" (TODO confirm against the CSV).
# Fraction parses both forms without executing the file's content as Python code,
# which is what eval() would do.
df["numeric_cr"] = df["cr"].apply(lambda cr: float(Fraction(cr)))
df.head(5)

Unnamed: 0,name,size,monster_type,ac,hp,str,str_mod,dex,dex_mod,con,...,dex_save,con_save,int_save,wis_save,cha_save,speed,swim,fly,climb,numeric_cr
0,Aboleth,Large,Aberration,17,135,21,5,9,-1,15,...,0,6,8,6,0,10,40,0,0,10.0
1,Acolyte,Medium,Humanoid (any race),10,9,10,0,10,0,10,...,0,0,0,0,0,30,0,0,0,0.25
2,Adult Black Dragon,Huge,Dragon,19,195,23,6,14,2,21,...,7,10,0,6,8,40,40,80,0,14.0
3,Adult Blue Dragon,Huge,Dragon,19,225,25,7,10,0,23,...,5,11,0,7,9,40,0,80,0,16.0
4,Adult Brass Dragon,Huge,Dragon,18,172,23,6,10,0,21,...,5,10,0,6,8,40,0,80,0,13.0


In [4]:
# Display the whole frame; the rendered output elides the middle rows and columns.
df

Unnamed: 0,name,size,monster_type,ac,hp,str,str_mod,dex,dex_mod,con,...,dex_save,con_save,int_save,wis_save,cha_save,speed,swim,fly,climb,numeric_cr
0,Aboleth,Large,Aberration,17,135,21,5,9,-1,15,...,0,6,8,6,0,10,40,0,0,10.00
1,Acolyte,Medium,Humanoid (any race),10,9,10,0,10,0,10,...,0,0,0,0,0,30,0,0,0,0.25
2,Adult Black Dragon,Huge,Dragon,19,195,23,6,14,2,21,...,7,10,0,6,8,40,40,80,0,14.00
3,Adult Blue Dragon,Huge,Dragon,19,225,25,7,10,0,23,...,5,11,0,7,9,40,0,80,0,16.00
4,Adult Brass Dragon,Huge,Dragon,18,172,23,6,10,0,21,...,5,10,0,6,8,40,0,80,0,13.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
319,Young Green Dragon,Large,Dragon,18,136,19,4,12,1,17,...,4,6,0,4,5,40,40,80,0,8.00
320,Young Red Dragon,Large,Dragon,18,178,23,6,10,0,21,...,4,9,0,4,8,40,0,80,40,10.00
321,Young Silver Dragon,Large,Dragon,18,168,23,6,10,0,21,...,4,9,0,4,8,40,0,80,0,9.00
322,Young White Dragon,Large,Dragon,17,133,18,4,10,0,18,...,3,7,0,3,4,40,40,80,0,6.00


### 1.2 Error handling

Nothing is ever perfect: during our study we found an error in this database. We will try to clean it up.

In [5]:
# Show the Tarrasque's dex/con columns and shade the inconsistent pair:
# a `con` of 30 (darker grey) next to a `con_mod` of 1 (lighter grey).
# Each style function returns one CSS string per cell of the column it receives,
# so the cell keeps working whatever number of rows the name filter selects.
(
    df.loc[
        df["name"] == "Tarrasque",
        ['name', 'dex', 'dex_mod', 'dex_save', 'con', 'con_mod', 'con_save'],
    ]
    .style
    .apply(
        lambda col: ['background-color: #B2B2B2' if col.name == "con" and value == 30 else ''
                     for value in col],
        axis=0,
    )
    .apply(
        lambda col: ['background-color: #D1D1D1' if col.name == "con_mod" and value == 1 else ''
                     for value in col],
        axis=0,
    )
)

Unnamed: 0,name,dex,dex_mod,dex_save,con,con_mod,con_save
280,Tarrasque,11,0,0,30,1,0


This monster has a con stat of 30 but a mod of 1. Every stat value has a specific modifier, and the modifier for 30 is +10.

_(PS: in this table, con is the stat, con_mod is its modifier, and con_save is the save when it is defined by other rules. If the latter is larger, it is used instead of con_mod.)_

Let's quickly create a dictionary with the appropriate values.

In [6]:
import math

# Lookup table: stat score (1 to 30 inclusive) -> modifier, computed as floor(score / 2) - 5.
stats_mods = {score: math.floor(score / 2) - 5 for score in range(1, 31)}

Let's look for more errors where the stat mod isn't the right one.

In [7]:
# For each stat, count the rows whose stored modifier differs from the lookup table
# and report only the stats that have at least one mismatch.
for stat in ('str', 'dex', 'con', 'int', 'wis', 'cha'):
    expected_mod = df[stat].apply(lambda score: stats_mods[score])
    mismatched = df[stat + "_mod"].ne(expected_mod)
    n_errors = len(df[mismatched])

    if n_errors > 0:
        print(n_errors," errors in stat", stat)

5  errors in stat str
1  errors in stat con
1  errors in stat cha


Seven stat mods are wrong. If we looked into the errors, we would indeed see that something is going on with stats that have a score of 30. Let me skip the search and directly show the monsters that have an error:


In [8]:
# Columns shown in the error overview: the name plus every stat and its modifier.
cols_to_see = ["name", 'str', 'str_mod', 'dex','dex_mod',
               'con', 'con_mod', 'int', 'int_mod',
               'wis', 'wis_mod', 'cha', 'cha_mod']

# Show the monsters with at least one stat of 30.
# Scores of 30 are shaded dark grey; modifiers of 1 are shaded light grey.
# The light-grey rule is limited to the `*_mod` columns so that a raw score of 1
# can never be mistaken for a wrong modifier.
(
    df.loc[df[['str', 'dex', 'con','int', 'wis', 'cha']].eq(30).any(axis=1), cols_to_see]
    .style
    .apply(
        lambda col: ['background-color: #B2B2B2' if value == 30 else '' for value in col],
        axis=0,
    )
    .apply(
        lambda col: ['background-color: #d1d1d1' if col.name.endswith("_mod") and value == 1 else ''
                     for value in col],
        axis=0,
    )
)

Unnamed: 0,name,str,str_mod,dex,dex_mod,con,con_mod,int,int_mod,wis,wis_mod,cha,cha_mod
18,Ancient Gold Dragon,30,1,14,2,29,9,18,4,17,3,28,9
20,Ancient Red Dragon,30,1,10,0,29,9,18,4,15,2,23,6
21,Ancient Silver Dragon,30,1,10,0,29,9,18,4,15,2,23,6
181,Kraken,30,1,11,0,25,7,22,6,18,4,20,5
257,Solar,26,8,22,6,26,8,25,7,25,7,30,1
280,Tarrasque,30,1,11,0,30,1,3,-4,11,0,11,0


It seems that stats with a score of 30 were not given the correct +10 stat mod.<br>
Let's fix the errors.

In [9]:
# Overwrite every modifier column with the value the lookup table gives for its score.
for stat in ('str', 'dex', 'con', 'int', 'wis', 'cha'):
    corrected_mod = df[stat].apply(lambda score: stats_mods[score])
    df[f"{stat}_mod"] = corrected_mod

In [10]:
# Monsters with at least one stat of 30: scores of 30 are shaded dark grey,
# and modifiers equal to 10 are shaded light grey.
has_score_30 = df[['str', 'dex', 'con','int', 'wis', 'cha']].eq(30).any(axis=1)

df_styled = (
    df[has_score_30][cols_to_see]
    .style
    .apply(
        lambda col: ['background-color: #B2B2B2' if cell == 30 else '' for cell in col],
        axis=0,
    )
    .apply(
        lambda col: ['background-color: #D1D1D1' if cell == 10 and "mod" in col.name else ''
                     for cell in col],
        axis=0,
    )
)
df_styled

Unnamed: 0,name,str,str_mod,dex,dex_mod,con,con_mod,int,int_mod,wis,wis_mod,cha,cha_mod
18,Ancient Gold Dragon,30,10,14,2,29,9,18,4,17,3,28,9
20,Ancient Red Dragon,30,10,10,0,29,9,18,4,15,2,23,6
21,Ancient Silver Dragon,30,10,10,0,29,9,18,4,15,2,23,6
181,Kraken,30,10,11,0,25,7,22,6,18,4,20,5
257,Solar,26,8,22,6,26,8,25,7,25,7,30,10
280,Tarrasque,30,10,11,0,30,10,3,-4,11,0,11,0


fixed!

In [11]:
# Write the frame to disk, leaving out the index column.
df.to_csv(path_or_buf="monsters.csv", index=False)