### Importing packages and Reading in the data


Before we can begin exploring and manipulating any database we need to import the packages that we will be using in the project. for the purposes of this basic tutorial we only need the pandas package.

In [1]:
# Import pandas
import pandas as pd

Once we have imported our packages we can read in the data we are working with for this project. In this case we are using a database of stats from dungeons and dragons characters that I found somewhere on the internet. The file is in the format of a column seperated file or CSV and can be imported as a data frame with the pd.read_csv function. Once imported I like to use the head function to look at the first 5 rows and make sure the import worked.

In [None]:
# Read data from a CSV file
df = pd.read_csv('dnd_stats.csv')

# Display the first 5 rows of the DataFrame
df.head()

### Inspecting the data

Once we have imported the data, before we can start manipulating it we need to inspect it to have a better idea of what the data looks like. The 'dtypes' method can be used to identify the object ype in each field

In [3]:
# Show the data types of each column
df.dtypes

race            object
height           int64
weight           int64
speed            int64
strength         int64
dexterity        int64
constitution     int64
intelligence     int64
wisdom           int64
charisma         int64
dtype: object

The describe method will output a series of statistics describing all the numerical feild in the data frame

In [5]:
# Get a quick statistic summary of your data
df.describe()

Unnamed: 0,height,weight,speed,strength,dexterity,constitution,intelligence,wisdom,charisma
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,59.5975,146.8635,28.3245,12.8513,12.8384,12.7528,12.7529,12.4165,12.8701
std,13.283107,68.304729,2.360245,2.96494,2.945531,2.924953,2.947119,2.89283,2.974746
min,33.0,39.0,25.0,3.0,3.0,3.0,3.0,3.0,3.0
25%,52.0,120.0,25.0,11.0,11.0,11.0,11.0,10.0,11.0
50%,64.0,154.0,30.0,13.0,13.0,13.0,13.0,13.0,13.0
75%,69.0,188.0,30.0,15.0,15.0,15.0,15.0,15.0,15.0
max,82.0,367.0,30.0,20.0,20.0,20.0,20.0,19.0,20.0


The shape method will output the number of rows and the number of columns in your data frame

In [6]:
# Show the shape of the DataFrame (rows, columns)
df.shape

(10000, 10)

### Data selection

In [9]:
# Select a single column
walk_speed = df['speed']

stren_dex = df[['strength','dexterity']]

print(walk_speed,stren_dex)

0       30
1       30
2       30
3       30
4       30
        ..
9995    30
9996    30
9997    30
9998    30
9999    30
Name: speed, Length: 10000, dtype: int64       strength  dexterity
0           14         15
1           11         11
2           14         16
3            8         19
4            8         17
...        ...        ...
9995        13         18
9996         8         14
9997         4          5
9998        12         16
9999        16         17

[10000 rows x 2 columns]


In [11]:
# Select rows 0 through 2
df[0:3]



Unnamed: 0,race,height,weight,speed,strength,dexterity,constitution,intelligence,wisdom,charisma
0,dragonborn,79,279,30,14,15,12,7,13,16
1,dragonborn,79,331,30,11,11,11,13,13,17
2,dragonborn,71,230,30,14,16,11,13,10,18


In [12]:
# Select rows where heaight is greater than 50
df[df['height'] > 50]

Unnamed: 0,race,height,weight,speed,strength,dexterity,constitution,intelligence,wisdom,charisma
0,dragonborn,79,279,30,14,15,12,7,13,16
1,dragonborn,79,331,30,11,11,11,13,13,17
2,dragonborn,71,230,30,14,16,11,13,10,18
3,dragonborn,75,265,30,8,19,7,10,13,16
4,dragonborn,72,229,30,8,17,14,14,10,9
...,...,...,...,...,...,...,...,...,...,...
9995,tiefling,63,146,30,13,18,17,13,15,13
9996,tiefling,72,230,30,8,14,13,6,14,20
9997,tiefling,66,137,30,4,5,16,12,12,15
9998,tiefling,68,143,30,12,16,12,15,9,16


In [14]:
# Drop a column
df.drop('height', axis=1)

Unnamed: 0,race,weight,speed,strength,dexterity,constitution,intelligence,wisdom,charisma
0,dragonborn,279,30,14,15,12,7,13,16
1,dragonborn,331,30,11,11,11,13,13,17
2,dragonborn,230,30,14,16,11,13,10,18
3,dragonborn,265,30,8,19,7,10,13,16
4,dragonborn,229,30,8,17,14,14,10,9
...,...,...,...,...,...,...,...,...,...
9995,tiefling,146,30,13,18,17,13,15,13
9996,tiefling,230,30,8,14,13,6,14,20
9997,tiefling,137,30,4,5,16,12,12,15
9998,tiefling,143,30,12,16,12,15,9,16


In [16]:
# Drop multiple columns
df.drop(['height','weight'], axis = 1)

Unnamed: 0,race,speed,strength,dexterity,constitution,intelligence,wisdom,charisma
0,dragonborn,30,14,15,12,7,13,16
1,dragonborn,30,11,11,11,13,13,17
2,dragonborn,30,14,16,11,13,10,18
3,dragonborn,30,8,19,7,10,13,16
4,dragonborn,30,8,17,14,14,10,9
...,...,...,...,...,...,...,...,...
9995,tiefling,30,13,18,17,13,15,13
9996,tiefling,30,8,14,13,6,14,20
9997,tiefling,30,4,5,16,12,12,15
9998,tiefling,30,12,16,12,15,9,16


### Data manipulation

In [17]:
# Rename a column
df.rename(columns={'charisma': 'rizz'})

Unnamed: 0,race,height,weight,speed,strength,dexterity,constitution,intelligence,wisdom,rizz
0,dragonborn,79,279,30,14,15,12,7,13,16
1,dragonborn,79,331,30,11,11,11,13,13,17
2,dragonborn,71,230,30,14,16,11,13,10,18
3,dragonborn,75,265,30,8,19,7,10,13,16
4,dragonborn,72,229,30,8,17,14,14,10,9
...,...,...,...,...,...,...,...,...,...,...
9995,tiefling,63,146,30,13,18,17,13,15,13
9996,tiefling,72,230,30,8,14,13,6,14,20
9997,tiefling,66,137,30,4,5,16,12,12,15
9998,tiefling,68,143,30,12,16,12,15,9,16


In [20]:
# Apply a function to a column
df['speed'].apply(lambda x: x*2)


0       60
1       60
2       60
3       60
4       60
        ..
9995    60
9996    60
9997    60
9998    60
9999    60
Name: speed, Length: 10000, dtype: int64

In [21]:
# Group by a column and get mean of the other columns
df.groupby('intelligence').mean()

  df.groupby('intelligence').mean()


Unnamed: 0_level_0,height,weight,speed,strength,dexterity,constitution,wisdom,charisma
intelligence,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3,59.333333,144.0,28.333333,13.166667,13.833333,12.833333,11.5,12.333333
4,66.181818,174.863636,29.318182,13.318182,13.454545,13.409091,10.772727,12.5
5,60.723077,160.6,28.461538,13.138462,12.430769,13.061538,12.184615,12.692308
6,60.701613,159.951613,28.306452,12.653226,12.508065,12.967742,12.443548,12.532258
7,61.278481,156.71308,28.586498,12.860759,13.164557,12.894515,12.113924,12.797468
8,60.661111,155.727778,28.458333,12.991667,13.125,12.625,12.094444,12.891667
9,60.385,151.556667,28.433333,12.915,13.026667,12.751667,12.583333,12.74
10,60.418224,152.507009,28.463785,12.974299,12.926402,12.82243,12.516355,13.021028
11,60.339787,152.585673,28.436592,12.887706,12.737657,12.883833,12.441433,12.835431
12,59.481023,148.143564,28.2467,12.792904,12.849835,12.773102,12.281353,12.89604


In [23]:
# Create a new column as a function of existing columns
df['density'] = df['weight'] / df['height']
df

Unnamed: 0,race,height,weight,speed,strength,dexterity,constitution,intelligence,wisdom,charisma,density
0,dragonborn,79,279,30,14,15,12,7,13,16,3.531646
1,dragonborn,79,331,30,11,11,11,13,13,17,4.189873
2,dragonborn,71,230,30,14,16,11,13,10,18,3.239437
3,dragonborn,75,265,30,8,19,7,10,13,16,3.533333
4,dragonborn,72,229,30,8,17,14,14,10,9,3.180556
...,...,...,...,...,...,...,...,...,...,...,...
9995,tiefling,63,146,30,13,18,17,13,15,13,2.317460
9996,tiefling,72,230,30,8,14,13,6,14,20,3.194444
9997,tiefling,66,137,30,4,5,16,12,12,15,2.075758
9998,tiefling,68,143,30,12,16,12,15,9,16,2.102941


### Data analysis

In [24]:
# Find the correlation between columns
df.corr()

  df.corr()


Unnamed: 0,height,weight,speed,strength,dexterity,constitution,intelligence,wisdom,charisma,density
height,1.0,0.894832,0.894455,0.179299,0.153711,0.011285,-0.074086,0.007502,0.150191,0.768505
weight,0.894832,1.0,0.671101,0.193236,0.178351,0.063784,-0.104817,-0.013134,0.101761,0.958318
speed,0.894455,0.671101,1.0,0.159327,0.12526,-0.050078,-0.055069,0.02627,0.163431,0.49972
strength,0.179299,0.193236,0.159327,1.0,0.076081,0.00637,-0.020744,-3.1e-05,-0.009164,0.158178
dexterity,0.153711,0.178351,0.12526,0.076081,1.0,-4e-05,-0.021951,-0.012886,0.011723,0.15608
constitution,0.011285,0.063784,-0.050078,0.00637,-4e-05,1.0,-0.017981,0.022677,-0.030495,0.11191
intelligence,-0.074086,-0.104817,-0.055069,-0.020744,-0.021951,-0.017981,1.0,0.019193,0.01086,-0.118094
wisdom,0.007502,-0.013134,0.02627,-3.1e-05,-0.012886,0.022677,0.019193,1.0,0.007217,-0.018865
charisma,0.150191,0.101761,0.163431,-0.009164,0.011723,-0.030495,0.01086,0.007217,1.0,0.073468
density,0.768505,0.958318,0.49972,0.158178,0.15608,0.11191,-0.118094,-0.018865,0.073468,1.0


In [27]:
# Get the count of unique values in a column
df['wisdom'].nunique()

17

In [28]:
# Get the count of each value in a column
df['wisdom'].value_counts()

13    1325
12    1252
14    1227
11    1093
15     985
10     927
16     795
9      646
17     505
8      475
7      276
18     225
6      150
5       69
4       22
19      22
3        6
Name: wisdom, dtype: int64

### Saving data

In [29]:
# Write DataFrame to a CSV file
df.to_csv('new_dnd_stats.csv', index=False)