# adding_features

This Jupyter Notebook looks at adding features to our dataset to expand it.

In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

## Regular Season

There are two main data files for each competition (regular season and tournament), so both need to be processed. I will start with the regular-season files for the men's and women's competitions and condense them accordingly.

In [2]:
# Load the men's and women's regular-season detailed results.
# NOTE: despite the `_tournament` suffix, both frames are read from the
# *RegularSeasonDetailedResults* files.
mens_tournament, womens_tournament = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/Users/jinalshah/Jinal/Projects/march-madness-mania/data/Actual Data /MRegularSeasonDetailedResults.csv',
        '/Users/jinalshah/Jinal/Projects/march-madness-mania/data/Actual Data /WRegularSeasonDetailedResults.csv',
    )
)

# Preview the first five rows of the men's frame as a sanity check on the load
mens_tournament.head(n=5)

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WFGM,WFGA,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
0,2003,10,1104,68,1328,62,N,0,27,58,...,10,16,22,10,22,8,18,9,2,20
1,2003,10,1272,70,1393,63,N,0,26,62,...,24,9,20,20,25,7,12,8,6,16
2,2003,11,1266,73,1437,61,N,0,24,58,...,26,14,23,31,22,9,12,2,5,23
3,2003,11,1296,56,1457,50,N,0,18,38,...,22,8,15,17,20,9,19,4,3,23
4,2003,11,1400,77,1208,71,N,0,30,61,...,16,17,27,21,15,12,10,7,1,14


In [3]:
# Same sanity check for the women's frame: preview its first five rows
womens_tournament.head(n=5)

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WFGM,WFGA,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
0,2010,11,3103,63,3237,49,H,0,23,54,...,13,6,10,11,27,11,23,7,6,19
1,2010,11,3104,73,3399,68,N,0,26,62,...,21,14,27,14,26,7,20,4,2,27
2,2010,11,3110,71,3224,59,A,0,29,62,...,14,19,23,17,23,8,15,6,0,15
3,2010,11,3111,63,3267,58,A,0,27,52,...,26,16,25,22,22,15,11,14,5,14
4,2010,11,3119,74,3447,70,H,1,30,74,...,17,11,21,21,32,12,14,4,2,14


In [4]:
# Derive per-game shooting percentages for both teams in every game.
# Winner columns are prefixed 'W', loser columns 'L'; every percentage is
# makes / attempts * 100.
# NOTE(review): a side with zero attempts would give 0 / 0 here, which pandas
# presumably reports as NaN -- confirm how downstream steps treat missing values.

# (new column suffix, makes column suffix, attempts column suffix)
shot_types = (
    ('FieldGoal', 'FGM', 'FGA'),      # field goal %
    ('FieldGoal3', 'FGM3', 'FGA3'),   # 3-point field goal %
    ('FreeThrow', 'FTM', 'FTA'),      # free throw %
)

for season_games in (mens_tournament, womens_tournament):
    for pct_suffix, made_suffix, attempted_suffix in shot_types:
        for side in ('W', 'L'):
            made = season_games[side + made_suffix]
            attempted = season_games[side + attempted_suffix]
            season_games[side + pct_suffix] = made / attempted * 100

# Confirm the new columns show up on the men's frame
mens_tournament.head(n=5)

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WFGM,WFGA,...,LTO,LStl,LBlk,LPF,WFieldGoal,LFieldGoal,WFieldGoal3,LFieldGoal3,WFreeThrow,LFreeThrow
0,2003,10,1104,68,1328,62,N,0,27,58,...,18,9,2,20,46.551724,41.509434,21.428571,20.0,61.111111,72.727273
1,2003,10,1272,70,1393,63,N,0,26,62,...,12,8,6,16,41.935484,35.820896,40.0,25.0,52.631579,45.0
2,2003,11,1266,73,1437,61,N,0,24,58,...,12,2,5,23,41.37931,30.136986,44.444444,11.538462,58.62069,60.869565
3,2003,11,1296,56,1457,50,N,0,18,38,...,19,4,3,23,47.368421,36.734694,33.333333,27.272727,54.83871,53.333333
4,2003,11,1400,77,1208,71,N,0,30,61,...,10,7,1,14,49.180328,38.709677,42.857143,37.5,84.615385,62.962963


In [5]:
# Derive ball-handling and rebounding features for both teams in every game.
# Winner columns are prefixed 'W', loser columns 'L'.
for season_games in (mens_tournament, womens_tournament):
    # Assist-to-turnover ratio.
    # A game with zero turnovers would divide by zero and produce +inf (or NaN
    # when assists are also zero). An inf value would turn any later mean of
    # this column into inf, so zero turnovers are masked to NaN first and the
    # ratio is recorded as undefined (NaN) for those rows instead.
    for side in ('W', 'L'):
        turnovers = season_games[f'{side}TO']
        season_games[f'{side}AssistToTurnoverRatio'] = (
            season_games[f'{side}Ast'] / turnovers.where(turnovers != 0)
        )

    # Total rebounds = offensive rebounds + defensive rebounds
    for side in ('W', 'L'):
        season_games[f'{side}TotalRebounds'] = (
            season_games[f'{side}OR'] + season_games[f'{side}DR']
        )

    # Offensive rebounds as a share of the team's own total rebounds.
    # Stored as a fraction (no * 100), unlike the shooting percentages.
    for side in ('W', 'L'):
        season_games[f'{side}ORPercent'] = (
            season_games[f'{side}OR'] / season_games[f'{side}TotalRebounds']
        )

    # Defensive rebounds as a share of the team's own total rebounds (fraction)
    for side in ('W', 'L'):
        season_games[f'{side}DRPercent'] = (
            season_games[f'{side}DR'] / season_games[f'{side}TotalRebounds']
        )

# Confirm the new columns show up on the men's frame
mens_tournament.head(n=5)

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WFGM,WFGA,...,WFreeThrow,LFreeThrow,WAssistToTurnoverRatio,LAssistToTurnoverRatio,WTotalRebounds,LTotalRebounds,WORPercent,LORPercent,WDRPercent,LDRPercent
0,2003,10,1104,68,1328,62,N,0,27,58,...,61.111111,72.727273,0.565217,0.444444,38,32,0.368421,0.3125,0.631579,0.6875
1,2003,10,1272,70,1393,63,N,0,26,62,...,52.631579,45.0,1.230769,0.583333,43,45,0.348837,0.444444,0.651163,0.555556
2,2003,11,1266,73,1437,61,N,0,24,58,...,58.62069,60.869565,1.5,0.75,43,53,0.395349,0.584906,0.604651,0.415094
3,2003,11,1296,56,1457,50,N,0,18,38,...,54.83871,53.333333,0.916667,0.473684,25,37,0.24,0.459459,0.76,0.540541
4,2003,11,1400,77,1208,71,N,0,30,61,...,84.615385,62.962963,0.857143,1.2,39,36,0.435897,0.583333,0.564103,0.416667


In [6]:
# Opponent-relative shooting features plus two experimental composite ratings


def _add_matchup_ratings(games):
    """Add opposing-shooting columns and the made-up ratings to `games` in place.

    For each side of a game (winner 'W', loser 'L') this adds:
      * <side>OpposingFG / <side>OpposingFG3: the other side's FieldGoal /
        FieldGoal3 columns.
      * <side>DefensiveRating:
        |Stl * Blk * DR - OpposingFG - opponent FTA|
      * <side>OffensiveRating:
        |Score * FieldGoal * FieldGoal3 + Ast + OR + FTA + FreeThrow - 2.5 * TO|

    Both ratings are experimental statistics invented for this project.
    Requires the FieldGoal, FieldGoal3 and FreeThrow columns to already exist.
    """
    # (team prefix, opponent prefix)
    matchups = (('W', 'L'), ('L', 'W'))

    # Opposing field goal %: what the other team shot in this game
    for team, foe in matchups:
        games[f'{team}OpposingFG'] = games[f'{foe}FieldGoal']

    # Opposing 3-point field goal %
    for team, foe in matchups:
        games[f'{team}OpposingFG3'] = games[f'{foe}FieldGoal3']

    # Experimental defensive rating
    for team, foe in matchups:
        disruption = games[f'{team}Stl'] * games[f'{team}Blk'] * games[f'{team}DR']
        games[f'{team}DefensiveRating'] = (
            disruption - games[f'{team}OpposingFG'] - games[f'{foe}FTA']
        ).abs()

    # Experimental offensive rating
    for team, _ in matchups:
        scoring = (
            games[f'{team}Score'] * games[f'{team}FieldGoal'] * games[f'{team}FieldGoal3']
        )
        games[f'{team}OffensiveRating'] = (
            scoring
            + games[f'{team}Ast']
            + games[f'{team}OR']
            + games[f'{team}FTA']
            + games[f'{team}FreeThrow']
            - 2.5 * games[f'{team}TO']
        ).abs()


for season_games in (mens_tournament, womens_tournament):
    _add_matchup_ratings(season_games)

# Confirm the new columns show up on the men's frame
mens_tournament.head(n=5)

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WFGM,WFGA,...,WDRPercent,LDRPercent,WOpposingFG,LOpposingFG,WOpposingFG3,LOpposingFG3,WDefensiveRating,LDefensiveRating,WOffensiveRating,LOffensiveRating
0,2003,10,1104,68,1328,62,N,0,27,58,...,0.631579,0.6875,41.509434,46.551724,20.0,21.428571,104.490566,331.448276,67881.123426,51539.425386
1,2003,10,1272,70,1393,63,N,0,26,62,...,0.651163,0.555556,35.820896,41.935484,25.0,40.0,392.179104,1139.064516,117489.486418,56479.910448
2,2003,11,1266,73,1437,61,N,0,24,58,...,0.604651,0.415094,30.136986,41.37931,11.538462,44.444444,206.863014,149.62069,134347.494253,21305.671462
3,2003,11,1296,56,1457,50,N,0,18,38,...,0.76,0.540541,36.734694,47.368421,27.272727,33.333333,480.265306,161.631579,88493.891341,50139.597712
4,2003,11,1400,77,1208,71,N,0,30,61,...,0.564103,0.416667,38.709677,49.180328,37.5,42.857143,286.290323,42.819672,162386.697352,103162.479092


In [7]:
# Write both enriched frames to disk so they can be pulled back in later on.
# to_csv is called with its defaults, exactly as before.
for enriched_games, output_csv in (
    (mens_tournament, '/Users/jinalshah/Jinal/Projects/march-madness-mania/data/data-preprocessed/men_reg_szn.csv'),
    (womens_tournament, '/Users/jinalshah/Jinal/Projects/march-madness-mania/data/data-preprocessed/womens_reg_szn.csv'),
):
    enriched_games.to_csv(output_csv)