# Data Mining Final Project - NBA Game Winning Forecasting
## Feature Extraction

In [1]:
import numpy as np
import pandas as pd
import time

# @param dfFile: anything pandas.read_csv can open (e.g. the path 'nba_preprocessed.csv'
#                or a file-like object), or an already-loaded pandas.DataFrame
# @param dateStart, dateEnd: str in the format of 'YYYY-MM-DD'; both bounds are inclusive
# @param attriToDrop: list[str] (or a single str) of attribute names WITHOUT the
#                     '_A' / '_B' suffix, e.g. ['PTS'] drops both 'PTS_A' and 'PTS_B'
# @return X, Y: pandas.DataFrame (Y has the single column 'Label', taken from 'W/L_A')
# featureExtraction() outputs X, Y for model training.
# The game date range can be restricted with dateStart / dateEnd.
# Extra attributes to be dropped from X can be given with attriToDrop.
# Raises KeyError (from DataFrame.drop) if a column to drop is not present.
def featureExtraction(dfFile, dateStart='1000-01-01', dateEnd='2999-12-31', attriToDrop=None):
    """Split the preprocessed game table into attributes X and label Y."""
    # Accept an in-memory frame as well as a CSV source, so the documented
    # DataFrame input works instead of failing inside pd.read_csv.
    df = dfFile if isinstance(dfFile, pd.DataFrame) else pd.read_csv(dfFile)

    # Date selection (inclusive on both ends).
    # NOTE(review): a CSV loaded here keeps Date_A as text, so this is a string
    # comparison; it orders chronologically only for 'YYYY-MM-DD' values - confirm.
    # The filter + reset_index builds a new frame, so a caller's DataFrame is not modified.
    inRange = (df.Date_A >= dateStart) & (df.Date_A <= dateEnd)
    df = df.loc[inRange, :].reset_index(drop=True)

    # Get label Y
    Y = df[['W/L_A']].rename(columns={'W/L_A': 'Label'})

    # Normalise attriToDrop: None -> nothing extra; a bare string is ONE attribute
    # name (iterating it would otherwise yield per-character names such as 'P_A').
    if attriToDrop is None:
        attriToDrop = []
    elif isinstance(attriToDrop, str):
        attriToDrop = [attriToDrop]

    # Get attributes X: drop identifiers, the label, scores and the B-side
    # home/away column, plus both team-side versions of every requested attribute.
    colToDrop = ['Team_A', 'Date_A', 'W/L_A', 'Score_A', 'Opponent_A', 'Team_B', 'Date_B', 'W/L_B', 'Home/Away_B', 'Score_B', 'Opponent_B']
    colToDrop += [x + '_A' for x in attriToDrop] + [x + '_B' for x in attriToDrop]
    X = df.drop(columns=colToDrop)

    return X, Y

## Feature Extraction

In [2]:
# Source handed to featureExtraction() as its dfFile argument.
dfFile = 'nba_preprocessed.csv'
# Date bounds in 'YYYY-MM-DD' form. Within this cell they are referenced only by
# the commented-out call below, so the active call does not apply them.
dateStart = '2017-08-01'
dateEnd = '2018-05-01'
# Alternative: restrict games to [dateStart, dateEnd] and drop no extra attributes.
# X, Y = featureExtraction(dfFile, dateStart, dateEnd)
# Active call: default (widest) date range, with 'PTS_A' and 'PTS_B' dropped from X.
X, Y = featureExtraction(dfFile, attriToDrop=['PTS'])
# Alternative: default date range and no extra attributes dropped.
# X, Y = featureExtraction(dfFile)

In [3]:
Y

Unnamed: 0,Label
0,1
1,1
2,1
3,0
4,1
5,0
6,0
7,0
8,1
9,1


In [4]:
X

Unnamed: 0,Home/Away_A,FG%_A,FGM_A,FGA_A,3P%_A,3PM_A,3PA_A,FT%_A,FTM_A,FTA_A,...,FTM_B,FTA_B,REB_B,OREB_B,DREB_B,AST_B,STL_B,BLK_B,TOV_B,PF_B
0,1,0.453,43,95,0.250,1,4,0.707,29,41,...,23,32,47,16,31,24,8,8,20,34
1,1,0.460,40,87,0.667,2,3,0.867,39,45,...,34,46,47,23,24,21,9,7,20,32
2,0,0.489,45,92,0.000,0,1,0.808,21,26,...,21,28,51,13,38,23,3,3,21,25
3,0,0.456,41,90,0.500,2,4,0.778,21,27,...,28,35,43,14,29,26,10,10,25,27
4,0,0.479,35,73,0.333,1,3,0.839,26,31,...,6,8,35,11,24,31,10,4,14,27
5,1,0.420,37,88,0.200,1,5,0.778,28,36,...,27,32,44,15,29,23,8,5,8,29
6,0,0.482,40,83,0.333,2,6,0.778,21,27,...,23,27,46,22,24,32,12,3,11,24
7,1,0.494,42,85,0.250,2,8,0.840,42,50,...,44,54,38,10,28,19,9,6,14,39
8,1,0.539,48,89,1.000,2,2,0.920,23,25,...,26,35,38,12,26,26,5,6,10,22
9,1,0.667,60,90,0.500,1,2,0.636,14,22,...,17,21,33,14,19,14,12,5,13,19
