In [19]:
# Importing libraries for data manipulation
import sqlite3
import pandas as pd

# Importing libraries for data visualization
import matplotlib.pyplot as plt
import numpy as np

# Importing libraries for data preprocessing
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

# Importing libraries for classification
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import classification_report

# Importing libraries for classification models
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.model_selection import GridSearchCV

from py_helpers.pipeline_helper import *
from py_helpers.db_helper import *

In [20]:
# Notebook-wide settings read by the cells below.
seed = 42  # presumably the random seed for the models; not used in the visible cells
target = "playoff"  # same name as the 'playoff' column of the merged frame
data = "./db/WNBA_filtered.db"  # passed as the first argument to retrieve_data
# Table names, each passed to retrieve_data together with `data`.
tables = [
    "Awards_Players",
    "Coaches",
    "Players",
    "Players_Teams",
    "Teams",
    "Teams_Year",
]

In [21]:
# Call retrieve_data once per entry of `tables` (first six, in list order) and
# bind each result to the variable named after its table.
(
    awards_players_df,
    coaches_df,
    players_df,
    players_teams_df,
    teams_df,
    teams_year_df,
) = [retrieve_data(data, table_name) for table_name in tables[:6]]

In [22]:
# Build one wide frame starting from teams_df. Every step is a left merge, so a
# row of the left-hand frame is kept even when the right-hand frame has no match.
df = (
    teams_df
    .merge(players_teams_df, how='left', on=['year', 'tmID'])
    .merge(players_df, how='left', left_on='playerID', right_on='bioID')
    .merge(coaches_df, how='left', on=['year', 'tmID'])
    .merge(awards_players_df, how='left', on=['playerID', 'year'])
    .merge(teams_year_df, how='left', on=['year', 'tmID'])
)
# Show the resulting column names.
df.columns

Index(['year', 'tmID', 'rank', 'playoff', 'o_fgm', 'o_fga', 'o_ftm', 'o_fta',
       'o_3pm', 'o_3pa', 'o_oreb', 'o_dreb', 'o_reb', 'o_asts', 'o_pf',
       'o_stl', 'o_to', 'o_blk', 'o_pts', 'd_fgm', 'd_fga', 'd_ftm', 'd_fta',
       'd_3pm', 'd_3pa', 'd_oreb', 'd_dreb', 'd_reb', 'd_asts', 'd_pf',
       'd_stl', 'd_to', 'd_blk', 'd_pts', 'homeW', 'homeL', 'awayW', 'awayL',
       'last_year_rank', 'playerID', 'stint', 'GP', 'GS', 'minutes', 'points',
       'oRebounds', 'dRebounds', 'rebounds', 'assists', 'steals', 'blocks',
       'turnovers', 'PF', 'fgRatio', 'ftRatio', 'threeRatio', 'dq', 'bioID',
       'height', 'weight', 'birthYear', 'pos_C', 'pos_C_F', 'pos_F', 'pos_F_C',
       'pos_F_G', 'pos_G', 'pos_G_F', 'coachID', 'won', 'lost', 'c_stint',
       'All_Star_Game_Most_Valuable_Player', 'Defensive_Player_of_the_Year',
       'Kim_Perrot_Sportsmanship_Award', 'Most_Improved_Player',
       'Most_Valuable_Player', 'Rookie_of_the_Year', 'Sixth_Woman_of_the_Year',
       'WNBA_

In [23]:
# Data from the eleventh year: one CSV each for coaches, player stints and teams.
# NOTE(review): the output saved under this cell is `KeyError: ''`; re-run on a
# fresh kernel to confirm that all three reads succeed.
coaches_csv, players_teams_csv, teams_csv = (
    '../season_11/coaches.csv',
    '../season_11/players_teams.csv',
    '../season_11/teams.csv',
)

# Read the files in the same order the paths are listed above.
df_coaches = pd.read_csv(coaches_csv)
df_players_teams = pd.read_csv(players_teams_csv)
df_teams = pd.read_csv(teams_csv)

KeyError: ''

In [None]:
# Season-11 frame: coaches inner-joined to player stints, then to teams, always
# matching on (year, tmID). Non-key columns present on both sides receive
# pandas' default _x/_y suffixes, as the column listing below shows.
df_11 = (
    df_coaches
    .merge(df_players_teams, how='inner', on=['year', 'tmID'])
    .merge(df_teams, how='inner', on=['year', 'tmID'])
)
df_11.columns

Index(['coachID', 'year', 'tmID', 'lgID_x', 'stint_x', 'playerID', 'stint_y',
       'lgID_y', 'lgID', 'franchID', 'confID', 'name', 'arena'],
      dtype='object')

In [None]:
# df_11 was already built by the previous cell with exactly these two merges,
# so it is only inspected here instead of being recomputed.
df_11.shape

In [None]:
# Collect every team id seen in either the merged historical frame or the
# season-11 frame, de-duplicated and in ascending order.
team_names = set(df['tmID'].unique().tolist())
new_teams = df_11['tmID'].unique().tolist()
team_names = sorted(team_names.union(new_teams))
team_names

['ATL',
 'CHA',
 'CHI',
 'CLE',
 'CON',
 'DET',
 'HOU',
 'IND',
 'LAS',
 'MIA',
 'MIN',
 'NYL',
 'ORL',
 'PHO',
 'POR',
 'SAC',
 'SAS',
 'SEA',
 'TUL',
 'UTA',
 'WAS']

In [None]:
# Encode each team id as an integer code. The fitted encoder is kept in
# `encoder`; LabelEncoder stores the original labels in encoder.classes_.
encoder = LabelEncoder()
# NOTE: team_names is overwritten with the integer codes here, so re-running
# this cell on its own would re-fit the encoder on the codes, not the names.
team_names = encoder.fit_transform(team_names)
print(team_names)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]


In [None]:
# Re-inspect the merged frame's column names.
# NOTE(review): the listing saved below shows 'stint_x'/'stint_y', while the
# listing saved right after the merge cell shows 'stint'/'c_stint'. The two
# outputs appear to come from different kernel states; confirm with
# Restart & Run All.
df.columns

Index(['year', 'tmID', 'rank', 'playoff', 'o_fgm', 'o_fga', 'o_ftm', 'o_fta',
       'o_3pm', 'o_3pa', 'o_oreb', 'o_dreb', 'o_reb', 'o_asts', 'o_pf',
       'o_stl', 'o_to', 'o_blk', 'o_pts', 'd_fgm', 'd_fga', 'd_ftm', 'd_fta',
       'd_3pm', 'd_3pa', 'd_oreb', 'd_dreb', 'd_reb', 'd_asts', 'd_pf',
       'd_stl', 'd_to', 'd_blk', 'd_pts', 'homeW', 'homeL', 'awayW', 'awayL',
       'last_year_rank', 'playerID', 'stint_x', 'GP', 'GS', 'minutes',
       'points', 'oRebounds', 'dRebounds', 'rebounds', 'assists', 'steals',
       'blocks', 'turnovers', 'PF', 'fgRatio', 'ftRatio', 'threeRatio', 'dq',
       'bioID', 'height', 'weight', 'birthYear', 'pos_C', 'pos_C_F', 'pos_F',
       'pos_F_C', 'pos_F_G', 'pos_G', 'pos_G_F', 'coachID', 'stint_y', 'won',
       'lost', 'All_Star_Game_Most_Valuable_Player',
       'Defensive_Player_of_the_Year', 'Kim_Perrot_Sportsmanship_Award',
       'Most_Improved_Player', 'Most_Valuable_Player', 'Rookie_of_the_Year',
       'Sixth_Woman_of_the_Year', 'WNB

In [None]:
# Show the season-11 frame's column names again, presumably to compare them
# against the merged frame's columns listed just above.
df_11.columns

Index(['coachID', 'year', 'tmID', 'lgID_x', 'stint_x', 'playerID', 'stint_y',
       'lgID_y', 'lgID', 'franchID', 'confID', 'name', 'arena'],
      dtype='object')