### NBA SEASON TEAM FEED

In [73]:
import pandas as pd
import numpy as np
from statistics import mean
from statistics import stdev
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import f_regression
import warnings

warnings.filterwarnings('ignore')
pd.options.display.max_rows = None
pd.options.display.max_columns = None

### DATA PREPROCESSING

In [74]:
sc_data = pd.read_excel("05-16-2021-nba-season-team-feed.xlsx")
column_names = ['BIGDATABALL DATASET', 'GAME-ID', 'DATE', 'TEAM', 'VENUE', '1Q', '2Q',
       '3Q', '4Q', 'OT1', 'OT2', 'OT3', 'OT4', 'OT5', 'FINAL SCORE', 'MIN', 'FG', 'FGA',
       '3P', '3PA', 'FT', 'FTA', 'OR', 'DR', 'TOT', 'AST', 'PF', 'ST', 'TO',
       'TO TO', 'BL', 'PTS', 'POSS', 'PACE', 'OEFF', 'DEFF',
       'TEAM REST DAYS', 'SHOOTING FORWARD','POWER FORWARD',  'CENTER', 'SHOOTING GUARD','POINT GUARD',
       'MAIN REF', 'CREW', 'OPENING ODDS',
       'OPENING SPREAD', 'OPENING TOTAL', 'LINE MOVEMENT #1',
       'LINE MOVEMENT #2', 'LINE MOVEMENT #3', 'CLOSING ODDS',
       'CLOSING SPREAD', 'CLOSING TOTAL', 'MONEYLINE', 'HALFTIME',
       'BOX SCORE URL', 'FULL GAME ODDS URL']
sc_data.columns = column_names

sc_data = sc_data.drop(['MAIN REF', 'CREW'],1)
bd = sc_data.drop(['BIGDATABALL DATASET','GAME-ID','MIN', 'FG', 'FGA','3P', '3PA', 'FT', 'FTA', 'OR', 'DR', 'TOT', 
                   'AST', 'PF', 'ST', 'TO','TO TO', 'BL', 'PTS', 'POSS', 'PACE', 'OEFF', 'DEFF', 'TEAM REST DAYS',
                   'BOX SCORE URL','FULL GAME ODDS URL'],1)

ht_score = []
for i in range(0,len(bd['TEAM']),2):
    ht_score.append(str((int(bd['1Q'][i]) + int(bd['2Q'][i]))) + " " + str((int(bd['1Q'][i+1]) + int(bd['2Q'][i+1]))))
    ht_score.append(str((int(bd['1Q'][i+1]) + int(bd['2Q'][i+1]))) + " " + str((int(bd['1Q'][i]) + int(bd['2Q'][i]))))
bd.insert(13, 'HALF TIME SCORE', ht_score)
final_score = []
for i in range(0,len(bd['TEAM']),1):
    if i %2 ==0:
        final_score.append(str(bd['FINAL SCORE'][i]) + " " + str(bd['FINAL SCORE'][i+1]))
    else:
        final_score.append(str(bd['FINAL SCORE'][i]) + " " + str(bd['FINAL SCORE'][i-1]))
bd.insert(14,'FULL TIME SCORE', final_score)
bd = bd.drop('FINAL SCORE',1)

data = bd.drop(['OT1', 'OT2', 'OT3', 'OT4', 'OT5','OPENING ODDS', 'OPENING SPREAD', 'OPENING TOTAL','LINE MOVEMENT #1',
       'LINE MOVEMENT #2', 'LINE MOVEMENT #3', 'CLOSING ODDS',
       'CLOSING SPREAD', 'CLOSING TOTAL', 'MONEYLINE', 'HALFTIME'],1)

ht_outcome = []
ft_outcome = []

for i in range(0,len(data),1):
    
    if int(data.loc[i]['HALF TIME SCORE'].split()[0]) == int(data.loc[i]['HALF TIME SCORE'].split()[1]):
        ht_outcome.append('T')
        
    elif int(data.loc[i]['HALF TIME SCORE'].split()[0]) > int(data.loc[i]['HALF TIME SCORE'].split()[1]):
        ht_outcome.append('W')
     
    else:
        ht_outcome.append('L')
    
    if int(data.loc[i]['FULL TIME SCORE'].split()[0]) > int(data.loc[i]['FULL TIME SCORE'].split()[1]):
        ft_outcome.append('W')
    else:
        ft_outcome.append('L')
data.insert(5,'HT OUTCOME', ht_outcome)
data.insert(9,'FT OUTCOME', ft_outcome)

lead_change = []

for i in range(0, len(data), 1):
    if data.loc[i]['HT OUTCOME'] == data.loc[i]['FT OUTCOME']:
        lead_change.append('No')
    else:
        lead_change.append('Yes')
data.insert(11,'LEAD CHANGE', lead_change)

ht_ft = []

for i in range(0,len(data),1):
    if data.iloc[i]['HT OUTCOME'] == 'L':
        if data.iloc[i]['FT OUTCOME'] == 'L':
            ht_ft.append('L/L')
        else:
            ht_ft.append('L/W')

    elif data.iloc[i]['HT OUTCOME'] == 'T':
        if data.iloc[i]['FT OUTCOME'] == 'W':
            ht_ft.append('T/W')
        else:
            ht_ft.append('T/L')
    else:
        if data.iloc[i]['FT OUTCOME'] == 'W':
            ht_ft.append('W/W')
        else:
            ht_ft.append('W/L')
data.insert(11,'HT/FT', ht_ft)
data = data.drop(['1Q','2Q','3Q','4Q'],1)

In [75]:
data.head()

Unnamed: 0,DATE,TEAM,VENUE,HT OUTCOME,HALF TIME SCORE,FT OUTCOME,FULL TIME SCORE,HT/FT,LEAD CHANGE,SHOOTING FORWARD,POWER FORWARD,CENTER,SHOOTING GUARD,POINT GUARD
0,12/22/2020,Golden State,R,L,45 63,L,99 125,L/L,No,Andrew Wiggins,Eric Paschall,James Wiseman,Kelly Oubre Jr.,Stephen Curry
1,12/22/2020,Brooklyn,H,W,63 45,W,125 99,W/W,No,Joe Harris,Kevin Durant,DeAndre Jordan,Spencer Dinwiddie,Kyrie Irving
2,12/22/2020,LA Clippers,R,W,56 54,W,116 109,W/W,No,Kawhi Leonard,Nicolas Batum,Serge Ibaka,Paul George,Patrick Beverley
3,12/22/2020,LA Lakers,H,L,54 56,L,109 116,L/L,No,LeBron James,Anthony Davis,Marc Gasol,Kentavious Caldwell-Pope,Dennis Schroder
4,12/23/2020,Milwaukee,R,L,59 64,L,121 122,L/L,No,Khris Middleton,Giannis Antetokounmpo,Brook Lopez,Donte DiVincenzo,Jrue Holiday


In [76]:
opp_list = []
for i in range(0,len(data),1):
    if i %2 == 0:
        opp_list.append(data.iloc[i+1]['TEAM'])
    else:
        opp_list.append(data.iloc[i-1]['TEAM'])

In [77]:
data.insert(3, 'OPPONENT', opp_list)

In [78]:
sl = []
for i in range(0, len(data), 1):
    starting_lineup = []
    starting_lineup.append(data.iloc[i]['POINT GUARD'])
    starting_lineup.append(data.iloc[i]['SHOOTING GUARD'])
    starting_lineup.append(data.iloc[i]['SHOOTING FORWARD'])
    starting_lineup.append(data.iloc[i]['POWER FORWARD'])
    starting_lineup.append(data.iloc[i]['CENTER'])
    sl.append(str(starting_lineup))

In [79]:
data.insert(15, 'STARTING LINEUP', sl)

In [80]:
warriors_data = data[data['TEAM'] == 'Golden State']
nets_data = data[data['TEAM'] == 'Brooklyn']
clippers_data = data[data['TEAM'] == 'LA Clippers']
lakers_data = data[data['TEAM'] == 'LA Lakers']
bucks_data = data[data['TEAM'] == 'Milwaukee']
celtics_data = data[data['TEAM'] == 'Boston']
mavs_data = data[data['TEAM'] == 'Dallas']
suns_data = data[data['TEAM'] == 'Phoenix']
hornets_data = data[data['TEAM'] == 'Charlotte']
cavs_data = data[data['TEAM'] == 'Cleveland']
knicks_data = data[data['TEAM'] == 'New York']
pacers_data = data[data['TEAM'] == 'Indiana']
heat_data = data[data['TEAM'] == 'Miami']
magic_data = data[data['TEAM'] == 'Orlando']
wizards_data = data[data['TEAM'] == 'Washington']
phil_data = data[data['TEAM'] == 'Philadelphia']
pelicans_data = data[data['TEAM'] == 'New Orleans']
raptors_data = data[data['TEAM'] == 'Toronto']
hawks_data = data[data['TEAM'] == 'Atlanta']
bulls_data = data[data['TEAM'] == 'Chicago']
spurs_data = data[data['TEAM'] == 'San Antonio']
grizzlies_data = data[data['TEAM'] == 'Memphis']
pistons_data = data[data['TEAM'] == 'Detroit']
timberwolves_data = data[data['TEAM'] == 'Minnesota']
kings_data = data[data['TEAM'] == 'Sacramento']
nuggets_data = data[data['TEAM'] == 'Denver']
jazz_data = data[data['TEAM'] == 'Utah']
tblazers_data = data[data['TEAM'] == 'Portland']
okc_data = data[data['TEAM'] == 'Oklahoma City']
rockets_data = data[data['TEAM'] == 'Houston']

teams_data_list = [warriors_data, nets_data, clippers_data, lakers_data, bucks_data, rockets_data, okc_data, tblazers_data, jazz_data
                  , nuggets_data, kings_data, timberwolves_data, pistons_data, grizzlies_data, spurs_data, bulls_data, hawks_data, 
                  raptors_data, pelicans_data, phil_data, wizards_data, magic_data, heat_data, suns_data, celtics_data, mavs_data,
                   hornets_data, cavs_data, knicks_data, pacers_data]

In [81]:
nets_data.head()

Unnamed: 0,DATE,TEAM,VENUE,OPPONENT,HT OUTCOME,HALF TIME SCORE,FT OUTCOME,FULL TIME SCORE,HT/FT,LEAD CHANGE,SHOOTING FORWARD,POWER FORWARD,CENTER,SHOOTING GUARD,POINT GUARD,STARTING LINEUP
1,12/22/2020,Brooklyn,H,Golden State,W,63 45,W,125 99,W/W,No,Joe Harris,Kevin Durant,DeAndre Jordan,Spencer Dinwiddie,Kyrie Irving,"['Kyrie Irving', 'Spencer Dinwiddie', 'Joe Har..."
32,12/25/2020,Brooklyn,R,Boston,L,51 54,W,123 95,L/W,Yes,Joe Harris,Kevin Durant,DeAndre Jordan,Spencer Dinwiddie,Kyrie Irving,"['Kyrie Irving', 'Spencer Dinwiddie', 'Joe Har..."
60,12/27/2020,Brooklyn,R,Charlotte,W,50 48,L,104 106,W/L,Yes,Joe Harris,Kevin Durant,DeAndre Jordan,Spencer Dinwiddie,Kyrie Irving,"['Kyrie Irving', 'Spencer Dinwiddie', 'Joe Har..."
81,12/28/2020,Brooklyn,H,Memphis,W,55 54,L,111 116,W/L,Yes,Timothe Luwawu-Cabarrot,Taurean Prince,DeAndre Jordan,Joe Harris,Caris LeVert,"['Caris LeVert', 'Joe Harris', 'Timothe Luwawu..."
111,12/30/2020,Brooklyn,H,Atlanta,L,67 68,W,145 141,L/W,Yes,Joe Harris,Kevin Durant,DeAndre Jordan,Timothe Luwawu-Cabarrot,Kyrie Irving,"['Kyrie Irving', 'Timothe Luwawu-Cabarrot', 'J..."


### FEATURE PROCESSING FOR THE LOGISTIC REGRESSION

### WIN STREAK - WIN PERCENTAGE - WIN MOVING AVERAGES - VENUE

In [82]:
team_name_dataset = dict()
for i in range(0,30,1):
    team_name_dataset[teams_data_list[i].iloc[0]['TEAM']] = teams_data_list[i]

In [83]:
def addWinStreak(team_data):
    
    results = team_data['FT OUTCOME'].map({'W':1, 'L':-1, 'T': 0})
    streak = []
    streak.append('NA')
    streak.append(results.iloc[0])
    l = len(results)
    
    for i in range(1,l-1,1):
        if results.iloc[i] == results.iloc[i-1]:
            streak.append(results.iloc[i] + streak[-1])
        else:
            streak.append(results.iloc[i])
    
    team_data.insert(4, 'STREAK', streak)

for team_data in teams_data_list:
    addWinStreak(team_data)

In [84]:
def venueEncoder(team_data):
    team_data['VENUE'] = team_data['VENUE'].map({'H':1, 'R':0})
    
for team_data in teams_data_list:
    venueEncoder(team_data)

In [85]:
def FTWinPer(team_data):
    win_p = [0,0,0,0,0,0,0]
    for i in range(7,len(team_data),1):
        win_p.append(round(team_data.iloc[0:i]['FT OUTCOME'].value_counts(normalize = True)['W'],3))
    team_data.insert(5,'FT WinPer',win_p)

for team_data in teams_data_list:
    FTWinPer(team_data)

In [86]:
def OPPFTWinPer(team_data):
    win_p = [0,0,0,0,0,0,0]
    for i in range(7,len(team_data),1):
        oppname = team_data.iloc[i]['OPPONENT']
        win_p.append(round(data[data['TEAM'] == oppname].iloc[0:i]['FT OUTCOME'].value_counts(normalize = True)['W'],3))
    team_data.insert(6,'OPP FT WinPer',win_p)

for team_data in teams_data_list:
    OPPFTWinPer(team_data)

In [87]:
def LastNFTWper(team_data):
    N=7
    htwinperav = list(np.zeros(N))
    for i in range(N, len(team_data),1):
        try:
            wp = team_data.iloc[i-N:i]['FT OUTCOME'].value_counts(normalize=True)['W']
            wp = round(wp,3)
        except Exception as e:
            wp = '0'
        htwinperav.append(wp)
    team_data.insert(7, 'L7 FT W %', htwinperav)

In [88]:
def LastMoppFTWper(team_data):
    N=7
    htwinperav = list(np.zeros(N))
    for i in range(N, len(team_data),1):
        oppname = team_data.iloc[i]['OPPONENT']
        try:
            wp = data[data['TEAM'] == oppname].iloc[i-N:i]['FT OUTCOME'].value_counts(normalize=True)['W']
            wp = round(wp,3)
        except Exception as e:
            wp = '0'
        htwinperav.append(wp)
    team_data.insert(8, 'OPP L7 FT W %', htwinperav)

In [89]:
for team_data in teams_data_list:
    LastNFTWper(team_data)
    LastMoppFTWper(team_data)

In [90]:
def LastM_FT_WinP(team_data):
    M=15
    wp = list(np.zeros(M))
    for i in range(M,len(team_data),1):
        try:
            a = team_data.iloc[i-M:i]['FT OUTCOME'].value_counts(normalize = True)['W']
            a = round(a,3)
            wp.append(a)
        except Exception as e:
            wp.append('0')
    team_data.insert(9, 'L15 FT W %', wp)

for team_data in teams_data_list:
    LastM_FT_WinP(team_data)

In [91]:
def Opp_LastM_FT_WinP(team_data):
    N=15
    htwinperav = list(np.zeros(N))
    for i in range(N, len(team_data),1):
        oppname = team_data.iloc[i]['OPPONENT']
        try:
            wp = data[data['TEAM'] == oppname].iloc[i-N:i]['FT OUTCOME'].value_counts(normalize=True)['W']
            wp = round(wp,3)
        except Exception as e:
            wp = '0'
        htwinperav.append(wp)
    team_data.insert(10, 'OPP L15 FT W %', htwinperav)
    
for team_data in teams_data_list:
    Opp_LastM_FT_WinP(team_data)

In [93]:
nets_data.head()

Unnamed: 0,DATE,TEAM,VENUE,OPPONENT,STREAK,FT WinPer,OPP FT WinPer,L7 FT W %,OPP L7 FT W %,L15 FT W %,OPP L15 FT W %,HT OUTCOME,HALF TIME SCORE,FT OUTCOME,FULL TIME SCORE,HT/FT,LEAD CHANGE,SHOOTING FORWARD,POWER FORWARD,CENTER,SHOOTING GUARD,POINT GUARD,STARTING LINEUP
1,12/22/2020,Brooklyn,1,Golden State,,0.0,0.0,0.0,0,0.0,0,W,63 45,W,125 99,W/W,No,Joe Harris,Kevin Durant,DeAndre Jordan,Spencer Dinwiddie,Kyrie Irving,"['Kyrie Irving', 'Spencer Dinwiddie', 'Joe Har..."
32,12/25/2020,Brooklyn,0,Boston,1.0,0.0,0.0,0.0,0,0.0,0,L,51 54,W,123 95,L/W,Yes,Joe Harris,Kevin Durant,DeAndre Jordan,Spencer Dinwiddie,Kyrie Irving,"['Kyrie Irving', 'Spencer Dinwiddie', 'Joe Har..."
60,12/27/2020,Brooklyn,0,Charlotte,2.0,0.0,0.0,0.0,0,0.0,0,W,50 48,L,104 106,W/L,Yes,Joe Harris,Kevin Durant,DeAndre Jordan,Spencer Dinwiddie,Kyrie Irving,"['Kyrie Irving', 'Spencer Dinwiddie', 'Joe Har..."
81,12/28/2020,Brooklyn,1,Memphis,-1.0,0.0,0.0,0.0,0,0.0,0,W,55 54,L,111 116,W/L,Yes,Timothe Luwawu-Cabarrot,Taurean Prince,DeAndre Jordan,Joe Harris,Caris LeVert,"['Caris LeVert', 'Joe Harris', 'Timothe Luwawu..."
111,12/30/2020,Brooklyn,1,Atlanta,-2.0,0.0,0.0,0.0,0,0.0,0,L,67 68,W,145 141,L/W,Yes,Joe Harris,Kevin Durant,DeAndre Jordan,Timothe Luwawu-Cabarrot,Kyrie Irving,"['Kyrie Irving', 'Timothe Luwawu-Cabarrot', 'J..."


### THE IMPACT PLAYER(S) FOR EACH TEAM

In [94]:
encoder = []
for i in range(0, len(nets_data),1):
    if 'James Harden' in nets_data['STARTING LINEUP'].iloc[i] or 'Kevin Durant' in nets_data['STARTING LINEUP'].iloc[i] or 'Kyrie Irving' in nets_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
nets_data.insert(16,'IMPACT PLAYER',encoder)

In [95]:
encoder = []
for i in range(0, len(warriors_data),1):
    if 'Stephen Curry' in warriors_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
warriors_data.insert(16,'IMPACT PLAYER',encoder)

In [96]:
encoder = []
for i in range(0, len(clippers_data),1):
    if 'Kawhi Leonard' in clippers_data['STARTING LINEUP'].iloc[i] and 'Paul George' in clippers_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
clippers_data.insert(16,'IMPACT PLAYER',encoder)

In [97]:
encoder = []
for i in range(0, len(lakers_data),1):
    if 'Lebron James' in lakers_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
lakers_data.insert(16,'IMPACT PLAYER',encoder)

In [98]:
encoder = []
for i in range(0, len(bucks_data),1):
    if 'Giannis Antetokounmpo' in bucks_data['STARTING LINEUP'].iloc[i] or 'Khris Middleton' in bucks_data.iloc[i]['STARTING LINEUP']:
        encoder.append(1)
    else:
        encoder.append(0)
bucks_data.insert(16,'IMPACT PLAYER',encoder)

In [99]:
encoder = []
for i in range(0, len(rockets_data),1):
    if 'John Wall' in rockets_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
rockets_data.insert(16,'IMPACT PLAYER',encoder)

In [100]:
encoder = []
for i in range(0, len(okc_data),1):
    if 'Shai Gilgeous-Alexander' in okc_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
okc_data.insert(16,'IMPACT PLAYER',encoder)

In [101]:
encoder = []
for i in range(0, len(tblazers_data),1):
    if 'Damian Lillard' in tblazers_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
tblazers_data.insert(16,'IMPACT PLAYER',encoder)

In [102]:
encoder = []
for i in range(0, len(jazz_data),1):
    if 'Donovan Mitchell' in jazz_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
jazz_data.insert(16,'IMPACT PLAYER',encoder)

In [103]:
encoder = []
for i in range(0, len(nuggets_data),1):
    if 'Nikola Jokic' in nuggets_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
nuggets_data.insert(16,'IMPACT PLAYER',encoder)

In [104]:
encoder = []
for i in range(0, len(kings_data),1):
    if "De'Aaron Fox" in kings_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
kings_data.insert(16,'IMPACT PLAYER',encoder)

In [105]:
encoder = []
for i in range(0, len(timberwolves_data),1):
    if 'Karl-Anthony Towns' in timberwolves_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
timberwolves_data.insert(16,'IMPACT PLAYER',encoder)

In [106]:
encoder = []
for i in range(0, len(pistons_data),1):
    if 'Jerami Grant' in pistons_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
pistons_data.insert(16,'IMPACT PLAYER',encoder)

In [107]:
encoder = []
for i in range(0, len(grizzlies_data),1):
    if 'Ja Morant' in grizzlies_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
grizzlies_data.insert(16,'IMPACT PLAYER',encoder)

In [108]:
encoder = []
for i in range(0, len(spurs_data),1):
    if 'DeMar DeRozan' in spurs_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
spurs_data.insert(16,'IMPACT PLAYER',encoder)

In [109]:
encoder = []
for i in range(0, len(bulls_data),1):
    if 'Zach LaVine' in bulls_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
bulls_data.insert(16,'IMPACT PLAYER',encoder)

In [110]:
encoder = []
for i in range(0, len(hawks_data),1):
    if 'Trae Young' in hawks_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
hawks_data.insert(16,'IMPACT PLAYER',encoder)

In [111]:
wl = []
for i in range(0,len(raptors_data),1):
    if 'Fred VanVleet' in raptors_data.iloc[i]['STARTING LINEUP']:
        wl.append(raptors_data['FT OUTCOME'].map({'W': 1, 'L':0}).iloc[i])
    else:
        pass
    
print(sum(wl))
print(len(wl))
print(sum(wl)/len(wl))

22
52
0.4230769230769231


In [112]:
encoder = []
for i in range(0, len(raptors_data),1):
    if 'Fred VanVleet' in raptors_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
raptors_data.insert(16,'IMPACT PLAYER',encoder)

In [113]:
pelicans_data[pelicans_data['POWER FORWARD'] == 'Zion Williamson']['FT OUTCOME'].value_counts(normalize=True)

L    0.52459
W    0.47541
Name: FT OUTCOME, dtype: float64

In [114]:
pelicans_data[pelicans_data['SHOOTING FORWARD'] == 'Brandon Ingram']['FT OUTCOME'].value_counts(normalize=True)

L    0.508475
W    0.491525
Name: FT OUTCOME, dtype: float64

In [115]:
encoder = []
for i in range(0, len(pelicans_data),1):
    if 'Brandon Ingram' in pelicans_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
pelicans_data.insert(16,'IMPACT PLAYER',encoder)

In [116]:
encoder = []
for i in range(0, len(phil_data),1):
    if 'Joel Embiid' in phil_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
phil_data.insert(16,'IMPACT PLAYER',encoder)

In [117]:
encoder = []
for i in range(0, len(magic_data),1):
    if 'Nikola Vucevic' in magic_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
magic_data.insert(16,'IMPACT PLAYER',encoder)

In [118]:
encoder = []
for i in range(0, len(heat_data),1):
    if 'Jimmy Butler' in heat_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
heat_data.insert(16,'IMPACT PLAYER',encoder)

In [119]:
encoder = []
for i in range(0, len(suns_data),1):
    if 'Devin Booker' in suns_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
suns_data.insert(16,'IMPACT PLAYER',encoder)

In [120]:
encoder = []
for i in range(0, len(celtics_data),1):
    if 'Jayson Tatum' in celtics_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
celtics_data.insert(16,'IMPACT PLAYER',encoder)

In [121]:
encoder = []
for i in range(0, len(mavs_data),1):
    if 'Luka Doncic' in mavs_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
mavs_data.insert(16,'IMPACT PLAYER',encoder)

In [122]:
encoder = []
for i in range(0, len(hornets_data),1):
    if 'Gordon Hayward' in hornets_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
hornets_data.insert(16,'IMPACT PLAYER',encoder)

In [123]:
encoder = []
for i in range(0, len(cavs_data),1):
    if 'Collin Sexton' in cavs_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
cavs_data.insert(16,'IMPACT PLAYER',encoder)

In [124]:
encoder = []
for i in range(0, len(knicks_data),1):
    if 'Julius Randle' in knicks_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
knicks_data.insert(16,'IMPACT PLAYER',encoder)

In [125]:
encoder = []
for i in range(0, len(wizards_data),1):
    if 'Bradley Beal' in wizards_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
wizards_data.insert(16,'IMPACT PLAYER',encoder)

In [126]:
encoder = []
for i in range(0, len(pacers_data),1):
    if 'Malcolm Brogdon' in pacers_data['STARTING LINEUP'].iloc[i] and 'Caris Levert' in pacers_data['STARTING LINEUP'].iloc[i]:
        encoder.append(1)
    else:
        encoder.append(0)
pacers_data.insert(16,'IMPACT PLAYER',encoder)

### OPPONENT IMPACT PLAYER

In [127]:
def OppImpactPlayer(team_data):
    OppImpPlayerList = []
    for i in range(0,len(team_data),1):
        date = team_data.iloc[i]['DATE']
        opp = team_data.iloc[i]['OPPONENT']
        OppImpPlayerList.append(int(team_name_dataset[opp][team_name_dataset[opp]['DATE'] == date]['IMPACT PLAYER']))
    team_data.insert(17, 'OPP IMPACT PLAYER', OppImpPlayerList)

In [128]:
for team_data in teams_data_list:
    OppImpactPlayer(team_data)

In [129]:
nets_data

Unnamed: 0,DATE,TEAM,VENUE,OPPONENT,STREAK,FT WinPer,OPP FT WinPer,L7 FT W %,OPP L7 FT W %,L15 FT W %,OPP L15 FT W %,HT OUTCOME,HALF TIME SCORE,FT OUTCOME,FULL TIME SCORE,HT/FT,IMPACT PLAYER,OPP IMPACT PLAYER,LEAD CHANGE,SHOOTING FORWARD,POWER FORWARD,CENTER,SHOOTING GUARD,POINT GUARD,STARTING LINEUP
1,12/22/2020,Brooklyn,1,Golden State,,0.0,0.0,0.0,0.0,0.0,0.0,W,63 45,W,125 99,W/W,1,1,No,Joe Harris,Kevin Durant,DeAndre Jordan,Spencer Dinwiddie,Kyrie Irving,"['Kyrie Irving', 'Spencer Dinwiddie', 'Joe Har..."
32,12/25/2020,Brooklyn,0,Boston,1.0,0.0,0.0,0.0,0.0,0.0,0.0,L,51 54,W,123 95,L/W,1,1,Yes,Joe Harris,Kevin Durant,DeAndre Jordan,Spencer Dinwiddie,Kyrie Irving,"['Kyrie Irving', 'Spencer Dinwiddie', 'Joe Har..."
60,12/27/2020,Brooklyn,0,Charlotte,2.0,0.0,0.0,0.0,0.0,0.0,0.0,W,50 48,L,104 106,W/L,1,1,Yes,Joe Harris,Kevin Durant,DeAndre Jordan,Spencer Dinwiddie,Kyrie Irving,"['Kyrie Irving', 'Spencer Dinwiddie', 'Joe Har..."
81,12/28/2020,Brooklyn,1,Memphis,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,W,55 54,L,111 116,W/L,0,1,Yes,Timothe Luwawu-Cabarrot,Taurean Prince,DeAndre Jordan,Joe Harris,Caris LeVert,"['Caris LeVert', 'Joe Harris', 'Timothe Luwawu..."
111,12/30/2020,Brooklyn,1,Atlanta,-2.0,0.0,0.0,0.0,0.0,0.0,0.0,L,67 68,W,145 141,L/W,1,1,Yes,Joe Harris,Kevin Durant,DeAndre Jordan,Timothe Luwawu-Cabarrot,Kyrie Irving,"['Kyrie Irving', 'Timothe Luwawu-Cabarrot', 'J..."
141,01/01/2021,Brooklyn,1,Atlanta,1.0,0.0,0.0,0.0,0.0,0.0,0.0,L,52 61,L,96 114,L/L,1,1,No,Joe Harris,Kevin Durant,DeAndre Jordan,Timothe Luwawu-Cabarrot,Kyrie Irving,"['Kyrie Irving', 'Timothe Luwawu-Cabarrot', 'J..."
169,01/03/2021,Brooklyn,1,Washington,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,W,71 70,L,122 123,W/L,1,1,Yes,Joe Harris,Kevin Durant,DeAndre Jordan,Timothe Luwawu-Cabarrot,Kyrie Irving,"['Kyrie Irving', 'Timothe Luwawu-Cabarrot', 'J..."
201,01/05/2021,Brooklyn,1,Utah,-2.0,0.429,0.571,0.429,0.571,0.0,0.0,W,63 44,W,130 96,W/W,1,1,No,Taurean Prince,Jeff Green,Jarrett Allen,Bruce Brown,Kyrie Irving,"['Kyrie Irving', 'Bruce Brown', 'Taurean Princ..."
233,01/07/2021,Brooklyn,1,Philadelphia,1.0,0.5,0.875,0.429,0.857,0.0,0.0,W,65 51,W,122 109,W/W,0,1,No,Taurean Prince,Jeff Green,Jarrett Allen,Bruce Brown,Caris LeVert,"['Caris LeVert', 'Bruce Brown', 'Taurean Princ..."
252,01/08/2021,Brooklyn,0,Memphis,2.0,0.556,0.333,0.429,0.429,0.0,0.0,L,47 62,L,110 115,L/L,0,0,No,Taurean Prince,Jeff Green,Jarrett Allen,Bruce Brown,Caris LeVert,"['Caris LeVert', 'Bruce Brown', 'Taurean Princ..."


### FEATURE SELECTION

In [130]:
output = nets_data.columns[13]
var1 = nets_data.columns[2] 
var2 = list(nets_data.columns[4:11])
var3 = list(nets_data.columns[16:18])
features = var2 + var3
features.append(var1)
features

['STREAK',
 'FT WinPer',
 'OPP FT WinPer',
 'L7 FT W %',
 'OPP L7 FT W %',
 'L15 FT W %',
 'OPP L15 FT W %',
 'IMPACT PLAYER',
 'OPP IMPACT PLAYER',
 'VENUE']

In [131]:
def LogReg(team_data):
    
    N = 7
    x_train = team_data.iloc[10:-N][features].values
    x_test = team_data.iloc[-N:][features].values
    y_train = team_data.iloc[10:-N][output].map({'W':1, 'L':-1, 'T':-1}).values
    y_test = team_data.iloc[-N:][output].map({'W':1, 'L':-1, 'T':-1}).values
    
    
    reg = LogisticRegression()
    reg.fit(x_train, y_train)
    y_pred = reg.predict(x_test)

    
    p = f_regression(x_train, y_train)[1]
    pvalues.append(team_data.iloc[0]['TEAM'])
    pvalues.append(str(p.round(3)))

In [132]:
pvalues = []
for team_data in teams_data_list:
    LogReg(team_data)

### P VALUE TABLE

In [133]:
df = np.array(pvalues).reshape(30,2)
pv_dict = dict()
for i in range(0,30,1):
    pv = df[i][1][1:-1].split()
    pv_dict[df[i][0]] = pv
pv_df = pd.DataFrame(data = pv_dict, index=[features])
pv_df.reset_index(inplace=True)
pv_df = pv_df.rename(columns = {'level_0':'Variables'})
pv_df.set_index('Variables', inplace=True)

In [134]:
pv_df = pv_df.replace('nan', 0.3) # Since some of the impact players never missed a game, the p-value for those teams ended up being nan, so I decided to use 0.3 which is close to the mean p-value for impact player variable 

In [137]:
print(pv_df)

                  Golden State Brooklyn LA Clippers LA Lakers Milwaukee  \
Variables                                                                 
STREAK                   0.173    0.505       0.896     0.811     0.462   
FT WinPer                0.015    0.438       0.188     0.261     0.042   
OPP FT WinPer            0.057    0.306        0.01     0.018     0.306   
L7 FT W %                0.254    0.399       0.768     0.788     0.552   
OPP L7 FT W %            0.073    0.382       0.006     0.001      0.29   
L15 FT W %               0.885    0.628       0.113     0.775     0.485   
OPP L15 FT W %           0.046    0.479       0.005     0.005     0.839   
IMPACT PLAYER            0.025    0.136       0.221       0.3     0.058   
OPP IMPACT PLAYER        0.313    0.235       0.136     0.349     0.791   
VENUE                    0.083    0.078       0.157     0.885     0.803   

                  Houston Oklahoma City Portland   Utah Denver Sacramento  \
Variables             

In [138]:
significant_var = dict()
sig_level = 0.05
for team_data in teams_data_list:
    var = []
    tn = team_data.iloc[0]['TEAM']
    for i in range(0,8,1):
        pv = pv_df[tn].values[i]
        if float(pv) <= sig_level:
            var.append(pv_df.index[i])
        else:
            pass
    significant_var[tn] = var

In [139]:
features = ['FT WinPer', 'OPP FT WinPer','OPP L7 FT W %', 'IMPACT PLAYER', 'OPP IMPACT PLAYER']

In [140]:
def Model(team_data):
    
    num = len(features)
    
    x = team_data.iloc[15:-2][features].values
    y = team_data.iloc[15:-2][output].map({'W':1, 'L':-1, 'T':-1}).values
    
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1, random_state = 5)
    
    x_train = np.array(x_train).reshape(len(x_train), num)
    y_train = np.array(y_train).reshape(len(y_train), 1)
    x_test = np.array(x_test).reshape(len(x_test),num)
    y_test = np.array(y_test).reshape(len(y_test),1)
    
    reg = LogisticRegression()
    reg.fit(x_train, y_train)
    y_pred = reg.predict(x_test)
    y_pred = np.array(y_pred).reshape(len(y_test),1)
    
    a = accuracy_score(y_pred, y_test)
    model_sum[team_data.iloc[0]['TEAM']] = round(a,3)
    model_accuracy.append(a)

In [141]:
model_sum = dict()
model_accuracy = []
for team_data in teams_data_list:
    Model(team_data)

model_sum['Overall Accuracy'] = mean(model_accuracy)
print(model_sum)

{'Golden State': 0.833, 'Brooklyn': 0.5, 'LA Clippers': 0.667, 'LA Lakers': 0.667, 'Milwaukee': 0.833, 'Houston': 0.667, 'Oklahoma City': 0.333, 'Portland': 0.833, 'Utah': 0.667, 'Denver': 0.833, 'Sacramento': 0.333, 'Minnesota': 0.5, 'Detroit': 0.833, 'Memphis': 0.667, 'San Antonio': 0.667, 'Chicago': 0.833, 'Atlanta': 0.833, 'Toronto': 0.833, 'New Orleans': 0.333, 'Philadelphia': 0.667, 'Washington': 0.5, 'Orlando': 0.667, 'Miami': 0.5, 'Phoenix': 0.833, 'Boston': 0.5, 'Dallas': 0.833, 'Charlotte': 0.833, 'Cleveland': 0.667, 'New York': 0.833, 'Indiana': 0.833, 'Overall Accuracy': 0.6777777777777778}


In [142]:
modelLast7_sum = dict()
modelLast7_accuracy = []
def ModelLast7Games(team_data):
    
    N = 7
    num = len(features)
    
    x_train = team_data.iloc[10:-N][features].values
    y_train = team_data.iloc[10:-N][output]#.map({'W':1, 'L':0}).values
    
    x_test = team_data.iloc[-N:][features].values
    y_test = team_data.iloc[-N:][output]#.map({'W':1, 'L':0}).values
    
    x_train = np.array(x_train).reshape(len(x_train), num)
    y_train = np.array(y_train).reshape(len(y_train), 1)
    x_test = np.array(x_test).reshape(N,num)
    y_test = np.array(y_test).reshape(N,1)
    
    reg = LogisticRegression()
    reg.fit(x_train, y_train)
    y_pred = reg.predict(x_test)
    y_pred = np.array(y_pred).reshape(N,1)
    
    a = accuracy_score(y_pred, y_test)
    modelLast7_sum[team_data.iloc[0]['TEAM']] = round(a,3)
    modelLast7_accuracy.append(a)

In [143]:
for team_data in teams_data_list:
    ModelLast7Games(team_data)

In [144]:
modelLast7_sum['Overall Accuracy'] = mean(modelLast7_accuracy)
print(modelLast7_sum)

{'Golden State': 0.429, 'Brooklyn': 0.714, 'LA Clippers': 0.571, 'LA Lakers': 0.714, 'Milwaukee': 0.857, 'Houston': 0.857, 'Oklahoma City': 0.857, 'Portland': 0.714, 'Utah': 0.714, 'Denver': 0.571, 'Sacramento': 0.571, 'Minnesota': 0.571, 'Detroit': 0.857, 'Memphis': 0.571, 'San Antonio': 0.571, 'Chicago': 0.571, 'Atlanta': 0.571, 'Toronto': 1.0, 'New Orleans': 0.714, 'Philadelphia': 0.714, 'Washington': 0.857, 'Orlando': 1.0, 'Miami': 0.857, 'Phoenix': 0.571, 'Boston': 0.857, 'Dallas': 0.714, 'Charlotte': 1.0, 'Cleveland': 0.857, 'New York': 0.714, 'Indiana': 0.714, 'Overall Accuracy': 0.7285714285714285}
