# Phase 1: Importing Libraries and Dataset

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import pandas as pd

# Importing dataset
df = pd.read_csv('nbaGames.csv')
# Displaying all columns
pd.set_option('display.max_columns', None)

In [2]:
# Viewing dataset
df.head()

Unnamed: 0.1,Unnamed: 0,Team,Game,Year,Home,Opponent,WINorLOSS,TeamPoints,OpponentPoints,FieldGoals,FieldGoalsAttempted,FieldGoals.,X3PointShots,X3PointShotsAttempted,X3PointShots.,FreeThrows,FreeThrowsAttempted,FreeThrows.,OffRebounds,TotalRebounds,Assists,Steals,Blocks,Turnovers,TotalFouls,Opp.FieldGoals,Opp.FieldGoalsAttempted,Opp.FieldGoals.,Opp.3PointShots,Opp.3PointShotsAttempted,Opp.3PointShots.,Opp.FreeThrows,Opp.FreeThrowsAttempted,Opp.FreeThrows.,Opp.OffRebounds,Opp.TotalRebounds,Opp.Assists,Opp.Steals,Opp.Blocks,Opp.Turnovers,Opp.TotalFouls
0,1,ATL,1,2015,Away,TOR,L,102,109,40,80,0.5,13,22,0.591,9,17,0.529,10,42,26,6,8,17,24,37,90,0.411,8,26,0.308,27,33,0.818,16,48,26,13,9,9,22
1,2,ATL,2,2015,Home,IND,W,102,92,35,69,0.507,7,20,0.35,25,33,0.758,3,37,26,10,6,12,20,31,81,0.383,12,32,0.375,18,21,0.857,11,44,25,5,5,18,26
2,3,ATL,3,2015,Away,SAS,L,92,94,38,92,0.413,8,25,0.32,8,11,0.727,10,37,26,14,5,13,25,31,69,0.449,5,17,0.294,27,38,0.711,11,50,25,7,9,19,15
3,4,ATL,4,2015,Away,CHO,L,119,122,43,93,0.462,13,33,0.394,20,26,0.769,7,38,28,8,3,19,33,48,97,0.495,6,21,0.286,20,27,0.741,11,51,31,6,7,19,30
4,5,ATL,5,2015,Home,NYK,W,103,96,33,81,0.407,9,22,0.409,28,36,0.778,12,41,18,10,5,8,17,40,84,0.476,8,21,0.381,8,11,0.727,13,44,26,2,6,15,29


# Phase 1: Cleaning Data

In [3]:
# Checking for any missing values

df.isnull().sum()
#df = df.fillna(0) -> fill missing values with 0

Unnamed: 0                  0
Team                        0
Game                        0
Year                        0
Home                        0
Opponent                    0
WINorLOSS                   0
TeamPoints                  0
OpponentPoints              0
FieldGoals                  0
FieldGoalsAttempted         0
FieldGoals.                 0
X3PointShots                0
X3PointShotsAttempted       0
X3PointShots.               0
FreeThrows                  0
FreeThrowsAttempted         0
FreeThrows.                 0
OffRebounds                 0
TotalRebounds               0
Assists                     0
Steals                      0
Blocks                      0
Turnovers                   0
TotalFouls                  0
Opp.FieldGoals              0
Opp.FieldGoalsAttempted     0
Opp.FieldGoals.             0
Opp.3PointShots             0
Opp.3PointShotsAttempted    0
Opp.3PointShots.            0
Opp.FreeThrows              0
Opp.FreeThrowsAttempted     0
Opp.FreeTh

In [4]:
# Check for duplicates
df[df[['Game', 'Team', 'Year']].duplicated() == True]

Unnamed: 0.1,Unnamed: 0,Team,Game,Year,Home,Opponent,WINorLOSS,TeamPoints,OpponentPoints,FieldGoals,FieldGoalsAttempted,FieldGoals.,X3PointShots,X3PointShotsAttempted,X3PointShots.,FreeThrows,FreeThrowsAttempted,FreeThrows.,OffRebounds,TotalRebounds,Assists,Steals,Blocks,Turnovers,TotalFouls,Opp.FieldGoals,Opp.FieldGoalsAttempted,Opp.FieldGoals.,Opp.3PointShots,Opp.3PointShotsAttempted,Opp.3PointShots.,Opp.FreeThrows,Opp.FreeThrowsAttempted,Opp.FreeThrows.,Opp.OffRebounds,Opp.TotalRebounds,Opp.Assists,Opp.Steals,Opp.Blocks,Opp.Turnovers,Opp.TotalFouls


In [5]:
# Keeping those columns which I think will be relevant
df = df.loc[:, ['Team', 'Year', 'Game','Home', 'Opponent', 'TeamPoints','OpponentPoints', 'Assists', 'TotalRebounds', 'X3PointShots']]

In [6]:
df.head()

Unnamed: 0,Team,Year,Game,Home,Opponent,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots
0,ATL,2015,1,Away,TOR,102,109,26,42,13
1,ATL,2015,2,Home,IND,102,92,26,37,7
2,ATL,2015,3,Away,SAS,92,94,26,37,8
3,ATL,2015,4,Away,CHO,119,122,28,38,13
4,ATL,2015,5,Home,NYK,103,96,18,41,9


In [7]:
#Creating Target Variable
df['TeamWin'] = df['TeamPoints'] > df['OpponentPoints']

In [8]:
df.head()

Unnamed: 0,Team,Year,Game,Home,Opponent,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin
0,ATL,2015,1,Away,TOR,102,109,26,42,13,False
1,ATL,2015,2,Home,IND,102,92,26,37,7,True
2,ATL,2015,3,Away,SAS,92,94,26,37,8,False
3,ATL,2015,4,Away,CHO,119,122,28,38,13,False
4,ATL,2015,5,Home,NYK,103,96,18,41,9,True


In [9]:
#df['Team'][500]

# Phase 1: Feature Engineering

## Adding Feature: Team Conference

In [10]:
#Feature Engineering Adding which conference teams belong to - West considered stronger
conferences = {
    'ATL': 'East', 
    'TOR': 'East', 
    'IND': 'East', 
    'BOS': 'East', 
    'BRK': 'East', 
    'CLE': 'East', 
    'CHO': 'East', 
    'CHI': 'East',
    'PHI': 'East', 
    'WAS': 'East',
    'POR': 'West', 
    'MEM': 'West', 
    'PHO': 'West',
    'NYK': 'East',
    'DAL': 'West', 
    'DEN': 'West', 
    'SAS': 'West', 
    'DET': 'East', 
    'SAC': 'West', 
    'UTA': 'West', 
    'MIN': 'West', 
    'NOP': 'West',
    'HOU': 'West', 
    'ORL':'East', 
    'GSW': 'West', 
    'OKC': 'West', 
    'LAL': 'West', 
    'LAC': 'West', 
    'MIL': 'East', 
    'MIA': 'East'
 # "model": "Mustang",
 # "year": 1964
}

In [11]:
print(len(conferences))

30


In [12]:
# Adding conference to both teams
teamConf=[]
oppConf=[]
#len(df['Team'])):
for i in range(len(df['Team'])):
    for j in conferences:
        #print(df['Team'][i], j)
        if (df['Team'][i] == j):
            teamConf.append(conferences[j])
        elif(df['Opponent'][i] == j):
            oppConf.append(conferences[j])
       # else:
        #    teamConf.append("null")
         #   oppConf.append("null")

#teamConf
#df['OppConf'] = oppConf

In [13]:
df['TeamConf'] = teamConf
df['OppConf'] = oppConf

## Adding Feature: Did team make the Playoffs last year?

In [14]:

#2015 - https://en.wikipedia.org/wiki/2015_NBA_playoffs, https://en.wikipedia.org/wiki/2016_NBA_playoffs ...

Playoffs_2014 = {
    'ATL': 'Y', 'TOR': 'Y', 'IND': 'Y', 'BOS': 'N', 'BRK': 'Y', 'CLE': 'N', 'CHO': 'Y', 'CHI': 'Y','PHI': 'N', 'WAS': 'Y','POR': 'Y', 
    'MEM': 'Y', 'PHO': 'N','NYK': 'N','DAL': 'Y', 'DEN': 'N', 'SAS': 'Y', 'DET': 'N', 'SAC': 'N', 'UTA': 'N', 'MIN': 'N', 'NOP': 'N',
    'HOU': 'Y', 'ORL':'N', 'GSW': 'Y', 'OKC': 'Y', 'LAL': 'N', 'LAC': 'Y', 'MIL': 'N', 'MIA': 'Y'}

Playoffs_2015 = {
    'ATL': 'Y', 'TOR': 'Y', 'IND': 'N', 'BOS': 'Y', 'BRK': 'Y', 'CLE': 'Y', 'CHO': 'N', 'CHI': 'Y','PHI': 'N', 'WAS': 'Y','POR': 'Y', 
    'MEM': 'Y', 'PHO': 'N','NYK': 'N','DAL': 'Y', 'DEN': 'N', 'SAS': 'Y', 'DET': 'N', 'SAC': 'N', 'UTA': 'N', 'MIN': 'N', 'NOP': 'Y',
    'HOU': 'Y', 'ORL':'N', 'GSW': 'Y', 'OKC': 'N', 'LAL': 'N', 'LAC': 'Y', 'MIL': 'Y', 'MIA': 'N'}

Playoffs_2016 = {
    'ATL': 'Y', 'TOR': 'Y', 'IND': 'Y', 'BOS': 'Y', 'BRK': 'N', 'CLE': 'Y', 'CHO': 'Y', 'CHI': 'N','PHI': 'N', 'WAS': 'N','POR': 'Y', 
    'MEM': 'Y', 'PHO': 'N','NYK': 'N','DAL': 'Y', 'DEN': 'N', 'SAS': 'Y', 'DET': 'Y', 'SAC': 'N', 'UTA': 'N', 'MIN': 'N', 'NOP': 'N',
    'HOU': 'Y', 'ORL':'N', 'GSW': 'Y', 'OKC': 'Y', 'LAL': 'N', 'LAC': 'Y', 'MIL': 'N', 'MIA': 'Y'}

Playoffs_2017 = {
    'ATL': 'Y', 'TOR': 'Y', 'IND': 'Y', 'BOS': 'Y', 'BRK': 'N', 'CLE': 'Y', 'CHO': 'N', 'CHI': 'Y','PHI': 'N', 'WAS': 'Y','POR': 'Y', 
    'MEM': 'Y', 'PHO': 'N','NYK': 'N','DAL': 'N', 'DEN': 'N', 'SAS': 'Y', 'DET': 'N', 'SAC': 'N', 'UTA': 'Y', 'MIN': 'N', 'NOP': 'N',
    'HOU': 'Y', 'ORL':'N', 'GSW': 'Y', 'OKC': 'Y', 'LAL': 'N', 'LAC': 'Y', 'MIL': 'Y', 'MIA': 'N'}




In [15]:
# Assign whether team/opposing team made playoffs for each year
teamPlayoffs = []
oppPlayoffs = []

#len(df['Team'])):
for i in range(len(df['Year'] == 2015)):
    for j in Playoffs_2014:
        #print(df['Team'][i], j)
        if (df['Team'][i] == j and df['Year'][i] == 2015):
            teamPlayoffs.append(Playoffs_2014[j])
        elif(df['Opponent'][i] == j and df['Year'][i] == 2015):
            oppPlayoffs.append(Playoffs_2014[j])
            
for i in range(len(df['Year'] == 2016)):       
    for k in Playoffs_2015:
        #print(df['Team'][i], j)
        if (df['Team'][i] == k and df['Year'][i] == 2016):
            teamPlayoffs.append(Playoffs_2015[k])
        elif(df['Opponent'][i] == k and df['Year'][i] == 2016):
            oppPlayoffs.append(Playoffs_2015[k])

for i in range(len(df['Year'] == 2017)):       
    for l in Playoffs_2016:
        #print(df['Team'][i], j)
        if (df['Team'][i] == l and df['Year'][i] == 2017):
            teamPlayoffs.append(Playoffs_2016[l])
        elif(df['Opponent'][i] == l and df['Year'][i] == 2017):
            oppPlayoffs.append(Playoffs_2016[l])
            
for i in range(len(df['Year'] == 2018)):       
    for m in Playoffs_2017:
        #print(df['Team'][i], j)
        if (df['Team'][i] == m and df['Year'][i] == 2018):
            teamPlayoffs.append(Playoffs_2017[m])
        elif(df['Opponent'][i] == m and df['Year'][i] == 2018):
            oppPlayoffs.append(Playoffs_2017[m])


In [16]:
#print(teamPlayoffs)

In [17]:
#oppPlayoffs

In [18]:
# Add columns to the Dataframe
df['TMadeP'] = pd.Series(teamPlayoffs)
df['OppMadeP'] = pd.Series(oppPlayoffs)

In [19]:
df.head(8)

Unnamed: 0,Team,Year,Game,Home,Opponent,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP
0,ATL,2015,1,Away,TOR,102,109,26,42,13,False,East,East,Y,Y
1,ATL,2015,2,Home,IND,102,92,26,37,7,True,East,East,Y,Y
2,ATL,2015,3,Away,SAS,92,94,26,37,8,False,East,West,Y,Y
3,ATL,2015,4,Away,CHO,119,122,28,38,13,False,East,East,Y,Y
4,ATL,2015,5,Home,NYK,103,96,18,41,9,True,East,East,Y,N
5,ATL,2015,6,Away,NYK,91,85,20,38,10,True,East,East,Y,N
6,ATL,2015,7,Home,UTA,100,97,23,46,9,True,East,West,Y,N
7,ATL,2015,8,Home,MIA,114,103,33,36,11,True,East,East,Y,Y


## Add Feature: Winning Odds given to each team before season start

In [20]:
# Adding Pre-season odds
odds = pd.read_csv('odds.csv')
odds.head(10)

#

Unnamed: 0,Team,Odds,Year,W-L O/U,Result,OppOdds,Opponent
0,CLE,275.0,2015.0,58.5,53-29 (under),275.0,CLE
1,SAS,350.0,2015.0,57.0,55-27 (under),350.0,SAS
2,OKC,600.0,2015.0,53.0,45-37 (under),600.0,OKC
3,CHI,700.0,2015.0,55.5,50-32 (under),700.0,CHI
4,LAC,900.0,2015.0,56.5,56-26 (under),900.0,LAC
5,HOU,2200.0,2015.0,49.0,56-26 (over),2200.0,HOU
6,DAL,2500.0,2015.0,49.5,50-32 (over),2500.0,DAL
7,GSW,2800.0,2015.0,52.5,67-15 (over),2800.0,GSW
8,WAS,3000.0,2015.0,48.5,46-36 (under),3000.0,WAS
9,MIA,4500.0,2015.0,44.0,37-45 (under),4500.0,MIA


In [21]:
odds = odds.loc[:, ['Team', 'Year', 'Odds']]
odds.head(10)

Unnamed: 0,Team,Year,Odds
0,CLE,2015.0,275.0
1,SAS,2015.0,350.0
2,OKC,2015.0,600.0
3,CHI,2015.0,700.0
4,LAC,2015.0,900.0
5,HOU,2015.0,2200.0
6,DAL,2015.0,2500.0
7,GSW,2015.0,2800.0
8,WAS,2015.0,3000.0
9,MIA,2015.0,4500.0


In [22]:
#Odds conversion - plus/(plus+100) -> conversion of positive odds
teamOdds = []
opptOdds = []
def convert():
    for j in range(len(odds['Odds'])):
            if (odds['Odds'][j] > 0):
               # print(odds['Odds'][0])
               odds['Odds'][j] = (100/(odds['Odds'][j] + 100))*100
            #x = -187
            elif (odds['Odds'][j] < 0):  
                odds['Odds'][j] = ((-(odds['Odds'][j]))/((-(odds['Odds'][j])) + 100))*100

#def convertOpp():
 #   for j in range(len(odds['OppOdds'])):
  #          if (odds['OppOdds'][j] > 0):
   #            # print(odds['Odds'][0])
    #           odds['OppOdds'][j] = 100/(odds['OppOdds'][j] + 100)
     #       #x = -187
      #      elif (odds['OppOdds'][j] < 0):  
       #         odds['OppOdds'][j] = (-(odds['OppOdds'][j]))/((-(odds['OppOdds'][j])) + 100)
                
#def mapToData():
 
   # for j in range(len(odds['Team'] == 2015)):
 #   for i in range(len(df['Team'] == 2015)):      
  #      for j in range(len(odds['Team'] == 2015.0)):
           # print(df['Team'][i], odds['Team'][j])
   #         if (df['Team'][i] == odds['Team'][j]):
     #           teamOdds.append(odds['Odds'][j])
    #        else:
      #          teamOdds.append('Null')
           # elif(df['Opponent'][i] == j and df['Year'][i] == 2017):
            #    oppPlayoffs.append(Playoffs_2016[j])
    

convert()
#convertOpp()
#mapToData()
#teamOdds

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()


In [23]:
odds

Unnamed: 0,Team,Year,Odds
0,CLE,2015.0,26.666667
1,SAS,2015.0,22.222222
2,OKC,2015.0,14.285714
3,CHI,2015.0,12.500000
4,LAC,2015.0,10.000000
5,HOU,2015.0,4.347826
6,DAL,2015.0,3.846154
7,GSW,2015.0,3.448276
8,WAS,2015.0,3.225806
9,MIA,2015.0,2.173913


In [24]:
#horizontal_stack = pd.concat([df, odds], axis=1)


In [25]:
dfmerging = pd.merge(odds, df, on=['Year', 'Team'], how='outer')
 
dfmerging

Unnamed: 0,Team,Year,Odds,Game,Home,Opponent,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP
0,CLE,2015.0,26.666667,1.0,Home,NYK,90.0,95.0,22.0,33.0,9.0,False,East,East,N,N
1,CLE,2015.0,26.666667,2.0,Away,CHI,114.0,108.0,18.0,52.0,3.0,True,East,East,N,Y
2,CLE,2015.0,26.666667,3.0,Away,POR,82.0,101.0,18.0,41.0,9.0,False,East,West,N,Y
3,CLE,2015.0,26.666667,4.0,Away,UTA,100.0,102.0,6.0,30.0,6.0,False,East,West,N,N
4,CLE,2015.0,26.666667,5.0,Away,DEN,110.0,101.0,25.0,42.0,6.0,True,East,West,N,N
5,CLE,2015.0,26.666667,6.0,Home,NOP,118.0,111.0,26.0,45.0,13.0,True,East,West,N,N
6,CLE,2015.0,26.666667,7.0,Away,BOS,122.0,121.0,22.0,40.0,8.0,True,East,East,N,N
7,CLE,2015.0,26.666667,8.0,Home,ATL,127.0,94.0,39.0,47.0,19.0,True,East,East,N,Y
8,CLE,2015.0,26.666667,9.0,Home,DEN,97.0,106.0,18.0,42.0,10.0,False,East,West,N,N
9,CLE,2015.0,26.666667,10.0,Home,SAS,90.0,92.0,23.0,38.0,7.0,False,East,West,N,Y


In [26]:
opOdds = pd.read_csv('oppodds.csv')

In [27]:
opOdds

Unnamed: 0,Team,Odds,Year,W-L O/U,Result,OppOdds,Opponent
0,CLE,275.0,2015.0,58.5,53-29 (under),275.0,CLE
1,SAS,350.0,2015.0,57.0,55-27 (under),350.0,SAS
2,OKC,600.0,2015.0,53.0,45-37 (under),600.0,OKC
3,CHI,700.0,2015.0,55.5,50-32 (under),700.0,CHI
4,LAC,900.0,2015.0,56.5,56-26 (under),900.0,LAC
5,HOU,2200.0,2015.0,49.0,56-26 (over),2200.0,HOU
6,DAL,2500.0,2015.0,49.5,50-32 (over),2500.0,DAL
7,GSW,2800.0,2015.0,52.5,67-15 (over),2800.0,GSW
8,WAS,3000.0,2015.0,48.5,46-36 (under),3000.0,WAS
9,MIA,4500.0,2015.0,44.0,37-45 (under),4500.0,MIA


In [28]:
#opOdds.rename(columns={'Odds: OpOdds'})
opOdds = opOdds.loc[:, ['Opponent', 'Year', 'Odds']]

In [29]:
opOdds

Unnamed: 0,Opponent,Year,Odds
0,CLE,2015.0,275.0
1,SAS,2015.0,350.0
2,OKC,2015.0,600.0
3,CHI,2015.0,700.0
4,LAC,2015.0,900.0
5,HOU,2015.0,2200.0
6,DAL,2015.0,2500.0
7,GSW,2015.0,2800.0
8,WAS,2015.0,3000.0
9,MIA,2015.0,4500.0


In [30]:
for i in range(len(opOdds['Odds'])):
    if (opOdds['Odds'][i] > 0):
        # print(odds['Odds'][0])
        opOdds['Odds'][i] = (100/(opOdds['Odds'][i] + 100))*100
    #        #x = -187
    elif (opOdds['Odds'][i] < 0):  
        opOdds['Odds'][i] = ((-(opOdds['Odds'][i]))/((-opOdds['Odds'][i]) + 100))*100

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [31]:
opOdds

Unnamed: 0,Opponent,Year,Odds
0,CLE,2015.0,26.666667
1,SAS,2015.0,22.222222
2,OKC,2015.0,14.285714
3,CHI,2015.0,12.500000
4,LAC,2015.0,10.000000
5,HOU,2015.0,4.347826
6,DAL,2015.0,3.846154
7,GSW,2015.0,3.448276
8,WAS,2015.0,3.225806
9,MIA,2015.0,2.173913


In [32]:
#for j in range(len(opptOdds['Opponent'])):
 #   if (opptOdds['OppOdds'][j] > 0):
  #      # print(odds['Odds'][0])
   #        opptOdds['OppOdds'][j] = 100/(opptOdds['OppOdds'][j] + 100)
    #        #x = -187
    #elif (opptOdds['OppOdds'][j] < 0):  
     #       opptOdds['OppOdds'][j] = (-(opptOdds['OppOdds'][j]))/((-opptOdds['OppOdds']) + 100)

In [33]:
#oppConvert()

In [34]:
df2 = pd.merge(opOdds, dfmerging, on=['Year', 'Opponent'], how='outer')

In [35]:
df2

Unnamed: 0,Opponent,Year,Odds_x,Team,Odds_y,Game,Home,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP
0,CLE,2015.0,26.666667,SAS,22.222222,11.0,Away,92.0,90.0,26.0,42.0,5.0,True,West,East,Y,N
1,CLE,2015.0,26.666667,SAS,22.222222,64.0,Home,125.0,128.0,28.0,41.0,8.0,False,West,East,Y,N
2,CLE,2015.0,26.666667,OKC,14.285714,22.0,Home,103.0,94.0,22.0,46.0,7.0,True,West,East,Y,N
3,CLE,2015.0,26.666667,OKC,14.285714,44.0,Away,98.0,108.0,26.0,42.0,10.0,False,West,East,Y,N
4,CLE,2015.0,26.666667,CHI,12.500000,2.0,Home,108.0,114.0,25.0,42.0,13.0,False,East,East,Y,N
5,CLE,2015.0,26.666667,CHI,12.500000,43.0,Away,94.0,108.0,12.0,40.0,8.0,False,East,East,Y,N
6,CLE,2015.0,26.666667,CHI,12.500000,54.0,Home,113.0,98.0,29.0,47.0,9.0,True,East,East,Y,N
7,CLE,2015.0,26.666667,CHI,12.500000,77.0,Away,94.0,99.0,28.0,45.0,11.0,False,East,East,Y,N
8,CLE,2015.0,26.666667,LAC,10.000000,40.0,Home,121.0,126.0,26.0,43.0,10.0,False,West,East,Y,N
9,CLE,2015.0,26.666667,LAC,10.000000,50.0,Away,94.0,105.0,24.0,43.0,5.0,False,West,East,Y,N


In [36]:
df2.rename(columns={'Odds_x':'OpOdds', 'Odds_y': 'TOdds'}, inplace=True)

In [37]:
#df = pd.DataFrame(dfmerging)
#df2['TeamHigherOdds'] = df2['TOdds'] > df2['OpOdds']

In [38]:
df2.head(7)

Unnamed: 0,Opponent,Year,OpOdds,Team,TOdds,Game,Home,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP
0,CLE,2015.0,26.666667,SAS,22.222222,11.0,Away,92.0,90.0,26.0,42.0,5.0,True,West,East,Y,N
1,CLE,2015.0,26.666667,SAS,22.222222,64.0,Home,125.0,128.0,28.0,41.0,8.0,False,West,East,Y,N
2,CLE,2015.0,26.666667,OKC,14.285714,22.0,Home,103.0,94.0,22.0,46.0,7.0,True,West,East,Y,N
3,CLE,2015.0,26.666667,OKC,14.285714,44.0,Away,98.0,108.0,26.0,42.0,10.0,False,West,East,Y,N
4,CLE,2015.0,26.666667,CHI,12.5,2.0,Home,108.0,114.0,25.0,42.0,13.0,False,East,East,Y,N
5,CLE,2015.0,26.666667,CHI,12.5,43.0,Away,94.0,108.0,12.0,40.0,8.0,False,East,East,Y,N
6,CLE,2015.0,26.666667,CHI,12.5,54.0,Home,113.0,98.0,29.0,47.0,9.0,True,East,East,Y,N


In [39]:
df2.isnull().sum()

Opponent          36
Year              36
OpOdds            36
Team              36
TOdds             36
Game              36
Home              36
TeamPoints        36
OpponentPoints    36
Assists           36
TotalRebounds     36
X3PointShots      36
TeamWin           36
TeamConf          36
OppConf           36
TMadeP            36
OppMadeP          36
dtype: int64

In [40]:
df2.dropna(how='any', inplace=True)

In [41]:
df2.isnull().sum()

Opponent          0
Year              0
OpOdds            0
Team              0
TOdds             0
Game              0
Home              0
TeamPoints        0
OpponentPoints    0
Assists           0
TotalRebounds     0
X3PointShots      0
TeamWin           0
TeamConf          0
OppConf           0
TMadeP            0
OppMadeP          0
dtype: int64

In [42]:
df2

Unnamed: 0,Opponent,Year,OpOdds,Team,TOdds,Game,Home,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP
0,CLE,2015.0,26.666667,SAS,22.222222,11.0,Away,92.0,90.0,26.0,42.0,5.0,True,West,East,Y,N
1,CLE,2015.0,26.666667,SAS,22.222222,64.0,Home,125.0,128.0,28.0,41.0,8.0,False,West,East,Y,N
2,CLE,2015.0,26.666667,OKC,14.285714,22.0,Home,103.0,94.0,22.0,46.0,7.0,True,West,East,Y,N
3,CLE,2015.0,26.666667,OKC,14.285714,44.0,Away,98.0,108.0,26.0,42.0,10.0,False,West,East,Y,N
4,CLE,2015.0,26.666667,CHI,12.500000,2.0,Home,108.0,114.0,25.0,42.0,13.0,False,East,East,Y,N
5,CLE,2015.0,26.666667,CHI,12.500000,43.0,Away,94.0,108.0,12.0,40.0,8.0,False,East,East,Y,N
6,CLE,2015.0,26.666667,CHI,12.500000,54.0,Home,113.0,98.0,29.0,47.0,9.0,True,East,East,Y,N
7,CLE,2015.0,26.666667,CHI,12.500000,77.0,Away,94.0,99.0,28.0,45.0,11.0,False,East,East,Y,N
8,CLE,2015.0,26.666667,LAC,10.000000,40.0,Home,121.0,126.0,26.0,43.0,10.0,False,West,East,Y,N
9,CLE,2015.0,26.666667,LAC,10.000000,50.0,Away,94.0,105.0,24.0,43.0,5.0,False,West,East,Y,N


## Add Feature: Strength of Roster

In [43]:
# Adding Strength of Roster - data from https://hoopshype.com/nba2k/teams/
roster = pd.read_csv('roster.csv')
roster.head(10)

Unnamed: 0,Team,Rating,Year
0,CHI,77.5,2015.0
1,CLE,77.36,2015.0
2,LAC,76.15,2015.0
3,MIA,76.08,2015.0
4,OKC,76.07,2015.0
5,SAS,75.88,2015.0
6,CHO,75.57,2015.0
7,WAS,75.0,2015.0
8,POR,75.0,2015.0
9,DAL,74.75,2015.0


In [44]:
df3 = pd.merge(roster, df2, on=['Team','Year'], how='outer')
df3.dropna(how='any', inplace=True)

In [45]:
#horizontal_stacks = pd.concat([df, roster], axis=1)
#dfmerge2 = pd.merge(roster, df, on=['Team','Year'], how='outer')
#dfmerge2.isnull().sum()

In [46]:
#dfmerge2.dropna(how='any', inplace=True)

In [47]:
df3.isnull().sum()

Team              0
Rating            0
Year              0
Opponent          0
OpOdds            0
TOdds             0
Game              0
Home              0
TeamPoints        0
OpponentPoints    0
Assists           0
TotalRebounds     0
X3PointShots      0
TeamWin           0
TeamConf          0
OppConf           0
TMadeP            0
OppMadeP          0
dtype: int64

In [48]:
#df.loc[(df['Team'] == 'CLE')]

In [49]:
#df = dfmerge2

In [50]:
#df3.loc[(df3['Team'] == 'CLE')]

In [51]:
oppRoster = pd.read_csv('opproster.csv')
oppRoster.head(10)

Unnamed: 0,Opponent,Rating,Year
0,CHI,77.5,2015.0
1,CLE,77.36,2015.0
2,LAC,76.15,2015.0
3,MIA,76.08,2015.0
4,OKC,76.07,2015.0
5,SAS,75.88,2015.0
6,CHO,75.57,2015.0
7,WAS,75.0,2015.0
8,POR,75.0,2015.0
9,DAL,74.75,2015.0


In [52]:
df4 = pd.merge(oppRoster, df3, on=['Year', 'Opponent'], how='outer')

In [53]:
#dfmerge2 = pd.merge(roster, df2, on=['Team','Year'], how='outer')
#d#fmerge2.dropna(how='any', inplace=True)
df4

Unnamed: 0,Opponent,Rating_x,Year,Team,Rating_y,OpOdds,TOdds,Game,Home,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP
0,CHI,77.50,2015.0,CLE,77.36,12.500000,26.666667,2.0,Away,114.0,108.0,18.0,52.0,3.0,True,East,East,N,Y
1,CHI,77.50,2015.0,CLE,77.36,12.500000,26.666667,42.0,Home,108.0,94.0,20.0,54.0,8.0,True,East,East,N,Y
2,CHI,77.50,2015.0,CLE,77.36,12.500000,26.666667,55.0,Away,98.0,113.0,20.0,43.0,8.0,False,East,East,N,Y
3,CHI,77.50,2015.0,CLE,77.36,12.500000,26.666667,77.0,Home,99.0,94.0,24.0,40.0,16.0,True,East,East,N,Y
4,CHI,77.50,2015.0,LAC,76.15,12.500000,10.000000,9.0,Home,89.0,105.0,20.0,40.0,7.0,False,West,East,Y,Y
5,CHI,77.50,2015.0,LAC,76.15,12.500000,10.000000,60.0,Away,96.0,86.0,15.0,55.0,7.0,True,West,East,Y,Y
6,CHI,77.50,2015.0,MIA,76.08,12.500000,2.173913,24.0,Home,75.0,93.0,12.0,40.0,4.0,False,East,East,Y,Y
7,CHI,77.50,2015.0,MIA,76.08,12.500000,2.173913,44.0,Away,96.0,84.0,19.0,54.0,1.0,True,East,East,Y,Y
8,CHI,77.50,2015.0,MIA,76.08,12.500000,2.173913,79.0,Home,78.0,89.0,14.0,46.0,5.0,False,East,East,Y,Y
9,CHI,77.50,2015.0,OKC,76.07,12.500000,14.285714,62.0,Away,105.0,108.0,20.0,41.0,8.0,False,West,East,Y,Y


In [54]:
#dfmerge2.
####################

df4.rename(columns={'Rating_x':'OppRating', 'Rating_y': 'TeamRating'}, inplace=True)
df4.dropna(how='any', inplace=True)
df4.isnull().sum()

Opponent          0
OppRating         0
Year              0
Team              0
TeamRating        0
OpOdds            0
TOdds             0
Game              0
Home              0
TeamPoints        0
OpponentPoints    0
Assists           0
TotalRebounds     0
X3PointShots      0
TeamWin           0
TeamConf          0
OppConf           0
TMadeP            0
OppMadeP          0
dtype: int64

In [55]:
#df4.loc[:,['OppRating']] = label_encoder.fit_transform(df.loc[:,['OppRating']]).astype('float64')
#df4.loc[:,['TeamRating']] = label_encoder.fit_transform(df.loc[:,['TeamRating']]).astype('float64')

In [56]:
df = pd.DataFrame(df4)

In [57]:
# Converting categorical data into float data

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import make_column_transformer
import seaborn as sns

#column_trans = make_column_transformer(
 #   (OneHotEncoder(), ['Home' , 'teamRslt', 'opptAbbr' , 'opptRslt']),
  #  remainder='passthrough')
#column_trans.fit_transform(X)


label_encoder = LabelEncoder()
df.loc[:,['Home']] = label_encoder.fit_transform(df.loc[:,['Home']]).astype('float64')
df.loc[:,['TeamConf']] = label_encoder.fit_transform(df.loc[:,['TeamConf']]).astype('float64')
df.loc[:,['OppConf']] = label_encoder.fit_transform(df.loc[:,['OppConf']]).astype('float64')
df.loc[:,['TMadeP']] = label_encoder.fit_transform(df.loc[:,['TMadeP']]).astype('float64')
df.loc[:,['OppMadeP']] = label_encoder.fit_transform(df.loc[:,['OppMadeP']]).astype('float64')
df.loc[:,['TeamWin']] = label_encoder.fit_transform(df.loc[:,['TeamWin']]).astype('float64')
#df.loc[:,['TOdds']] = label_encoder.fit_transform(df.loc[:,['TOdds']]).astype('float64')
#df.loc[:,['OpOdds']] = label_encoder.fit_transform(df.loc[:,['OpOdds']]).astype('float64')

#df2.loc[:,['TeamHigherOdds']] = label_encoder.fit_transform(df2.loc[:,['TeamHigherOdds']]).astype('float64')

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [58]:
df

Unnamed: 0,Opponent,OppRating,Year,Team,TeamRating,OpOdds,TOdds,Game,Home,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP
0,CHI,77.50,2015.0,CLE,77.36,12.500000,26.666667,2.0,0.0,114.0,108.0,18.0,52.0,3.0,1.0,0.0,0.0,0.0,1.0
1,CHI,77.50,2015.0,CLE,77.36,12.500000,26.666667,42.0,1.0,108.0,94.0,20.0,54.0,8.0,1.0,0.0,0.0,0.0,1.0
2,CHI,77.50,2015.0,CLE,77.36,12.500000,26.666667,55.0,0.0,98.0,113.0,20.0,43.0,8.0,0.0,0.0,0.0,0.0,1.0
3,CHI,77.50,2015.0,CLE,77.36,12.500000,26.666667,77.0,1.0,99.0,94.0,24.0,40.0,16.0,1.0,0.0,0.0,0.0,1.0
4,CHI,77.50,2015.0,LAC,76.15,12.500000,10.000000,9.0,1.0,89.0,105.0,20.0,40.0,7.0,0.0,1.0,0.0,1.0,1.0
5,CHI,77.50,2015.0,LAC,76.15,12.500000,10.000000,60.0,0.0,96.0,86.0,15.0,55.0,7.0,1.0,1.0,0.0,1.0,1.0
6,CHI,77.50,2015.0,MIA,76.08,12.500000,2.173913,24.0,1.0,75.0,93.0,12.0,40.0,4.0,0.0,0.0,0.0,1.0,1.0
7,CHI,77.50,2015.0,MIA,76.08,12.500000,2.173913,44.0,0.0,96.0,84.0,19.0,54.0,1.0,1.0,0.0,0.0,1.0,1.0
8,CHI,77.50,2015.0,MIA,76.08,12.500000,2.173913,79.0,1.0,78.0,89.0,14.0,46.0,5.0,0.0,0.0,0.0,1.0,1.0
9,CHI,77.50,2015.0,OKC,76.07,12.500000,14.285714,62.0,0.0,105.0,108.0,20.0,41.0,8.0,0.0,1.0,0.0,1.0,1.0


In [59]:
df['TeamStronger'] = df['TeamRating'] > df['OppRating']
df.loc[:,['TeamStronger']] = label_encoder.fit_transform(df.loc[:,['TeamStronger']]).astype('float64')

  y = column_or_1d(y, warn=True)


In [60]:
df['TeamBetterOdds'] = df['TOdds'] >= df['OpOdds']
df.loc[:,['TeamBetterOdds']] = label_encoder.fit_transform(df.loc[:,['TeamBetterOdds']]).astype('float64')

  y = column_or_1d(y, warn=True)


In [61]:
#df.isempty().sum()
df.dropna(how='any', inplace=True)

In [62]:
#df.iloc[2460, : ]

In [63]:
df.loc[(df['Year'] == 2018)]

Unnamed: 0,Opponent,OppRating,Year,Team,TeamRating,OpOdds,TOdds,Game,Home,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP,TeamStronger,TeamBetterOdds
7383,LAC,77.38,2018.0,GSW,77.35,0.523560,65.156794,8.0,0.0,141.0,113.0,37.0,46.0,14.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0
7384,LAC,77.38,2018.0,GSW,77.35,0.523560,65.156794,40.0,0.0,121.0,105.0,31.0,54.0,13.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0
7385,LAC,77.38,2018.0,GSW,77.35,0.523560,65.156794,42.0,1.0,106.0,125.0,26.0,33.0,8.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0
7386,LAC,77.38,2018.0,GSW,77.35,0.523560,65.156794,59.0,1.0,134.0,127.0,31.0,32.0,14.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0
7387,LAC,77.38,2018.0,NOP,76.92,0.523560,0.452489,13.0,1.0,111.0,103.0,26.0,51.0,6.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0
7388,LAC,77.38,2018.0,NOP,76.92,0.523560,0.452489,49.0,1.0,103.0,112.0,23.0,47.0,9.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0
7389,LAC,77.38,2018.0,NOP,76.92,0.523560,0.452489,63.0,0.0,121.0,116.0,39.0,51.0,13.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0
7390,LAC,77.38,2018.0,NOP,76.92,0.523560,0.452489,81.0,0.0,113.0,100.0,34.0,51.0,11.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0
7391,LAC,77.38,2018.0,DEN,76.87,0.523560,0.497512,45.0,0.0,104.0,109.0,23.0,54.0,5.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0
7392,LAC,77.38,2018.0,DEN,76.87,0.523560,0.497512,61.0,1.0,120.0,122.0,27.0,41.0,10.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0


## Add Feature: How many All-Stars per team?

In [64]:
# All Stars
All_Star_2015 = {
    'ATL': 3, 'TOR': 1, 'IND': 0, 'BOS': 0, 'BRK': 0, 'CLE': 2, 'CHO': 0, 'CHI': 2,'PHI': 0, 'WAS': 1,'POR': 1, 
    'MEM': 1, 'PHO': 0,'NYK': 1,'DAL': 0, 'DEN': 0, 'SAS': 1, 'DET': 0, 'SAC': 0, 'UTA': 0, 'MIN': 0, 'NOP': 1,
    'HOU': 1, 'ORL':0, 'GSW': 2, 'OKC': 2, 'LAL': 1, 'LAC': 2, 'MIL': 0, 'MIA': 2}

All_Star_2016 = {
    'ATL': 1, 'TOR': 2, 'IND': 1, 'BOS': 1, 'BRK': 0, 'CLE': 1, 'CHO': 0, 'CHI': 1,'PHI': 0, 'WAS': 1,'POR': 0, 
    'MEM': 0, 'PHO': 0,'NYK': 1,'DAL': 0, 'DEN': 0, 'SAS': 2, 'DET': 1, 'SAC': 1, 'UTA': 0, 'MIN': 0, 'NOP': 1,
    'HOU': 1, 'ORL':0, 'GSW': 3, 'OKC': 2, 'LAL': 1, 'LAC': 1, 'MIL': 0, 'MIA': 2}

All_Star_2017 = {
    'ATL': 1, 'TOR': 2, 'IND': 1, 'BOS': 1, 'BRK': 0, 'CLE': 3, 'CHO': 1, 'CHI': 1,'PHI': 0, 'WAS': 1,'POR': 0, 
    'MEM': 1, 'PHO': 0,'NYK': 0,'DAL': 0, 'DEN': 0, 'SAS': 1, 'DET': 0, 'SAC': 1, 'UTA': 1, 'MIN': 0, 'NOP': 1,
    'HOU': 1, 'ORL':0, 'GSW': 4, 'OKC': 1, 'LAL': 0, 'LAC': 1, 'MIL': 1, 'MIA': 0}

All_Star_2018 = {
    'ATL': 0, 'TOR': 2, 'IND': 1, 'BOS': 2, 'BRK': 0, 'CLE': 2, 'CHO': 0, 'CHI': 0,'PHI': 1, 'WAS': 2,'POR': 1, 
    'MEM': 0, 'PHO': 0,'NYK': 1,'DAL': 0, 'DEN': 0, 'SAS': 1, 'DET': 0, 'SAC': 0, 'UTA': 0, 'MIN': 2, 'NOP': 2,
    'HOU': 1, 'ORL':0, 'GSW': 4, 'OKC': 1, 'LAL': 0, 'LAC': 0, 'MIL': 1, 'MIA': 0}



In [65]:
#teamConf[500]
teamAllStars = []
oppAllStars = []

#len(df['Team'])):
for i in range(2459):
    for j in All_Star_2015:
       # print(df['Team'][i], j)
        if (df['Team'][i] == j and df['Year'][i] == 2015):
            teamAllStars.append(All_Star_2015[j])
        elif(df['Opponent'][i] == j and df['Year'][i] == 2015):
            oppAllStars.append(All_Star_2015[j])
            
for i in range(2463,4922):       
    for k in All_Star_2016:
        #print(df['Team'][i], j)
        if (df['Team'][i] == k and df['Year'][i] == 2016):
            teamAllStars.append(All_Star_2016[k])
        elif(df['Opponent'][i] == k and df['Year'][i] == 2016):
            oppAllStars.append(All_Star_2016[k])

for i in range(4923,7382):       
    for l in All_Star_2017:
        #print(df['Team'][i], j)
        if (df['Team'][i] == l and df['Year'][i] == 2017):
            teamAllStars.append(All_Star_2017[l])
        elif(df['Opponent'][i] == l and df['Year'][i] == 2017):
            oppAllStars.append(All_Star_2017[l])
            
for i in range(7383,9843):       
    for m in All_Star_2018:
        #print(df['Team'][i], j)
        if (df['Team'][i] == m and df['Year'][i] == 2018):
            teamAllStars.append(All_Star_2018[m])
        elif(df['Opponent'][i] == m and df['Year'][i] == 2018):
            oppAllStars.append(All_Star_2018[m])

In [66]:
#teamAllStars
df['TeamAllStars'] = pd.Series(teamAllStars)
df['OppAllStars'] = pd.Series(oppAllStars)

In [67]:
df.isna().sum()

Opponent          0
OppRating         0
Year              0
Team              0
TeamRating        0
OpOdds            0
TOdds             0
Game              0
Home              0
TeamPoints        0
OpponentPoints    0
Assists           0
TotalRebounds     0
X3PointShots      0
TeamWin           0
TeamConf          0
OppConf           0
TMadeP            0
OppMadeP          0
TeamStronger      0
TeamBetterOdds    0
TeamAllStars      6
OppAllStars       6
dtype: int64

In [68]:
df.dropna(how='any', inplace=True)

In [69]:
df

Unnamed: 0,Opponent,OppRating,Year,Team,TeamRating,OpOdds,TOdds,Game,Home,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP,TeamStronger,TeamBetterOdds,TeamAllStars,OppAllStars
0,CHI,77.50,2015.0,CLE,77.36,12.500000,26.666667,2.0,0.0,114.0,108.0,18.0,52.0,3.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,2.0
1,CHI,77.50,2015.0,CLE,77.36,12.500000,26.666667,42.0,1.0,108.0,94.0,20.0,54.0,8.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,2.0
2,CHI,77.50,2015.0,CLE,77.36,12.500000,26.666667,55.0,0.0,98.0,113.0,20.0,43.0,8.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,2.0
3,CHI,77.50,2015.0,CLE,77.36,12.500000,26.666667,77.0,1.0,99.0,94.0,24.0,40.0,16.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,2.0
4,CHI,77.50,2015.0,LAC,76.15,12.500000,10.000000,9.0,1.0,89.0,105.0,20.0,40.0,7.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,2.0,2.0
5,CHI,77.50,2015.0,LAC,76.15,12.500000,10.000000,60.0,0.0,96.0,86.0,15.0,55.0,7.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,2.0,2.0
6,CHI,77.50,2015.0,MIA,76.08,12.500000,2.173913,24.0,1.0,75.0,93.0,12.0,40.0,4.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,2.0,2.0
7,CHI,77.50,2015.0,MIA,76.08,12.500000,2.173913,44.0,0.0,96.0,84.0,19.0,54.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,2.0,2.0
8,CHI,77.50,2015.0,MIA,76.08,12.500000,2.173913,79.0,1.0,78.0,89.0,14.0,46.0,5.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,2.0,2.0
9,CHI,77.50,2015.0,OKC,76.07,12.500000,14.285714,62.0,0.0,105.0,108.0,20.0,41.0,8.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,2.0,2.0


In [70]:
#df[]
df['AllStarAdv'] = df['TeamAllStars'] >= df['TeamAllStars']
df.loc[:,['AllStarAdv']] = label_encoder.fit_transform(df.loc[:,['AllStarAdv']]).astype('float64')

  y = column_or_1d(y, warn=True)


In [71]:
df5 = df.sort_values(['Year', 'Team', 'Game'], ascending=True)
df5.reset_index(drop=True,inplace=True)
df5

Unnamed: 0,Opponent,OppRating,Year,Team,TeamRating,OpOdds,TOdds,Game,Home,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP,TeamStronger,TeamBetterOdds,TeamAllStars,OppAllStars,AllStarAdv
0,TOR,74.21,2015.0,ATL,73.67,1.960784,0.793651,1.0,0.0,102.0,109.0,26.0,42.0,13.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,3.0,1.0,0.0
1,IND,73.73,2015.0,ATL,73.67,1.234568,0.793651,2.0,1.0,102.0,92.0,26.0,37.0,7.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,3.0,0.0,0.0
2,SAS,75.88,2015.0,ATL,73.67,22.222222,0.793651,3.0,0.0,92.0,94.0,26.0,37.0,8.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0,1.0,0.0
3,CHO,75.57,2015.0,ATL,73.67,1.162791,0.793651,4.0,0.0,119.0,122.0,28.0,38.0,13.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,3.0,0.0,0.0
4,NYK,72.00,2015.0,ATL,73.67,1.785714,0.793651,5.0,1.0,103.0,96.0,18.0,41.0,9.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,3.0,1.0,0.0
5,NYK,72.00,2015.0,ATL,73.67,1.785714,0.793651,6.0,0.0,91.0,85.0,20.0,38.0,10.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,3.0,1.0,0.0
6,UTA,73.36,2015.0,ATL,73.67,0.398406,0.793651,7.0,1.0,100.0,97.0,23.0,46.0,9.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,3.0,0.0,0.0
7,MIA,76.08,2015.0,ATL,73.67,2.173913,0.793651,8.0,1.0,114.0,103.0,33.0,36.0,11.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,3.0,2.0,0.0
8,CLE,77.36,2015.0,ATL,73.67,26.666667,0.793651,9.0,0.0,94.0,127.0,26.0,37.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,3.0,2.0,0.0
9,LAL,73.38,2015.0,ATL,73.67,0.990099,0.793651,10.0,1.0,109.0,114.0,22.0,38.0,9.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,3.0,1.0,0.0


In [72]:
df = df5

## Add Feature: Projected win total from start of the season

In [73]:
#Add Projected Win total -> https://www.basketball-reference.com/
statistics = pd.read_csv('statistics.csv')
#statistics.head()
statistics = statistics.loc[:, ['Year','Team', 'PW']]

In [74]:

Ostatistics = pd.read_csv('Ostatistics.csv')
#Ostatistics.head()
Ostatistics = Ostatistics.loc[:, ['Year','Opponent', 'PW']]

In [75]:
#statistics['Year'] = statistics['Year'].astype(int)
#statistics.dtypes
#statistics.Year = statistics.Year.astype(float)
#statistics['Year'] = statistics['Year'].values.astype(float)

In [76]:
statistics

Unnamed: 0,Year,Team,PW
0,2015.0,GSW,65.0
1,2015.0,LAC,58.0
2,2015.0,SAS,58.0
3,2015.0,ATL,56.0
4,2015.0,POR,53.0
5,2015.0,CLE,53.0
6,2015.0,HOU,50.0
7,2015.0,MEM,50.0
8,2015.0,DAL,49.0
9,2015.0,CHI,50.0


In [77]:
dfS = pd.merge(statistics, df, on=['Year', 'Team'], how='outer')

In [78]:
dfS

Unnamed: 0,Year,Team,PW,Opponent,OppRating,TeamRating,OpOdds,TOdds,Game,Home,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP,TeamStronger,TeamBetterOdds,TeamAllStars,OppAllStars,AllStarAdv
0,2015.0,GSW,65.0,SAC,73.69,73.88,0.398406,3.448276,1.0,0.0,95.0,77.0,21.0,44.0,6.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,2.0,0.0,0.0
1,2015.0,GSW,65.0,LAL,73.38,73.88,0.990099,3.448276,2.0,1.0,127.0,104.0,30.0,43.0,11.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,2.0,1.0,0.0
2,2015.0,GSW,65.0,POR,75.00,73.88,1.960784,3.448276,3.0,0.0,95.0,90.0,22.0,41.0,6.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,2.0,1.0,0.0
3,2015.0,GSW,65.0,LAC,76.15,73.88,10.000000,3.448276,4.0,1.0,121.0,104.0,29.0,39.0,15.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,2.0,2.0,0.0
4,2015.0,GSW,65.0,HOU,74.13,73.88,4.347826,3.448276,5.0,0.0,98.0,87.0,22.0,54.0,9.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,2.0,1.0,0.0
5,2015.0,GSW,65.0,PHO,74.07,73.88,1.162791,3.448276,6.0,0.0,95.0,107.0,16.0,36.0,11.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,2.0,0.0,0.0
6,2015.0,GSW,65.0,SAS,75.88,73.88,22.222222,3.448276,7.0,1.0,100.0,113.0,23.0,38.0,7.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,2.0,1.0,0.0
7,2015.0,GSW,65.0,BRK,73.29,73.88,1.639344,3.448276,8.0,1.0,107.0,99.0,30.0,45.0,11.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,2.0,0.0,0.0
8,2015.0,GSW,65.0,CHO,75.57,73.88,1.162791,3.448276,9.0,1.0,112.0,87.0,30.0,51.0,11.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,2.0,0.0,0.0
9,2015.0,GSW,65.0,LAL,73.38,73.88,0.990099,3.448276,10.0,0.0,136.0,115.0,32.0,51.0,14.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,2.0,1.0,0.0


In [79]:
dfOS = pd.merge(Ostatistics, dfS, on=['Year', 'Opponent'], how='outer')

In [80]:
dfOS

Unnamed: 0,Year,Opponent,PW_x,Team,PW_y,OppRating,TeamRating,OpOdds,TOdds,Game,Home,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP,TeamStronger,TeamBetterOdds,TeamAllStars,OppAllStars,AllStarAdv
0,2015.0,GSW,65.0,LAC,58.0,73.88,76.15,3.448276,10.000000,5.0,0.0,104.0,121.0,26.0,30.0,7.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,0.0
1,2015.0,GSW,65.0,LAC,58.0,73.88,76.15,3.448276,10.000000,30.0,1.0,100.0,86.0,18.0,50.0,9.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,0.0
2,2015.0,GSW,65.0,LAC,58.0,73.88,76.15,3.448276,10.000000,63.0,0.0,98.0,106.0,25.0,39.0,8.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,0.0
3,2015.0,GSW,65.0,LAC,58.0,73.88,76.15,3.448276,10.000000,75.0,1.0,106.0,110.0,19.0,41.0,6.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,0.0
4,2015.0,GSW,65.0,SAS,58.0,73.88,75.88,3.448276,22.222222,7.0,0.0,113.0,100.0,28.0,36.0,10.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,0.0
5,2015.0,GSW,65.0,SAS,58.0,73.88,75.88,3.448276,22.222222,55.0,0.0,99.0,110.0,25.0,44.0,6.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,0.0
6,2015.0,GSW,65.0,SAS,58.0,73.88,75.88,3.448276,22.222222,77.0,1.0,107.0,92.0,27.0,47.0,7.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,0.0
7,2015.0,GSW,65.0,ATL,56.0,73.88,73.67,3.448276,0.793651,51.0,1.0,124.0,116.0,28.0,38.0,15.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0,2.0,0.0
8,2015.0,GSW,65.0,ATL,56.0,73.88,73.67,3.448276,0.793651,68.0,0.0,95.0,114.0,24.0,44.0,8.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0,2.0,0.0
9,2015.0,GSW,65.0,POR,53.0,73.88,75.00,3.448276,1.960784,3.0,1.0,90.0,95.0,20.0,56.0,7.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,2.0,0.0


In [81]:
dfOS.rename(columns={'PW_x':'OProjW', 'PW_y': 'TProjW'}, inplace=True)

In [82]:
df = dfOS

In [83]:
#df.loc[df['Year'] == 2018]

In [84]:
#df.isempty().sum()
df.dropna(how='any', inplace=True)

In [85]:
#df.describe()

In [86]:
df = df.sort_values(['Year', 'Team', 'Game'], ascending=True)
df.reset_index(drop=True,inplace=True)

In [87]:
# Find pts for last game?
#points = [0]
#fgs = [0]
#rebs = [0]
#asts = [0]
#tpfg = [0]

#for i in range(1,len(df['Team'])):
       # if (df['Game'][i] == 1):
       #     points.append(0)
       #     fgs.append(0)
      #      rebs.append(0)
     #       asts.append(0)
           # tpfg.append(0)
        #elif (df['Game'][i] == 2):
         #   points.append(df['TeamPoints'][i - 1])
        #    fgs.append(df['FieldGoals'][i - 1])
         #   rebs.append(df['TotalRebounds'][i - 1])
         # #  asts.append(df['X3PointShots'][i - 1])
          #  tpfg.append(df['Assists'][i - 1])
        #else:
          #  points.append((df['TeamPoints'][i - 1] + df['TeamPoints'][i - 2])/2)
          #  fgs.append((df['FieldGoals'][i - 1] + df['FieldGoals'][i - 2])/2)
           # rebs.append((df['TotalRebounds'][i - 1] + df['TotalRebounds'][i - 2])/2)
            #asts.append((df['Assists'][i - 1] + df['Assists'][i - 2])/2)
           # tpfg.append((df['X3PointShots'][i - 1] + df['X3PointShots'][i - 2])/2)
        


In [88]:
#df['PastPoints'] = pd.Series(points)
#df['TFG'] = pd.Series(fgs)
#df['TAssists'] = pd.Series(asts)
#df['T3s'] = pd.Series(tpfg)
#df['Rebs'] = pd.Series(rebs)

In [89]:
# Negative features - code same for Age, Pace, and 
#statistics4 = pd.read_csv('statistics.csv')
#statistics4= statistics4.loc[:, ['Team', 'Year', 'Pace']]
#df = pd.merge(statistics4, df, on = ['Year', 'Team'], how='outer')
#
#statistics5 = pd.read_csv('Ostatistics.csv')
#statistics5 = statistics5.loc[:, ['Opponent', 'Year', 'Pace']]
#df = pd.merge(statistics5, df, on = ['Year', 'Opponent'], how='outer')

#df.rename(columns={'Pace_x':'OpPace', 'Pace_y': 'TPace' }, inplace=True)

In [90]:
# Add Projected Win total
#statistics = pd.read_csv('statistics.csv')
#statistics.head()

In [91]:
#statistics.loc[:, ['Team', 'PW']]

In [92]:
#Ostatistics = pd.read_csv('Ostatistics.csv')
#Ostatistics.head()

In [93]:
#Ostatistics.loc[:,['Opponent', 'PW']]

In [94]:
#dfS = pd.concat([statistics, df])

In [95]:
#dfS

## Add Feature: Strength of Schedule

In [96]:
statistics2 = pd.read_csv('statistics.csv')

In [97]:
statistics2.corr().head() 

Unnamed: 0,Rk,Age,W,L,PW,PL,MOV,SOS,SRS,ORtg,DRtg,NRtg,Pace,FTr,3PAr,TS%,eFG%,TOV%,ORB%,FT/FGA,eFG%.1,TOV%.1,DRB%,FT/FGA.1,Attend.,Attend./G,Year
Rk,1.0,-0.439758,-0.944993,0.944993,-0.975365,0.975365,-0.966847,0.394901,-0.969098,-0.769648,0.628535,-0.967534,0.054257,-0.121471,-0.317864,-0.676106,-0.65893,0.22561,-0.121718,-0.178355,0.54573,-0.077163,-0.142453,0.242359,-0.416152,-0.418324,0.0
Age,-0.439758,1.0,0.487388,-0.487388,0.448929,-0.448929,0.44899,-0.169113,0.450992,0.345326,-0.305619,0.448629,-0.185252,0.069848,0.09129,0.334539,0.305183,-0.303278,-0.224734,0.135158,-0.242749,0.073489,0.003113,-0.202686,0.390614,0.391073,-0.059593
W,-0.944993,0.487388,1.0,-1.0,0.969301,-0.969301,0.970449,-0.47896,0.966544,0.767057,-0.636315,0.970361,-0.033733,0.068022,0.279425,0.696262,0.684748,-0.23998,0.063888,0.135517,-0.546373,0.10168,0.127466,-0.258759,0.426645,0.429972,0.0
L,0.944993,-0.487388,-1.0,1.0,-0.969301,0.969301,-0.970449,0.47896,-0.966544,-0.767057,0.636315,-0.970361,0.033733,-0.068022,-0.279425,-0.696262,-0.684748,0.23998,-0.063888,-0.135517,0.546373,-0.10168,-0.127466,0.258759,-0.426645,-0.429972,0.0
PW,-0.975365,0.448929,0.969301,-0.969301,1.0,-1.0,0.99775,-0.465639,0.995737,0.783491,-0.661764,0.998416,-0.04006,0.082627,0.293105,0.695064,0.6809,-0.258175,0.090968,0.149189,-0.567082,0.082091,0.172042,-0.269695,0.401971,0.404775,0.002217


In [98]:
statistics2= statistics2.loc[:, ['Team', 'Year', 'SOS']]

In [99]:
statistics2.head()

Unnamed: 0,Team,Year,SOS
0,GSW,2015.0,-0.09
1,LAC,2015.0,0.22
2,SAS,2015.0,0.14
3,ATL,2015.0,-0.68
4,POR,2015.0,0.18


In [100]:
df = pd.merge(statistics2, df, on = ['Year', 'Team'], how='outer')

In [101]:
#pd.merge(Ostatistics, dfS, on=['Year', 'Opponent'], how='outer')

In [102]:
df.head()

Unnamed: 0,Team,Year,SOS,Opponent,OProjW,TProjW,OppRating,TeamRating,OpOdds,TOdds,Game,Home,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP,TeamStronger,TeamBetterOdds,TeamAllStars,OppAllStars,AllStarAdv
0,GSW,2015.0,-0.09,SAC,31.0,65.0,73.69,73.88,0.398406,3.448276,1.0,0.0,95.0,77.0,21.0,44.0,6.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,2.0,0.0,0.0
1,GSW,2015.0,-0.09,LAL,23.0,65.0,73.38,73.88,0.990099,3.448276,2.0,1.0,127.0,104.0,30.0,43.0,11.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,2.0,1.0,0.0
2,GSW,2015.0,-0.09,POR,53.0,65.0,75.0,73.88,1.960784,3.448276,3.0,0.0,95.0,90.0,22.0,41.0,6.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,2.0,1.0,0.0
3,GSW,2015.0,-0.09,LAC,58.0,65.0,76.15,73.88,10.0,3.448276,4.0,1.0,121.0,104.0,29.0,39.0,15.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,2.0,2.0,0.0
4,GSW,2015.0,-0.09,HOU,50.0,65.0,74.13,73.88,4.347826,3.448276,5.0,0.0,98.0,87.0,22.0,54.0,9.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,2.0,1.0,0.0


In [103]:
statistics3 = pd.read_csv('Ostatistics.csv')

In [104]:
statistics3 = statistics3.loc[:, ['Opponent', 'Year', 'SOS']]

In [105]:
#statistics3.rename(columns={'Team':'Opponent'}, inplace=True)

In [106]:
statistics3.head()

Unnamed: 0,Opponent,Year,SOS
0,GSW,2015.0,-0.09
1,LAC,2015.0,0.22
2,SAS,2015.0,0.14
3,ATL,2015.0,-0.68
4,POR,2015.0,0.18


In [107]:
df = pd.merge(statistics3, df, on = ['Year', 'Opponent'], how='outer')

In [108]:
df.head()

Unnamed: 0,Opponent,Year,SOS_x,Team,SOS_y,OProjW,TProjW,OppRating,TeamRating,OpOdds,TOdds,Game,Home,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP,TeamStronger,TeamBetterOdds,TeamAllStars,OppAllStars,AllStarAdv
0,GSW,2015.0,-0.09,LAC,0.22,65.0,58.0,73.88,76.15,3.448276,10.0,5.0,0.0,104.0,121.0,26.0,30.0,7.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,0.0
1,GSW,2015.0,-0.09,LAC,0.22,65.0,58.0,73.88,76.15,3.448276,10.0,30.0,1.0,100.0,86.0,18.0,50.0,9.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,0.0
2,GSW,2015.0,-0.09,LAC,0.22,65.0,58.0,73.88,76.15,3.448276,10.0,63.0,0.0,98.0,106.0,25.0,39.0,8.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,0.0
3,GSW,2015.0,-0.09,LAC,0.22,65.0,58.0,73.88,76.15,3.448276,10.0,75.0,1.0,106.0,110.0,19.0,41.0,6.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,0.0
4,GSW,2015.0,-0.09,SAS,0.14,65.0,58.0,73.88,75.88,3.448276,22.222222,7.0,0.0,113.0,100.0,28.0,36.0,10.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,0.0


In [109]:
df.rename(columns={'SOS_x':'OpSOS', 'SOS_y': 'TSOS' }, inplace=True)

In [110]:
df.head()

Unnamed: 0,Opponent,Year,OpSOS,Team,TSOS,OProjW,TProjW,OppRating,TeamRating,OpOdds,TOdds,Game,Home,TeamPoints,OpponentPoints,Assists,TotalRebounds,X3PointShots,TeamWin,TeamConf,OppConf,TMadeP,OppMadeP,TeamStronger,TeamBetterOdds,TeamAllStars,OppAllStars,AllStarAdv
0,GSW,2015.0,-0.09,LAC,0.22,65.0,58.0,73.88,76.15,3.448276,10.0,5.0,0.0,104.0,121.0,26.0,30.0,7.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,0.0
1,GSW,2015.0,-0.09,LAC,0.22,65.0,58.0,73.88,76.15,3.448276,10.0,30.0,1.0,100.0,86.0,18.0,50.0,9.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,0.0
2,GSW,2015.0,-0.09,LAC,0.22,65.0,58.0,73.88,76.15,3.448276,10.0,63.0,0.0,98.0,106.0,25.0,39.0,8.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,0.0
3,GSW,2015.0,-0.09,LAC,0.22,65.0,58.0,73.88,76.15,3.448276,10.0,75.0,1.0,106.0,110.0,19.0,41.0,6.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,0.0
4,GSW,2015.0,-0.09,SAS,0.14,65.0,58.0,73.88,75.88,3.448276,22.222222,7.0,0.0,113.0,100.0,28.0,36.0,10.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,0.0


In [111]:
#df.fillna(0)

In [112]:
df.dropna(how='any', inplace=True)

In [121]:
#df.loc[:,['Team']] = label_encoder.fit_transform(df.loc[:,['Team']]).astype('float64')
#df.loc[:,['Opponent']] = label_encoder.fit_transform(df.loc[:,['Opponent']]).astype('float64')

# Phase 2: Training Machine Learning Models

In [114]:
from sklearn.model_selection import cross_val_score
#column_trans = make_column_transformer(
 #   (OneHotEncoder(), ['Home' , 'TeamConf', 'OppConf' , 'TMadeP', 'OppMadeP']),
  #  remainder='passthrough')
#column_trans.fit_transform(X)
y = df['TeamWin'].values

In [123]:
# Logistic Regression - used to test whether features hinder/improve performance
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import precision_score
from sklearn import metrics
from sklearn.metrics import f1_score


#
logreg = LogisticRegression(solver='lbfgs', max_iter = 5000)               

# Features to train upon
X = df.loc[:, ['Home', 'TeamConf', 'OppConf', 'TMadeP', 'OppMadeP', 'TOdds', 'OpOdds', 'TeamRating', 'OppRating','TeamAllStars', 'OppAllStars',
             'OProjW', 'TProjW', 'TSOS', 'OpSOS']]
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=10, shuffle = True)
#X = df_pre.drop(['Winner'], axis = True)

#logreg = LogisticRegression(solver='lbfgs', max_iter = 2000)
#logreg.fit(X_train, y_train)
#y_pred = logreg.predict(X_test)
#Comparing actual value response value(y_test) and predicted response value (y_pred)
#print(metrics.accuracy_score(y_test, y_pred))
#print(f1_score(y_test, y_pred, average='weighted'))
#print(precision_score(y_test, y_pred, average='weighted'))


cross_val_score(logreg, X, y, cv=4, scoring='accuracy').mean()

0.6841541806373095

In [124]:
#df['Team'] = label_encoder.inverse_transform(df['Team'])
#df['Opponent'] = label_encoder.inverse_transform(df['Opponent'])

In [125]:
#grid.predict([[1.0, 0.0, 0.0, 1.0, 1.0, 0.007937, 1.960784,  0.0, 3.0, 1.0,49.0,54.0]])

In [737]:
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import cross_val_score
# Create Decision Tree classifer object
clf = DecisionTreeClassifier(criterion="entropy", max_depth = 4)

#X = df.loc[:, ['Home', 'TeamConf', 'OppConf', 'TMadeP', 'OppMadeP', 'TOdds', 'OpOdds', 'TeamStronger',
 #             'TeamAllStars', 'OppAllStars', 'PastPoints','TFG', 'TAssists', 'T3s', 'Rebs']]
X = df.loc[:, ['Home', 'TeamConf', 'OppConf', 'TMadeP', 'OppMadeP', 'TOdds', 'OpOdds','TeamRating', 'OppRating', 'TeamAllStars', 'OppAllStars',
             'OProjW', 'TProjW', 'TSOS', 'OpSOS']]
y = df['TeamWin'].values

cross_val_score(clf, X, y, cv=4, scoring='accuracy').mean()

0.6643252431826627

In [426]:
# Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier
clfs=RandomForestClassifier(criterion="entropy", max_depth=4)
#X = df.loc[:, ['Home', 'TeamConf', 'OppConf','TMadeP', 'OppMadeP', 'TOdds', 'OpOdds', 'TeamStronger',
 #             'TeamAllStars', 'OppAllStars', 'PastPoints','TFG', 'TAssists', 'T3s', 'Rebs']]
X = df.loc[:, ['Home', 'TeamConf', 'OppConf', 'TMadeP', 'OppMadeP', 'TOdds', 'OpOdds', 'TeamRating', 'OppRating', 'TeamAllStars', 'OppAllStars',
             'OProjW', 'TProjW', 'TSOS', 'OpSOS']]
cross_val_score(clfs, X, y, cv=4, scoring='accuracy').mean()

0.6709261374452693

In [784]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(10)
cross_val_score(knn, X, y, cv=4, scoring='accuracy').mean()

0.6480552004873414

In [116]:
#Naivee bayes classifier
from sklearn.naive_bayes import GaussianNB

#Create a Gaussian Classifier
gnb = GaussianNB()
X = df.loc[:, ['Home', 'TeamConf', 'OppConf', 'TMadeP', 'OppMadeP', 'TOdds', 'OpOdds','TeamRating', 'OppRating', 'TeamAllStars', 'OppAllStars',
             'OProjW', 'TProjW', 'TSOS', 'OpSOS']]

cross_val_score(gnb, X, y, cv=4, scoring='accuracy').mean()

0.6706303557347828

In [790]:
#SVM
from sklearn.svm import SVC

svclassifier = SVC(kernel='linear')


cross_val_score(svclassifier, X, y, cv=4, scoring='accuracy').mean()

0.6823237548190652

In [767]:
#XGB
import xgboost as xgb
from xgboost import XGBClassifier

est = XGBClassifier()
X = df.loc[:, ['Home', 'TeamConf', 'OppConf', 'TMadeP', 'OppMadeP', 'TOdds', 'OpOdds', 'TeamRating', 'OppRating', 'TeamAllStars', 'OppAllStars',
             'OProjW', 'TProjW', 'TSOS', 'OpSOS']]
cross_val_score(est, X, y, cv=4, scoring='accuracy').mean()

0.6820187941475346

# Phase 2: Parameter Tuning ML Algorithms

In [787]:
#Parameter Tuning - KNN
from sklearn.model_selection import GridSearchCV
k_range = list(range(1,80))
weight_options = ['uniform', 'distance']
param_grid = dict(n_neighbors = k_range, weights = weight_options)
grid = GridSearchCV(knn, param_grid, cv=4, scoring='accuracy', return_train_score=False)
grid.fit(X, y)


GridSearchCV(cv=4, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=10, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=None,
             param_grid={'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                         13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                         23, 24, 25, 26, 27, 28, 29, 30, ...],
                         'weights': ['uniform', 'distance']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

In [788]:
print(grid.best_score_)
print(grid.best_params_)
print(grid.best_estimator_)
#print(X)

0.6725603808066611
{'n_neighbors': 79, 'weights': 'uniform'}
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=79, p=2,
                     weights='uniform')


In [169]:
# Paramter Tuning Logistic Regression

from sklearn.model_selection import GridSearchCV
c_range = [0.001, 0.01, 0.1, 0.5, 1, 10,20,30,40,50,60,70,80,90, 100, 200, 500,1000]
solvers = ['newton-cg','lbfgs', 'liblinear', 'sag', 'saga']
param_grid = dict(C = c_range, solver = solvers)
grid = GridSearchCV(logreg, param_grid, cv=4, scoring='accuracy', return_train_score=False)
grid.fit(X, y)

GridSearchCV(cv=4, error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=5000, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.001, 0.01, 0.1, 0.5, 1, 10, 20, 30, 40, 50, 60,
                               70, 80, 90, 100, 200, 500, 1000],
                         'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag',
                                    'saga']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
     

In [170]:
print(grid.best_score_)
print(grid.best_params_)
print(grid.best_estimator_)

0.6846626414450032
{'C': 30, 'solver': 'lbfgs'}
LogisticRegression(C=30, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=5000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)


In [785]:
# Parameter Tuning Decision Tree Classifier
min_samples_leaf = [1,2,3,4,5,6]
max_depth = [1,2,3,4,5,6]
min_samples_split = [2,3,4,5]
criterion =['gini', 'entropy']

param_grid = dict(max_depth = max_depth, criterion = criterion)
grid = GridSearchCV(clf, param_grid, cv=4, scoring='accuracy', return_train_score=False)
grid.fit(X, y)

GridSearchCV(cv=4, error_score=nan,
             estimator=DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None,
                                              criterion='entropy', max_depth=4,
                                              max_features=None,
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              presort='deprecated',
                                              random_state=None,
                                              splitter='best'),
             iid='deprecated', n_jobs=None,
             param_grid={'criterion': ['gini', 'entropy'],
                   

In [786]:
print(grid.best_score_)
print(grid.best_params_)
print(grid.best_estimator_)

0.6718497434409259
{'criterion': 'gini', 'max_depth': 5}
DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=5, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')


In [744]:
# Parameter Tuning Random Forest Classifier
#min_samples_leaf = [1,2,3,4,5,6]
n_estimators = list(range(1,10))
max_depth =  list(range(1,10))
min_samples_leaf = [1,2,3,4,5,6]
min_samples_split = [2,3,4,5]
criterion =['gini', 'entropy']
#min_samples_split = [2,3,4,5]

param_grid = dict(max_depth = max_depth, n_estimators = n_estimators, min_samples_leaf = min_samples_leaf,
                 min_samples_split = min_samples_split, criterion = criterion)
grid = GridSearchCV(clfs, param_grid, cv=4, scoring='accuracy', return_train_score=False)
grid.fit(X, y)


GridSearchCV(cv=4, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='entropy', max_depth=4,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False,
                                              rando

In [745]:
print(grid.best_score_)
print(grid.best_params_)
print(grid.best_estimator_)

0.6794769864508616
{'criterion': 'entropy', 'max_depth': 6, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 9}
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=6, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=9,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)


In [120]:
# Parameter tuning Naiive Bayes
from sklearn.model_selection import GridSearchCV
smoothing = [0.00000001, 0.00001, 0.001, 0.01, 1, 10, 100, 200]
param_grid = {'var_smoothing': smoothing}
grid = GridSearchCV(gnb, param_grid, cv=4, scoring='accuracy', return_train_score=False)
grid.fit(X, y)

print(grid.best_score_)
print(grid.best_params_)
print(grid.best_estimator_)

0.6741873064887424
{'var_smoothing': 10}
GaussianNB(priors=None, var_smoothing=10)


In [791]:
# Parameter tuning SVM
Cs = [0.001, 0.01, 0.1, 1]
gammas = [0.001, 0.01, 0.1, 1]
#param_grid = dict(max_depth = max_depth, n_estimators = n_estimators)
param_grid = {'C': Cs, 'gamma' : gammas}
grid = GridSearchCV(svclassifier, param_grid, cv=4, scoring='accuracy', return_train_score=False)
grid.fit(X, y)

print(grid.best_score_)
print(grid.best_params_)
print(grid.best_estimator_)

0.6834422610552691
{'C': 0.01, 'gamma': 0.001}
SVC(C=0.01, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)


In [765]:
# Parameter tuning XGB
param_grid = {"learning_rate"    : [0.01, 0.1, 0.05, 0.10, 0.15, 0.20, 0.5, 0.7, 1] ,
"max_depth"        : [ 1, 2, 3, 4, 5, 6, 8, 10, 12, 15]}

        # "min_child_weight" : [ 1, 3, 5, 7 ],
# "gamma"            : [ 0.0, 0.1, 0.2 , 0.3, 0.4 ],
 #"colsample_bytree" : [ 0.3, 0.4, 0.5 , 0.7 ] }

grid = GridSearchCV(est, param_grid, cv=4, scoring='accuracy', return_train_score=False)
grid.fit(X, y)

print(grid.best_score_)
print(grid.best_params_)
print(grid.best_estimator_) 

0.6850699312500433
{'learning_rate': 0.5, 'max_depth': 1}
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.5, max_delta_step=0, max_depth=1,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)


In [352]:
#grid.predict(X)
#grid.predict([[1.0, 0.0, 0.0, 1.0, 1.0, 0.007937, 1.960784,  0.0, 3.0, 1.0,49.0,54.0, 1.0, 2.0]])

array([1.])

In [113]:
#print(np.argmax(np.round(prediction[15])))

In [None]:
# Prediction

In [274]:
#Prediction Example
#outcome = grid.predict([[df['Home'][1], df['TeamConf'][1], df['OppConf'][1], df['TMadeP'][1], df['OppMadeP'][1], df['TOdds'][1], 
 #               df['OpOdds'][1], df['TeamStronger'][1], df['TeamAllStars'][1], df['OppAllStars'][1], df['OProjW'][1], df['TProjW'][1], 
  #              df['TSOS'][1], df['OpSOS'][1]]])

#print("Winner between ", df['Team'][1], "vs", df['Opponent'][1], "was: ")
#if (outcome == 0):
 #   print(df['Opponent'][1])
#else:
 #   print("Winner was ", df['Team'][1])

# Phase 3: Predicting 2018-19 NBA Playoffs

### Round 1

In [171]:
# Predicting 2018-19 NBA Playoffs

# Grab the data ->
#nbaPred['Team'] = ["MIL", "DET"]
East_r1S1 = {'Team': ["MIL", "MIL", "MIL", "MIL", "MIL", "MIL", "MIL",
                     "BOS","BOS","BOS","BOS","BOS","BOS","BOS",
                     "PHI","PHI","PHI","PHI","PHI","PHI","PHI",
                     "TOR","TOR","TOR","TOR","TOR","TOR","TOR"], 
             'Opponent': ["DET", "DET", "DET", "DET", "DET", "DET", "DET",
                    "IND","IND","IND","IND","IND","IND","IND",
                    "BRK","BRK","BRK","BRK","BRK","BRK","BRK",
                    "ORL","ORL","ORL","ORL","ORL","ORL","ORL"], 
       'Home': ["Home","Home", "Away", "Away","Home","Away","Home",
               "Home","Home", "Away", "Away","Home","Away","Home",
               "Home","Home", "Away", "Away","Home","Away","Home",
               "Home","Home", "Away", "Away","Home","Away","Home"], 
       'TeamConf': ["East", "East", "East", "East", "East", "East", "East",
                   "East", "East", "East", "East", "East", "East", "East",
                   "East", "East", "East", "East", "East", "East", "East",
                   "East", "East", "East", "East", "East", "East", "East"], 
       'OppConf': ["East", "East", "East", "East", "East", "East", "East",
                  "East", "East", "East", "East", "East", "East", "East",
                  "East", "East", "East", "East", "East", "East", "East",
                  "East", "East", "East", "East", "East", "East", "East"],
      'TeamAllStars': [2, 2, 2, 2, 2, 2, 2,
                      1, 1, 1, 1, 1, 1, 1,
                      2, 2, 2, 2, 2, 2, 2,
                      2, 2, 2, 2, 2, 2, 2,], 
      'OppAllStars': [1, 1, 1, 1, 1, 1, 1,
                      1, 1, 1, 1, 1, 1, 1,
                      1, 1, 1, 1, 1, 1, 1,
                      1, 1, 1, 1, 1, 1, 1,
                     ], 
      'TMadeP': ['Y', 'Y','Y','Y','Y','Y','Y',
                'Y', 'Y','Y','Y','Y','Y','Y',
                'Y', 'Y','Y','Y','Y','Y','Y',
                'Y', 'Y','Y','Y','Y','Y','Y'],
      'OppMadeP': ['N', 'N','N','N','N','N','N',
                  'Y', 'Y','Y','Y','Y','Y','Y',
                  'N', 'N','N','N','N','N','N',
                  'N', 'N','N','N','N','N','N']}



West_r1S1 = {'Team': ["GSW", "GSW", "GSW", "GSW", "GSW", "GSW", "GSW",
                     "HOU","HOU","HOU","HOU","HOU","HOU","HOU",
                     "POR","POR","POR","POR","POR","POR","POR",
                     "DEN","DEN","DEN","DEN","DEN","DEN","DEN"], 
             'Opponent': ["LAC", "LAC", "LAC", "LAC", "LAC", "LAC", "LAC",
                    "UTA","UTA","UTA","UTA","UTA","UTA","UTA",
                    "OKC","OKC","OKC","OKC","OKC","OKC","OKC",
                    "SAS","SAS","SAS","SAS","SAS","SAS","SAS"], 
       'Home': ["Home","Home", "Away", "Away","Home","Away","Home",
               "Home","Home", "Away", "Away","Home","Away","Home",
               "Home","Home", "Away", "Away","Home","Away","Home",
               "Home","Home", "Away", "Away","Home","Away","Home"], 
       'TeamConf': ["West", "West", "West", "West", "West", "West", "West",
                   "West", "West", "West", "West", "West", "West", "West",
                   "West", "West", "West", "West", "West", "West", "West",
                   "West", "West", "West", "West", "West", "West", "West",], 
       'OppConf': ["West", "West", "West", "West", "West", "West", "West",
                  "West", "West", "West", "West", "West", "West", "West",
                  "West", "West", "West", "West", "West", "West", "West",
                  "West", "West", "West", "West", "West", "West", "West",],
      'TeamAllStars': [3, 3, 3, 3, 3, 3, 3,
                      1, 1, 1, 1, 1, 1, 1,
                      1, 1, 1, 1, 1, 1, 1,
                      1, 1, 1, 1, 1, 1, 1], 
      'OppAllStars': [0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 0, 0,
                      2, 2, 2, 2, 2, 2, 2,
                      1, 1, 1, 1, 1, 1, 1,
                     ], 
      'TMadeP': ['Y', 'Y','Y','Y','Y','Y','Y',
                'Y', 'Y','Y','Y','Y','Y','Y',
                'Y', 'Y','Y','Y','Y','Y','Y',
                'N', 'N','N','N','N','N','N'],
      'OppMadeP': ['N', 'N','N','N','N','N','N',
                  'Y', 'Y','Y','Y','Y','Y','Y',
                  'Y', 'Y','Y','Y','Y','Y','Y',
                  'Y', 'Y','Y','Y','Y','Y','Y']}


nba_Epred = pd.DataFrame(East_r1S1)
nba_Wpred = pd.DataFrame(West_r1S1)

In [172]:
nba_Epred.head(28)

Unnamed: 0,Team,Opponent,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP
0,MIL,DET,Home,East,East,2,1,Y,N
1,MIL,DET,Home,East,East,2,1,Y,N
2,MIL,DET,Away,East,East,2,1,Y,N
3,MIL,DET,Away,East,East,2,1,Y,N
4,MIL,DET,Home,East,East,2,1,Y,N
5,MIL,DET,Away,East,East,2,1,Y,N
6,MIL,DET,Home,East,East,2,1,Y,N
7,BOS,IND,Home,East,East,1,1,Y,Y
8,BOS,IND,Home,East,East,1,1,Y,Y
9,BOS,IND,Away,East,East,1,1,Y,Y


In [173]:
nba_Wpred.head(28)


Unnamed: 0,Team,Opponent,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP
0,GSW,LAC,Home,West,West,3,0,Y,N
1,GSW,LAC,Home,West,West,3,0,Y,N
2,GSW,LAC,Away,West,West,3,0,Y,N
3,GSW,LAC,Away,West,West,3,0,Y,N
4,GSW,LAC,Home,West,West,3,0,Y,N
5,GSW,LAC,Away,West,West,3,0,Y,N
6,GSW,LAC,Home,West,West,3,0,Y,N
7,HOU,UTA,Home,West,West,1,0,Y,Y
8,HOU,UTA,Home,West,West,1,0,Y,Y
9,HOU,UTA,Away,West,West,1,0,Y,Y


In [174]:
TeamOdds_19= pd.read_csv('2019TeamOdds.csv') 
OppOdds_19= pd.read_csv('2019OppOdds.csv') 

In [175]:
TeamOdds_19 = TeamOdds_19.loc[:, ['Team', 'Odds']]
OppOdds_19 = OppOdds_19.loc[:, ['Opponent', 'Odds']]

In [176]:
tOdds = []
oOdds = []
def convertTOdds():
    for j in range(len(TeamOdds_19['Odds'])):
            if (TeamOdds_19['Odds'][j] > 0):
               # print(odds['Odds'][0])
               TeamOdds_19['Odds'][j] = (100/(TeamOdds_19['Odds'][j] + 100))*100
            #x = -187
            elif (TeamOdds_19['Odds'][j] < 0):  
                TeamOdds_19['Odds'][j] = ((-(TeamOdds_19['Odds'][j]))/((-(TeamOdds_19['Odds'][j])) + 100))*100

def convertOpOdds():
    for j in range(len(OppOdds_19['Odds'])):
            if (OppOdds_19['Odds'][j] > 0):
               # print(odds['Odds'][0])
               OppOdds_19['Odds'][j] = (100/(OppOdds_19['Odds'][j] + 100))*100
            #x = -187
            elif (OppOdds_19['Odds'][j] < 0):  
                OppOdds_19['Odds'][j] = ((-(OppOdds_19['Odds'][j]))/((-(OppOdds_19['Odds'][j])) + 100))*100

In [177]:
convertTOdds()
convertOpOdds()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [178]:
TeamOdds_19.head()

Unnamed: 0,Team,Odds
0,GSW,62
1,BOS,13
2,HOU,9
3,LAL,5
4,TOR,5


In [179]:
OppOdds_19.head()

Unnamed: 0,Opponent,Odds
0,GSW,62
1,BOS,13
2,HOU,9
3,LAL,5
4,TOR,5


In [180]:
nba_Epred = pd.merge(TeamOdds_19, nba_Epred, on = ['Team'], how='outer')
nba_E2pred = pd.merge(OppOdds_19, nba_Epred, on = ['Opponent'], how='outer')

nba_Wpred = pd.merge(TeamOdds_19, nba_Wpred, on = ['Team'], how='outer')
nba_W2pred = pd.merge(OppOdds_19, nba_Wpred, on = ['Opponent'], how='outer')

In [181]:
nba_E2pred.dropna(how='any', inplace=True)
nba_W2pred.dropna(how='any', inplace=True)

In [182]:
nba_E2pred.head()

Unnamed: 0,Opponent,Odds_x,Team,Odds_y,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP
12,IND,0.0,BOS,13.0,Home,East,East,1.0,1.0,Y,Y
13,IND,0.0,BOS,13.0,Home,East,East,1.0,1.0,Y,Y
14,IND,0.0,BOS,13.0,Away,East,East,1.0,1.0,Y,Y
15,IND,0.0,BOS,13.0,Away,East,East,1.0,1.0,Y,Y
16,IND,0.0,BOS,13.0,Home,East,East,1.0,1.0,Y,Y


In [183]:
nba_W2pred.head()

Unnamed: 0,Opponent,Odds_x,Team,Odds_y,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP
6,OKC,2.0,POR,0.0,Home,West,West,1.0,2.0,Y,Y
7,OKC,2.0,POR,0.0,Home,West,West,1.0,2.0,Y,Y
8,OKC,2.0,POR,0.0,Away,West,West,1.0,2.0,Y,Y
9,OKC,2.0,POR,0.0,Away,West,West,1.0,2.0,Y,Y
10,OKC,2.0,POR,0.0,Home,West,West,1.0,2.0,Y,Y


In [184]:
# Add SOS and Proj Wins
Tstats_19 = pd.read_csv('Tstats19.csv')
Tstats_19 = Tstats_19.loc[:, ['Team', 'PW', 'SOS']]

Ostats_19 = pd.read_csv('Ostats19.csv')
Ostats_19 = Ostats_19.loc[:, ['Opponent', 'PW', 'SOS']]

# Eastern Round
#nba_Wpred = pd.merge(TeamOdds_19, nba_Wpred, on = ['Team'], how='outer')

In [185]:
Tstats_19.head()

Unnamed: 0,Team,PW,SOS
0,MIL,61,-0.82
1,GSW,56,-0.04
2,TOR,56,-0.6
3,UTA,54,0.03
4,HOU,53,0.19


In [186]:
Ostats_19.head()

Unnamed: 0,Opponent,PW,SOS
0,MIL,61,-0.82
1,GSW,56,-0.04
2,TOR,56,-0.6
3,UTA,54,0.03
4,HOU,53,0.19


In [187]:
# Merge each conference
# Eastern Round
nba_ER3 = pd.merge(Tstats_19, nba_E2pred, on = ['Team'], how='outer')
nba_E4 = pd.merge(Ostats_19, nba_ER3, on = ['Opponent'], how='outer')
#nba_WestR1pred = pd.merge(TeamOdds_19, nba_Wpred, on = ['Team'], how='outer')

In [188]:
nba_E4.dropna(how='any', inplace=True)

In [189]:
nba_E4.head() #OpOdds	TOdds OProjW	TProjW	

Unnamed: 0,Opponent,PW_x,SOS_x,Team,PW_y,SOS_y,Odds_x,Odds_y,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP
9,IND,50.0,-0.57,BOS,52.0,-0.54,0.0,13.0,Home,East,East,1.0,1.0,Y,Y
10,IND,50.0,-0.57,BOS,52.0,-0.54,0.0,13.0,Home,East,East,1.0,1.0,Y,Y
11,IND,50.0,-0.57,BOS,52.0,-0.54,0.0,13.0,Away,East,East,1.0,1.0,Y,Y
12,IND,50.0,-0.57,BOS,52.0,-0.54,0.0,13.0,Away,East,East,1.0,1.0,Y,Y
13,IND,50.0,-0.57,BOS,52.0,-0.54,0.0,13.0,Home,East,East,1.0,1.0,Y,Y


In [190]:
# Import Roster Rating
roster19 = pd.read_csv('roster19.csv')

In [191]:
roster19 = roster19.loc[:, ['Team', 'Rating']]

In [192]:
roster19.head()

Unnamed: 0,Team,Rating
0,GSW,79.27
1,HOU,76.88
2,MIA,76.62
3,OKC,76.53
4,LAL,76.38


In [193]:
nba_E5 = pd.merge(roster19, nba_E4, on = ['Team'], how='outer')
#nba_EastR1 = pd.merge(Oroster19, nba_E5, on = ['Opponent'], how='outer')
roster19.rename(columns={'Team': 'Opponent'}, inplace=True)
roster19.head()

Unnamed: 0,Opponent,Rating
0,GSW,79.27
1,HOU,76.88
2,MIA,76.62
3,OKC,76.53
4,LAL,76.38


In [194]:
nba_EastR1 = pd.merge(roster19, nba_E5, on = ['Opponent'], how='outer')

In [195]:
nba_EastR1.dropna(how='any', inplace=True)

In [196]:
nba_EastR1.head()

Unnamed: 0,Opponent,Rating_x,Team,Rating_y,PW_x,SOS_x,PW_y,SOS_y,Odds_x,Odds_y,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP
16,BRK,75.33,PHI,76.21,41.0,-0.33,48.0,-0.44,0.0,4.0,Home,East,East,2.0,1.0,Y,N
17,BRK,75.33,PHI,76.21,41.0,-0.33,48.0,-0.44,0.0,4.0,Home,East,East,2.0,1.0,Y,N
18,BRK,75.33,PHI,76.21,41.0,-0.33,48.0,-0.44,0.0,4.0,Away,East,East,2.0,1.0,Y,N
19,BRK,75.33,PHI,76.21,41.0,-0.33,48.0,-0.44,0.0,4.0,Away,East,East,2.0,1.0,Y,N
20,BRK,75.33,PHI,76.21,41.0,-0.33,48.0,-0.44,0.0,4.0,Home,East,East,2.0,1.0,Y,N


In [197]:
nba_EastR1.rename(columns={'Odds_x':'OpOdds', 'Odds_y': 'TOdds', 'SOS_x': 'OSOS', 'SOS_y': 'TSOS', 'PW_x': 'OProjW',
                          'PW_y': 'TProjW', 'Rating_y': 'TeamRating', 'Rating_x': 'OppRating'}, inplace=True)

In [198]:
nba_WR3 = pd.merge(Tstats_19, nba_W2pred, on = ['Team'], how='outer')
nba_W4 = pd.merge(Ostats_19, nba_WR3, on = ['Opponent'], how='outer')

nba_W4.dropna(how='any', inplace=True)

In [199]:
nba_W4.head()

Unnamed: 0,Opponent,PW_x,SOS_x,Team,PW_y,SOS_y,Odds_x,Odds_y,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP
3,UTA,54.0,0.03,HOU,53.0,0.19,0.0,9.0,Home,West,West,1.0,0.0,Y,Y
4,UTA,54.0,0.03,HOU,53.0,0.19,0.0,9.0,Home,West,West,1.0,0.0,Y,Y
5,UTA,54.0,0.03,HOU,53.0,0.19,0.0,9.0,Away,West,West,1.0,0.0,Y,Y
6,UTA,54.0,0.03,HOU,53.0,0.19,0.0,9.0,Away,West,West,1.0,0.0,Y,Y
7,UTA,54.0,0.03,HOU,53.0,0.19,0.0,9.0,Home,West,West,1.0,0.0,Y,Y


In [200]:
# Import Roster Rating
roster19 = pd.read_csv('roster19.csv')
roster19 = roster19.loc[:, ['Team', 'Rating']]
nba_W5 = pd.merge(roster19, nba_W4, on = ['Team'], how='outer')
#nba_EastR1 = pd.merge(Oroster19, nba_E5, on = ['Opponent'], how='outer')
roster19.rename(columns={'Team': 'Opponent'}, inplace=True)

In [201]:
nba_WestR1 = pd.merge(roster19, nba_W5, on = ['Opponent'], how='outer')
nba_WestR1.dropna(how='any', inplace=True)

In [202]:
nba_WestR1.head(9)

Unnamed: 0,Opponent,Rating_x,Team,Rating_y,PW_x,SOS_x,PW_y,SOS_y,Odds_x,Odds_y,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP
3,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,Home,West,West,1.0,2.0,Y,Y
4,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,Home,West,West,1.0,2.0,Y,Y
5,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,Away,West,West,1.0,2.0,Y,Y
6,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,Away,West,West,1.0,2.0,Y,Y
7,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,Home,West,West,1.0,2.0,Y,Y
8,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,Away,West,West,1.0,2.0,Y,Y
9,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,Home,West,West,1.0,2.0,Y,Y
16,SAS,75.8,DEN,75.94,45.0,0.12,51.0,0.24,0.0,0.0,Home,West,West,1.0,1.0,N,Y
17,SAS,75.8,DEN,75.94,45.0,0.12,51.0,0.24,0.0,0.0,Home,West,West,1.0,1.0,N,Y


In [203]:
nba_WestR1.rename(columns={'Odds_x':'OpOdds', 'Odds_y': 'TOdds', 'SOS_x': 'OSOS', 'SOS_y': 'TSOS', 'PW_x': 'OProjW',
                          'PW_y': 'TProjW', 'Rating_y': 'TeamRating', 'Rating_x': 'OppRating'}, inplace=True)

In [204]:
#nba_WestR1['Team'][3] #Convert using encoder

In [205]:
nba_WestR1['TeamStronger'] = nba_WestR1['TeamRating'] > nba_WestR1['OppRating']
nba_EastR1['TeamStronger'] = nba_EastR1['TeamRating'] > nba_EastR1['OppRating']

In [206]:
nba_WestR1

Unnamed: 0,Opponent,OppRating,Team,TeamRating,OProjW,OSOS,TProjW,TSOS,OpOdds,TOdds,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP,TeamStronger
3,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,Home,West,West,1.0,2.0,Y,Y,False
4,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,Home,West,West,1.0,2.0,Y,Y,False
5,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,Away,West,West,1.0,2.0,Y,Y,False
6,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,Away,West,West,1.0,2.0,Y,Y,False
7,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,Home,West,West,1.0,2.0,Y,Y,False
8,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,Away,West,West,1.0,2.0,Y,Y,False
9,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,Home,West,West,1.0,2.0,Y,Y,False
16,SAS,75.8,DEN,75.94,45.0,0.12,51.0,0.24,0.0,0.0,Home,West,West,1.0,1.0,N,Y,True
17,SAS,75.8,DEN,75.94,45.0,0.12,51.0,0.24,0.0,0.0,Home,West,West,1.0,1.0,N,Y,True
18,SAS,75.8,DEN,75.94,45.0,0.12,51.0,0.24,0.0,0.0,Away,West,West,1.0,1.0,N,Y,True


In [207]:
nba_WestR1.loc[:,['Home']] = label_encoder.fit_transform(nba_WestR1.loc[:,['Home']]).astype('float64')
nba_WestR1.loc[:,['TeamConf']] = label_encoder.fit_transform(nba_WestR1.loc[:,['TeamConf']]).astype('float64')
nba_WestR1.loc[:,['OppConf']] = label_encoder.fit_transform(nba_WestR1.loc[:,['OppConf']]).astype('float64')
nba_WestR1.loc[:,['TMadeP']] = label_encoder.fit_transform(nba_WestR1.loc[:,['TMadeP']]).astype('float64')
nba_WestR1.loc[:,['OppMadeP']] = label_encoder.fit_transform(nba_WestR1.loc[:,['OppMadeP']]).astype('float64')
nba_WestR1.loc[:,['TeamStronger']] = label_encoder.fit_transform(nba_WestR1.loc[:,['TeamStronger']]).astype('float64')

nba_EastR1.loc[:,['Home']] = label_encoder.fit_transform(nba_EastR1.loc[:,['Home']]).astype('float64')
nba_EastR1.loc[:,['TeamConf']] = label_encoder.fit_transform(nba_EastR1.loc[:,['TeamConf']]).astype('float64')
nba_EastR1.loc[:,['OppConf']] = label_encoder.fit_transform(nba_EastR1.loc[:,['OppConf']]).astype('float64')
nba_EastR1.loc[:,['TMadeP']] = label_encoder.fit_transform(nba_EastR1.loc[:,['TMadeP']]).astype('float64')
nba_EastR1.loc[:,['OppMadeP']] = label_encoder.fit_transform(nba_EastR1.loc[:,['OppMadeP']]).astype('float64')
nba_EastR1.loc[:,['TeamStronger']] = label_encoder.fit_transform(nba_EastR1.loc[:,['TeamStronger']]).astype('float64')

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [208]:
nba_WestR1.reset_index(drop=True,inplace=True)
nba_EastR1.reset_index(drop=True,inplace=True)

In [209]:
nba_WestR1

Unnamed: 0,Opponent,OppRating,Team,TeamRating,OProjW,OSOS,TProjW,TSOS,OpOdds,TOdds,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP,TeamStronger
0,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,1.0,0.0,0.0,1.0,2.0,1.0,1.0,0.0
1,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,1.0,0.0,0.0,1.0,2.0,1.0,1.0,0.0
2,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,0.0
3,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,0.0
4,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,1.0,0.0,0.0,1.0,2.0,1.0,1.0,0.0
5,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,0.0
6,OKC,76.53,POR,75.0,50.0,0.15,51.0,0.24,2.0,0.0,1.0,0.0,0.0,1.0,2.0,1.0,1.0,0.0
7,SAS,75.8,DEN,75.94,45.0,0.12,51.0,0.24,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
8,SAS,75.8,DEN,75.94,45.0,0.12,51.0,0.24,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
9,SAS,75.8,DEN,75.94,45.0,0.12,51.0,0.24,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0


In [210]:
# Using best model
#logregression = LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,
#                   intercept_scaling=1, l1_ratio=None, max_iter=5000,
 #                  multi_class='auto', n_jobs=None, penalty='l2',
  #                 random_state=None, solver='newton-cg', tol=0.0001, verbose=0,
   #                warm_start=False)

#logregression.fit(X,y)

In [211]:
nba_EastR1

Unnamed: 0,Opponent,OppRating,Team,TeamRating,OProjW,OSOS,TProjW,TSOS,OpOdds,TOdds,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP,TeamStronger
0,BRK,75.33,PHI,76.21,41.0,-0.33,48.0,-0.44,0.0,4.0,1.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0
1,BRK,75.33,PHI,76.21,41.0,-0.33,48.0,-0.44,0.0,4.0,1.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0
2,BRK,75.33,PHI,76.21,41.0,-0.33,48.0,-0.44,0.0,4.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0
3,BRK,75.33,PHI,76.21,41.0,-0.33,48.0,-0.44,0.0,4.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0
4,BRK,75.33,PHI,76.21,41.0,-0.33,48.0,-0.44,0.0,4.0,1.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0
5,BRK,75.33,PHI,76.21,41.0,-0.33,48.0,-0.44,0.0,4.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0
6,BRK,75.33,PHI,76.21,41.0,-0.33,48.0,-0.44,0.0,4.0,1.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0
7,IND,74.5,BOS,76.17,50.0,-0.57,52.0,-0.54,0.0,13.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0
8,IND,74.5,BOS,76.17,50.0,-0.57,52.0,-0.54,0.0,13.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0
9,IND,74.5,BOS,76.17,50.0,-0.57,52.0,-0.54,0.0,13.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0


In [212]:
#Eastern Conference Round 1 Prediction Example
PHIwins = 0
BRKwins = 0
for i in range(0,7): #0-6
    
    if (PHIwins == 4 or BRKwins == 4):
        break
    outcomes = grid.predict([[nba_EastR1['Home'][i], nba_EastR1['TeamConf'][i], nba_EastR1['OppConf'][i], nba_EastR1['TMadeP'][i], 
                             nba_EastR1['OppMadeP'][i], nba_EastR1['TOdds'][i], nba_EastR1['OpOdds'][i], nba_EastR1['TeamRating'][i], nba_EastR1['OppRating'][i], nba_EastR1['TeamAllStars'][i], 
                             nba_EastR1['OppAllStars'][i], nba_EastR1['OProjW'][i], nba_EastR1['TProjW'][i], 
                             nba_EastR1['TSOS'][i], nba_EastR1['OSOS'][i]]])

    #print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_EastR1['Opponent'][0])
        BRKwins = BRKwins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_EastR1['Team'][0])
        PHIwins = PHIwins + 1

print('############################################################################################################################')

BOSwins = 0
INDwins = 0
for i in range(7, 14): #7-13
   
    if (BOSwins == 4 or INDwins == 4):
        break

    outcomes = grid.predict([[nba_EastR1['Home'][i], nba_EastR1['TeamConf'][i], nba_EastR1['OppConf'][i], nba_EastR1['TMadeP'][i], 
                             nba_EastR1['OppMadeP'][i], nba_EastR1['TOdds'][i], nba_EastR1['OpOdds'][i], nba_EastR1['TeamRating'][i], nba_EastR1['OppRating'][i], nba_EastR1['TeamAllStars'][i], 
                             nba_EastR1['OppAllStars'][i], nba_EastR1['OProjW'][i], nba_EastR1['TProjW'][i], 
                             nba_EastR1['TSOS'][i], nba_EastR1['OSOS'][i]]])

   # print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_EastR1['Opponent'][7])
        INDwins = INDwins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_EastR1['Team'][7])
        BOSwins = BOSwins + 1
        
print('############################################################################################################################')        
MILwins = 0
DETwins = 0
for i in range(14, 21): #14-20
   
    if (MILwins == 4 or DETwins == 4):
        break
        outcomes = grid.predict([[nba_EastR1['Home'][i], nba_EastR1['TeamConf'][i], nba_EastR1['OppConf'][i], nba_EastR1['TMadeP'][i], 
                             nba_EastR1['OppMadeP'][i], nba_EastR1['TOdds'][i], nba_EastR1['OpOdds'][i], nba_EastR1['TeamRating'][i], nba_EastR1['OppRating'][i], nba_EastR1['TeamAllStars'][i], 
                             nba_EastR1['OppAllStars'][i], nba_EastR1['OProjW'][i], nba_EastR1['TProjW'][i], 
                             nba_EastR1['TSOS'][i], nba_EastR1['OSOS'][i]]])

   # print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_EastR1['Opponent'][14])
        DETwins = DETwins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_EastR1['Team'][14])
        MILwins = MILwins + 1
print('############################################################################################################################')        
TORwins = 0
ORLwins = 0
for i in range(21, 28): #21-27
    if (TORwins == 4 or ORLwins == 4):
        break
        outcomes = grid.predict([[nba_EastR1['Home'][i], nba_EastR1['TeamConf'][i], nba_EastR1['OppConf'][i], nba_EastR1['TMadeP'][i], 
                             nba_EastR1['OppMadeP'][i], nba_EastR1['TOdds'][i], nba_EastR1['OpOdds'][i], nba_EastR1['TeamRating'][i], nba_EastR1['OppRating'][i], nba_EastR1['TeamAllStars'][i], 
                             nba_EastR1['OppAllStars'][i], nba_EastR1['OProjW'][i], nba_EastR1['TProjW'][i], 
                             nba_EastR1['TSOS'][i], nba_EastR1['OSOS'][i]]])
   # print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_EastR1['Opponent'][21])
        ORLwins = ORLwins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_EastR1['Team'][21])
        TORwins = TORwins + 1

print("Final Result: ", nba_EastR1['Team'][0], ": ", PHIwins, "  ", nba_EastR1['Opponent'][0], ": ", BRKwins)
print("Final Result: ", nba_EastR1['Team'][7], ": ", BOSwins, "  ", nba_EastR1['Opponent'][7], ": ", INDwins)
print("Final Result: ", nba_EastR1['Team'][14], ": ", MILwins, "  ", nba_EastR1['Opponent'][14], ": ", DETwins)
print("Final Result: ", nba_EastR1['Team'][21], ": ", TORwins, "  ", nba_EastR1['Opponent'][21], ": ", ORLwins)



Winner was  PHI
Winner was  PHI
Winner was  BRK
Winner was  BRK
Winner was  PHI
Winner was  BRK
Winner was  PHI
############################################################################################################################
Winner was  BOS
Winner was  BOS
Winner was  IND
Winner was  IND
Winner was  BOS
Winner was  IND
Winner was  BOS
############################################################################################################################
Winner was  MIL
Winner was  MIL
Winner was  MIL
Winner was  MIL
############################################################################################################################
Winner was  TOR
Winner was  TOR
Winner was  TOR
Winner was  TOR
Final Result:  PHI :  4    BRK :  3
Final Result:  BOS :  4    IND :  3
Final Result:  MIL :  4    DET :  0
Final Result:  TOR :  4    ORL :  0


In [213]:
#Western Conference Round 1 Prediction Example
Porwins = 0
OKCwins = 0
for i in range(0,7): #0-6
    
    if (Porwins == 4 or OKCwins == 4):
        break
    outcomes = grid.predict([[nba_WestR1['Home'][i], nba_WestR1['TeamConf'][i], nba_WestR1['OppConf'][i], nba_WestR1['TMadeP'][i], 
                             nba_WestR1['OppMadeP'][i], nba_WestR1['TOdds'][i], nba_WestR1['OpOdds'][i], nba_WestR1['TeamRating'][i], nba_WestR1['OppRating'][i], nba_WestR1['TeamAllStars'][i], 
                             nba_WestR1['OppAllStars'][i], nba_WestR1['OProjW'][i], nba_WestR1['TProjW'][i], 
                             nba_WestR1['TSOS'][i], nba_WestR1['OSOS'][i]]])

    #print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_WestR1['Opponent'][0])
        OKCwins = OKCwins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_WestR1['Team'][0])
        Porwins = Porwins + 1

print('############################################################################################################################')

SASwins = 0
DENwins = 0
for i in range(7, 14): #7-13
   
    if (SASwins == 4 or DENwins == 4):
        break

    outcomes = grid.predict([[nba_WestR1['Home'][i], nba_WestR1['TeamConf'][i], nba_WestR1['OppConf'][i], nba_WestR1['TMadeP'][i], 
                             nba_WestR1['OppMadeP'][i], nba_WestR1['TOdds'][i], nba_WestR1['OpOdds'][i], nba_WestR1['TeamRating'][i], nba_WestR1['OppRating'][i], nba_WestR1['TeamAllStars'][i], 
                             nba_WestR1['OppAllStars'][i], nba_WestR1['OProjW'][i], nba_WestR1['TProjW'][i], 
                             nba_WestR1['TSOS'][i], nba_WestR1['OSOS'][i]]])

   # print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_WestR1['Opponent'][7])
        SASwins = SASwins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_WestR1['Team'][7])
        DENwins = DENwins + 1
        
print('############################################################################################################################')        
HOUwins = 0
UTAwins = 0
for i in range(14, 21): #14-20
   
    if (HOUwins == 4 or UTAwins == 4):
        break
    outcomes = grid.predict([[nba_WestR1['Home'][i], nba_WestR1['TeamConf'][i], nba_WestR1['OppConf'][i], nba_WestR1['TMadeP'][i], 
                             nba_WestR1['OppMadeP'][i], nba_WestR1['TOdds'][i], nba_WestR1['OpOdds'][i], nba_WestR1['TeamRating'][i], nba_WestR1['OppRating'][i], nba_WestR1['TeamAllStars'][i], 
                             nba_WestR1['OppAllStars'][i], nba_WestR1['OProjW'][i], nba_WestR1['TProjW'][i], 
                             nba_WestR1['TSOS'][i], nba_WestR1['OSOS'][i]]])
   # print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_WestR1['Opponent'][14])
        UTAwins = UTAwins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_WestR1['Team'][14])
        HOUwins = HOUwins + 1
print('############################################################################################################################')        
GSWwins = 0
LACwins = 0
for i in range(21, 28): #21-27
    if (GSWwins == 4 or LACwins == 4):
        break
    outcomes = grid.predict([[nba_WestR1['Home'][i], nba_WestR1['TeamConf'][i], nba_WestR1['OppConf'][i], nba_WestR1['TMadeP'][i], 
                             nba_WestR1['OppMadeP'][i], nba_WestR1['TOdds'][i], nba_WestR1['OpOdds'][i], nba_WestR1['TeamRating'][i], nba_WestR1['OppRating'][i], nba_WestR1['TeamAllStars'][i], 
                             nba_WestR1['OppAllStars'][i], nba_WestR1['OProjW'][i], nba_WestR1['TProjW'][i], 
                             nba_WestR1['TSOS'][i], nba_WestR1['OSOS'][i]]])

   # print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_WestR1['Opponent'][21])
        LACwins = LACwins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_WestR1['Team'][21])
        GSWwins = GSWwins + 1

print("Final Result: ", nba_WestR1['Team'][0], ": ", Porwins, "  ", nba_WestR1['Opponent'][0], ": ", OKCwins)
print("Final Result: ", nba_WestR1['Team'][7], ": ", DENwins, "  ", nba_WestR1['Opponent'][7], ": ", SASwins)
print("Final Result: ", nba_WestR1['Team'][14], ": ", HOUwins, "  ", nba_WestR1['Opponent'][14], ": ", UTAwins)
print("Final Result: ", nba_WestR1['Team'][21], ": ", GSWwins, "  ", nba_WestR1['Opponent'][21], ": ", LACwins)



Winner was  POR
Winner was  POR
Winner was  OKC
Winner was  OKC
Winner was  POR
Winner was  OKC
Winner was  POR
############################################################################################################################
Winner was  DEN
Winner was  DEN
Winner was  SAS
Winner was  SAS
Winner was  DEN
Winner was  SAS
Winner was  DEN
############################################################################################################################
Winner was  HOU
Winner was  HOU
Winner was  UTA
Winner was  UTA
Winner was  HOU
Winner was  UTA
Winner was  HOU
############################################################################################################################
Winner was  GSW
Winner was  GSW
Winner was  GSW
Winner was  GSW
Final Result:  POR :  4    OKC :  3
Final Result:  DEN :  4    SAS :  3
Final Result:  HOU :  4    UTA :  3
Final Result:  GSW :  4    LAC :  0


### Round 2

In [214]:
#Second round, #Conf Finals # Finals -use merge

In [215]:
East_r1S2 = {'Team': ["MIL", "MIL", "MIL", "MIL", "MIL", "MIL", "MIL",
                     "TOR","TOR","TOR","TOR","TOR","TOR","TOR"], 
             'Opponent': ["BOS","BOS","BOS","BOS","BOS","BOS","BOS",
                     "PHI","PHI","PHI","PHI","PHI","PHI","PHI"],
       'Home': ["Home","Home", "Away", "Away","Home","Away","Home",
               "Home","Home", "Away", "Away","Home","Away","Home"], 
       'TeamConf': ["East", "East", "East", "East", "East", "East", "East",
                   "East", "East", "East", "East", "East", "East", "East"], 
       'OppConf': ["East", "East", "East", "East", "East", "East", "East",
                  "East", "East", "East", "East", "East", "East", "East"],
      'TeamAllStars': [2, 2, 2, 2, 2, 2, 2,
                      2, 2, 2, 2, 2, 2, 2], 
      'OppAllStars': [1, 1, 1, 1, 1, 1, 1,
                      2, 2, 2, 2, 2, 2, 2], 
      'TMadeP': ['Y', 'Y','Y','Y','Y','Y','Y',
                'Y', 'Y','Y','Y','Y','Y','Y'],
      'OppMadeP': ['Y', 'Y','Y','Y','Y','Y','Y',
                  'Y', 'Y','Y','Y','Y','Y','Y']}


West_r1S2 = {'Team': ["GSW", "GSW", "GSW", "GSW", "GSW", "GSW", "GSW",
                     "DEN","DEN","DEN","DEN","DEN","DEN","DEN"], 
             'Opponent': ["HOU","HOU","HOU","HOU","HOU","HOU","HOU",
                     "POR","POR","POR","POR","POR","POR","POR"],
       'Home': ["Home","Home", "Away", "Away","Home","Away","Home",
               "Home","Home", "Away", "Away","Home","Away","Home"], 
       'TeamConf': ["West", "West", "West", "West", "West", "West", "West",
                   "West", "West", "West", "West", "West", "West", "West"], 
       'OppConf': ["West", "West", "West", "West", "West", "West", "West",
                  "West", "West", "West", "West", "West", "West", "West"],
      'TeamAllStars': [3, 3, 3, 3, 3, 3, 3,
                      1, 1, 1, 1, 1, 1, 1], 
      'OppAllStars': [1, 1, 1, 1, 1, 1, 1,
                      1, 1, 1, 1, 1, 1, 1], 
      'TMadeP': ['Y', 'Y','Y','Y','Y','Y','Y',
                'N', 'N','N','N','N','N','N'],
      'OppMadeP': ['Y', 'Y','Y','Y','Y','Y','Y',
                  'Y', 'Y','Y','Y','Y','Y','Y']}
        
        
east_semi = pd.DataFrame(East_r1S2)
west_semi =  pd.DataFrame(West_r1S2)

In [216]:
east_semi

Unnamed: 0,Team,Opponent,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP
0,MIL,BOS,Home,East,East,2,1,Y,Y
1,MIL,BOS,Home,East,East,2,1,Y,Y
2,MIL,BOS,Away,East,East,2,1,Y,Y
3,MIL,BOS,Away,East,East,2,1,Y,Y
4,MIL,BOS,Home,East,East,2,1,Y,Y
5,MIL,BOS,Away,East,East,2,1,Y,Y
6,MIL,BOS,Home,East,East,2,1,Y,Y
7,TOR,PHI,Home,East,East,2,2,Y,Y
8,TOR,PHI,Home,East,East,2,2,Y,Y
9,TOR,PHI,Away,East,East,2,2,Y,Y


In [217]:
#Tstats_19 = pd.read_csv('Tstats19.csv')
#Ostats_19 = pd.read_csv('Ostats19.csv')

nba_e6 = pd.merge(Tstats_19, east_semi, on = ['Team'], how='outer')
nba_e7 = pd.merge(Ostats_19, nba_e6, on = ['Opponent'], how='outer')
nba_e7.dropna(how='any', inplace=True)


nba_w6 = pd.merge(Tstats_19, west_semi, on = ['Team'], how='outer')
nba_w7 = pd.merge(Ostats_19, nba_w6, on = ['Opponent'], how='outer')
nba_w7.dropna(how='any', inplace=True)


#nba_E5 = pd.merge(roster19, nba_E4, on = ['Team'], how='outer')
#nba_EastR1 = pd.merge(Oroster19, nba_E5, on = ['Opponent'], how='outer')
#roster19.rename(columns={'Team': 'Opponent'}, inplace=True)

In [218]:
#Add odds, rating, sos, projw
nba_e7.head()

Unnamed: 0,Opponent,PW_x,SOS_x,Team,PW_y,SOS_y,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP
7,BOS,52.0,-0.54,MIL,61.0,-0.82,Home,East,East,2.0,1.0,Y,Y
8,BOS,52.0,-0.54,MIL,61.0,-0.82,Home,East,East,2.0,1.0,Y,Y
9,BOS,52.0,-0.54,MIL,61.0,-0.82,Away,East,East,2.0,1.0,Y,Y
10,BOS,52.0,-0.54,MIL,61.0,-0.82,Away,East,East,2.0,1.0,Y,Y
11,BOS,52.0,-0.54,MIL,61.0,-0.82,Home,East,East,2.0,1.0,Y,Y


In [219]:
#roster19.head()
roster19.rename(columns={'Opponent': 'Team'}, inplace=True)
nba_e8 = pd.merge(roster19, nba_e7, on = ['Team'], how='outer')
nba_w8 = pd.merge(roster19, nba_w7, on = ['Team'], how='outer')
roster19.rename(columns={'Team': 'Opponent'}, inplace=True)
nba_e9 = pd.merge(roster19, nba_e8, on = ['Opponent'], how='outer')
nba_w9 = pd.merge(roster19, nba_w8, on = ['Opponent'], how='outer')
nba_e9.dropna(how='any', inplace=True)
nba_w9.dropna(how='any', inplace=True)
#nba_E5 = pd.merge(roster19, nba_E4, on = ['Team'], how='outer')
#nba_EastR1 = pd.merge(Oroster19, nba_E5, on = ['Opponent'], how='outer')
#roster19.rename(columns={'Team': 'Opponent'}, inplace=True)

nba_E2pred = pd.merge(TeamOdds_19, nba_e9, on = ['Team'], how='outer')
nba_EastR2= pd.merge(OppOdds_19, nba_E2pred, on = ['Opponent'], how='outer')
nba_EastR2.dropna(how='any', inplace=True)

nba_W2pred = pd.merge(TeamOdds_19, nba_w9, on = ['Team'], how='outer')
nba_WestR2= pd.merge(OppOdds_19, nba_W2pred, on = ['Opponent'], how='outer')
nba_WestR2.dropna(how='any', inplace=True)
#Add Odds


In [220]:
nba_EastR2.rename(columns={'Odds_x':'OpOdds', 'Odds_y': 'TOdds', 'SOS_x': 'OSOS', 'SOS_y': 'TSOS', 'PW_x': 'OProjW',
                          'PW_y': 'TProjW', 'Rating_y': 'TeamRating', 'Rating_x': 'OppRating'}, inplace=True)

nba_WestR2.rename(columns={'Odds_x':'OpOdds', 'Odds_y': 'TOdds', 'SOS_x': 'OSOS', 'SOS_y': 'TSOS', 'PW_x': 'OProjW',
                          'PW_y': 'TProjW', 'Rating_y': 'TeamRating', 'Rating_x': 'OppRating'}, inplace=True)

In [221]:
nba_EastR2.head()

Unnamed: 0,Opponent,OpOdds,Team,TOdds,OppRating,TeamRating,OProjW,OSOS,TProjW,TSOS,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP
1,BOS,13.0,MIL,0.0,76.17,75.68,52.0,-0.54,61.0,-0.82,Home,East,East,2.0,1.0,Y,Y
2,BOS,13.0,MIL,0.0,76.17,75.68,52.0,-0.54,61.0,-0.82,Home,East,East,2.0,1.0,Y,Y
3,BOS,13.0,MIL,0.0,76.17,75.68,52.0,-0.54,61.0,-0.82,Away,East,East,2.0,1.0,Y,Y
4,BOS,13.0,MIL,0.0,76.17,75.68,52.0,-0.54,61.0,-0.82,Away,East,East,2.0,1.0,Y,Y
5,BOS,13.0,MIL,0.0,76.17,75.68,52.0,-0.54,61.0,-0.82,Home,East,East,2.0,1.0,Y,Y


In [222]:
nba_WestR2.head()

Unnamed: 0,Opponent,OpOdds,Team,TOdds,OppRating,TeamRating,OProjW,OSOS,TProjW,TSOS,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP
2,HOU,9.0,GSW,62.0,76.88,79.27,53.0,0.19,56.0,-0.04,Home,West,West,3.0,1.0,Y,Y
3,HOU,9.0,GSW,62.0,76.88,79.27,53.0,0.19,56.0,-0.04,Home,West,West,3.0,1.0,Y,Y
4,HOU,9.0,GSW,62.0,76.88,79.27,53.0,0.19,56.0,-0.04,Away,West,West,3.0,1.0,Y,Y
5,HOU,9.0,GSW,62.0,76.88,79.27,53.0,0.19,56.0,-0.04,Away,West,West,3.0,1.0,Y,Y
6,HOU,9.0,GSW,62.0,76.88,79.27,53.0,0.19,56.0,-0.04,Home,West,West,3.0,1.0,Y,Y


In [223]:
nba_WestR2.reset_index(drop=True,inplace=True)
nba_EastR2.reset_index(drop=True,inplace=True)

In [224]:
#Label Encoder
nba_EastR2.loc[:,['Home']] = label_encoder.fit_transform(nba_EastR2.loc[:,['Home']]).astype('float64')
nba_EastR2.loc[:,['TeamConf']] = label_encoder.fit_transform(nba_EastR2.loc[:,['TeamConf']]).astype('float64')
nba_EastR2.loc[:,['OppConf']] = label_encoder.fit_transform(nba_EastR2.loc[:,['OppConf']]).astype('float64')
nba_EastR2.loc[:,['TMadeP']] = label_encoder.fit_transform(nba_EastR2.loc[:,['TMadeP']]).astype('float64')
nba_EastR2.loc[:,['OppMadeP']] = label_encoder.fit_transform(nba_EastR2.loc[:,['OppMadeP']]).astype('float64')

nba_WestR2.loc[:,['Home']] = label_encoder.fit_transform(nba_WestR2.loc[:,['Home']]).astype('float64')
nba_WestR2.loc[:,['TeamConf']] = label_encoder.fit_transform(nba_WestR2.loc[:,['TeamConf']]).astype('float64')
nba_WestR2.loc[:,['OppConf']] = label_encoder.fit_transform(nba_WestR2.loc[:,['OppConf']]).astype('float64')
nba_WestR2.loc[:,['TMadeP']] = label_encoder.fit_transform(nba_WestR2.loc[:,['TMadeP']]).astype('float64')
nba_WestR2.loc[:,['OppMadeP']] = label_encoder.fit_transform(nba_WestR2.loc[:,['OppMadeP']]).astype('float64')

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [225]:
#Eastern Conference Round 2 Prediction Example
T1wins = 0
T2wins = 0
for i in range(0,7): #0-6
    
    if (T1wins == 4 or T2wins == 4):
        break
    outcomes = grid.predict([[nba_EastR2['Home'][i], nba_EastR2['TeamConf'][i], nba_EastR2['OppConf'][i], nba_EastR2['TMadeP'][i], 
                             nba_EastR2['OppMadeP'][i], nba_EastR2['TOdds'][i], nba_EastR2['OpOdds'][i], nba_EastR2['TeamRating'][i], 
                              nba_EastR2['OppRating'][i], nba_EastR2['TeamAllStars'][i], 
                             nba_EastR2['OppAllStars'][i], nba_EastR2['OProjW'][i], nba_EastR2['TProjW'][i], 
                             nba_EastR2['TSOS'][i], nba_EastR2['OSOS'][i]]])

    #print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_EastR2['Opponent'][0])
        T2wins = T2wins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_EastR2['Team'][0])
        T1wins = T1wins + 1

print('############################################################################################################################')

T3wins = 0
T4wins = 0
for i in range(7, 14): #7-13
   
    if (T3wins == 4 or T4wins == 4):
          break

    outcomes = grid.predict([[nba_EastR2['Home'][i], nba_EastR2['TeamConf'][i], nba_EastR2['OppConf'][i], nba_EastR2['TMadeP'][i], 
                             nba_EastR2['OppMadeP'][i], nba_EastR2['TOdds'][i], nba_EastR2['OpOdds'][i], nba_EastR2['TeamRating'][i], 
                              nba_EastR2['OppRating'][i], nba_EastR2['TeamAllStars'][i], 
                             nba_EastR2['OppAllStars'][i], nba_EastR2['OProjW'][i], nba_EastR2['TProjW'][i], 
                             nba_EastR2['TSOS'][i], nba_EastR2['OSOS'][i]]])

   # print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_EastR2['Opponent'][7])
        T4wins = T4wins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_EastR2['Team'][7])
        T3wins = T3wins + 1

print("Final Result: ", nba_EastR2['Team'][0], ": ", T1wins, "  ", nba_EastR2['Opponent'][0], ": ", T2wins)
print("Final Result: ", nba_EastR2['Team'][7], ": ", T3wins, "  ", nba_EastR2['Opponent'][7], ": ", T4wins)

Winner was  MIL
Winner was  MIL
Winner was  MIL
Winner was  MIL
############################################################################################################################
Winner was  TOR
Winner was  TOR
Winner was  PHI
Winner was  PHI
Winner was  TOR
Winner was  PHI
Winner was  TOR
Final Result:  MIL :  4    BOS :  0
Final Result:  TOR :  4    PHI :  3


In [226]:
T1wins = 0
T2wins = 0
for i in range(0,7): #0-6
    
    if (T1wins == 4 or T2wins == 4):
        break
    outcomes = grid.predict([[nba_WestR2['Home'][i], nba_WestR2['TeamConf'][i], nba_WestR2['OppConf'][i], nba_WestR2['TMadeP'][i], 
                             nba_WestR2['OppMadeP'][i], nba_WestR2['TOdds'][i], nba_WestR2['OpOdds'][i], nba_WestR2['TeamRating'][i], 
                              nba_WestR2['OppRating'][i], nba_WestR2['TeamAllStars'][i], 
                             nba_WestR2['OppAllStars'][i], nba_WestR2['OProjW'][i], nba_WestR2['TProjW'][i], 
                             nba_WestR2['TSOS'][i], nba_WestR2['OSOS'][i]]])

    #print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_WestR2['Opponent'][0])
        T2wins = T2wins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_WestR2['Team'][0])
        T1wins = T1wins + 1

print('############################################################################################################################')

T3wins = 0
T4wins = 0
for i in range(7, 14): #7-13
   
    if (T3wins == 4 or T4wins == 4):
          break

    outcomes = grid.predict([[nba_WestR2['Home'][i], nba_WestR2['TeamConf'][i], nba_WestR2['OppConf'][i], nba_WestR2['TMadeP'][i], 
                             nba_WestR2['OppMadeP'][i], nba_WestR2['TOdds'][i], nba_WestR2['OpOdds'][i], nba_WestR2['TeamRating'][i], 
                              nba_WestR2['OppRating'][i], nba_WestR2['TeamAllStars'][i], 
                             nba_WestR2['OppAllStars'][i], nba_WestR2['OProjW'][i], nba_WestR2['TProjW'][i], 
                             nba_WestR2['TSOS'][i], nba_WestR2['OSOS'][i]]])

   # print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_WestR2['Opponent'][7])
        T4wins = T4wins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_WestR2['Team'][7])
        T3wins = T3wins + 1

print("Final Result: ", nba_WestR2['Team'][0], ": ", T1wins, "  ", nba_WestR2['Opponent'][0], ": ", T2wins)
print("Final Result: ", nba_WestR2['Team'][7], ": ", T3wins, "  ", nba_WestR2['Opponent'][7], ": ", T4wins)

Winner was  GSW
Winner was  GSW
Winner was  HOU
Winner was  HOU
Winner was  GSW
Winner was  HOU
Winner was  GSW
############################################################################################################################
Winner was  DEN
Winner was  DEN
Winner was  POR
Winner was  POR
Winner was  DEN
Winner was  POR
Winner was  DEN
Final Result:  GSW :  4    HOU :  3
Final Result:  DEN :  4    POR :  3


### Round 3 - Conference Finals

In [227]:
East_r1S3 = {'Team': ["MIL", "MIL", "MIL", "MIL", "MIL", "MIL", "MIL"], 
             'Opponent': ["TOR","TOR","TOR","TOR","TOR","TOR","TOR"],
       'Home': ["Home","Home", "Away", "Away","Home","Away","Home"], 
       'TeamConf': ["East", "East", "East", "East", "East", "East", "East"], 
       'OppConf': ["East", "East", "East", "East", "East", "East", "East"],
      'TeamAllStars': [2, 2, 2, 2, 2, 2, 2], 
      'OppAllStars': [2, 2, 2, 2, 2, 2, 2], 
      'TMadeP': ['Y', 'Y','Y','Y','Y','Y','Y'],
      'OppMadeP': ['Y', 'Y','Y','Y','Y','Y','Y']}

West_r1S3 = {'Team': ["GSW", "GSW", "GSW", "GSW", "GSW", "GSW", "GSW"], 
             'Opponent': ["DEN","DEN","DEN","DEN","DEN","DEN","DEN"],
       'Home': ["Home","Home", "Away", "Away","Home","Away","Home"], 
       'TeamConf': ["West", "West", "West", "West", "West", "West", "West"], 
       'OppConf': ["West", "West", "West", "West", "West", "West", "West"],
      'TeamAllStars': [3, 3, 3, 3, 3, 3, 3], 
      'OppAllStars': [1, 1, 1, 1, 1, 1, 1], 
      'TMadeP': ['Y', 'Y','Y','Y','Y','Y','Y'],
      'OppMadeP': ['N', 'N','N','N','N','N','N']}

east_final = pd.DataFrame(East_r1S3)
west_final =  pd.DataFrame(West_r1S3)

In [228]:
nba_ef1 = pd.merge(Tstats_19, east_final, on = ['Team'], how='outer')
nba_ef2 = pd.merge(Ostats_19, nba_ef1, on = ['Opponent'], how='outer')
nba_ef2.dropna(how='any', inplace=True)


nba_wf1 = pd.merge(Tstats_19, west_final, on = ['Team'], how='outer')
nba_wf2 = pd.merge(Ostats_19, nba_wf1, on = ['Opponent'], how='outer')
nba_wf2.dropna(how='any', inplace=True)


In [229]:
roster19.rename(columns={'Opponent': 'Team'}, inplace=True)
nba_ef3 = pd.merge(roster19, nba_ef2, on = ['Team'], how='outer')
nba_wf3 = pd.merge(roster19, nba_wf2, on = ['Team'], how='outer')
roster19.rename(columns={'Team': 'Opponent'}, inplace=True)
nba_ef4 = pd.merge(roster19, nba_ef3, on = ['Opponent'], how='outer')
nba_wf4 = pd.merge(roster19, nba_wf3, on = ['Opponent'], how='outer')
nba_ef4.dropna(how='any', inplace=True)
nba_wf4.dropna(how='any', inplace=True)
#nba_E5 = pd.merge(roster19, nba_E4, on = ['Team'], how='outer')
#nba_EastR1 = pd.merge(Oroster19, nba_E5, on = ['Opponent'], how='outer')
#roster19.rename(columns={'Team': 'Opponent'}, inplace=True)

nba_ef5 = pd.merge(TeamOdds_19, nba_ef4, on = ['Team'], how='outer')
nba_EastF= pd.merge(OppOdds_19, nba_ef5, on = ['Opponent'], how='outer')
nba_EastF.dropna(how='any', inplace=True)

nba_wf5 = pd.merge(TeamOdds_19, nba_wf4, on = ['Team'], how='outer')
nba_WestF= pd.merge(OppOdds_19, nba_wf5, on = ['Opponent'], how='outer')
nba_WestF.dropna(how='any', inplace=True)

In [230]:
nba_EastF.rename(columns={'Odds_x':'OpOdds', 'Odds_y': 'TOdds', 'SOS_x': 'OSOS', 'SOS_y': 'TSOS', 'PW_x': 'OProjW',
                          'PW_y': 'TProjW', 'Rating_y': 'TeamRating', 'Rating_x': 'OppRating'}, inplace=True)

nba_WestF.rename(columns={'Odds_x':'OpOdds', 'Odds_y': 'TOdds', 'SOS_x': 'OSOS', 'SOS_y': 'TSOS', 'PW_x': 'OProjW',
                          'PW_y': 'TProjW', 'Rating_y': 'TeamRating', 'Rating_x': 'OppRating'}, inplace=True)

nba_WestF.reset_index(drop=True,inplace=True)
nba_EastF.reset_index(drop=True,inplace=True)

In [231]:
nba_WestF

Unnamed: 0,Opponent,OpOdds,Team,TOdds,OppRating,TeamRating,OProjW,OSOS,TProjW,TSOS,Home,TeamConf,OppConf,TeamAllStars,OppAllStars,TMadeP,OppMadeP
0,DEN,0.0,GSW,62.0,75.94,79.27,51.0,0.24,56.0,-0.04,Home,West,West,3.0,1.0,Y,N
1,DEN,0.0,GSW,62.0,75.94,79.27,51.0,0.24,56.0,-0.04,Home,West,West,3.0,1.0,Y,N
2,DEN,0.0,GSW,62.0,75.94,79.27,51.0,0.24,56.0,-0.04,Away,West,West,3.0,1.0,Y,N
3,DEN,0.0,GSW,62.0,75.94,79.27,51.0,0.24,56.0,-0.04,Away,West,West,3.0,1.0,Y,N
4,DEN,0.0,GSW,62.0,75.94,79.27,51.0,0.24,56.0,-0.04,Home,West,West,3.0,1.0,Y,N
5,DEN,0.0,GSW,62.0,75.94,79.27,51.0,0.24,56.0,-0.04,Away,West,West,3.0,1.0,Y,N
6,DEN,0.0,GSW,62.0,75.94,79.27,51.0,0.24,56.0,-0.04,Home,West,West,3.0,1.0,Y,N


In [232]:
nba_EastF.loc[:,['Home']] = label_encoder.fit_transform(nba_EastF.loc[:,['Home']]).astype('float64')
nba_EastF.loc[:,['TeamConf']] = label_encoder.fit_transform(nba_EastF.loc[:,['TeamConf']]).astype('float64')
nba_EastF.loc[:,['OppConf']] = label_encoder.fit_transform(nba_EastF.loc[:,['OppConf']]).astype('float64')
nba_EastF.loc[:,['TMadeP']] = label_encoder.fit_transform(nba_EastF.loc[:,['TMadeP']]).astype('float64')
nba_EastF.loc[:,['OppMadeP']] = label_encoder.fit_transform(nba_EastF.loc[:,['OppMadeP']]).astype('float64')

nba_WestF.loc[:,['Home']] = label_encoder.fit_transform(nba_WestF.loc[:,['Home']]).astype('float64')
nba_WestF.loc[:,['TeamConf']] = label_encoder.fit_transform(nba_WestF.loc[:,['TeamConf']]).astype('float64')
nba_WestF.loc[:,['OppConf']] = label_encoder.fit_transform(nba_WestF.loc[:,['OppConf']]).astype('float64')
nba_WestF.loc[:,['TMadeP']] = label_encoder.fit_transform(nba_WestF.loc[:,['TMadeP']]).astype('float64')
nba_WestF.loc[:,['OppMadeP']] = label_encoder.fit_transform(nba_WestF.loc[:,['OppMadeP']]).astype('float64')

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [233]:
T1wins = 0
T2wins = 0
for i in range(0,7): #0-6
    
    if (T1wins == 4 or T2wins == 4):
        break
    outcomes = grid.predict([[nba_EastF['Home'][i], nba_EastF['TeamConf'][i], nba_EastF['OppConf'][i], nba_EastF['TMadeP'][i], 
                             nba_EastF['OppMadeP'][i], nba_EastF['TOdds'][i], nba_EastF['OpOdds'][i], nba_EastF['TeamRating'][i], 
                              nba_EastF['OppRating'][i], nba_EastF['TeamAllStars'][i], 
                             nba_EastF['OppAllStars'][i], nba_EastF['OProjW'][i], nba_EastF['TProjW'][i], 
                             nba_EastF['TSOS'][i], nba_EastF['OSOS'][i]]])

    #print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_EastF['Opponent'][0])
        T2wins = T2wins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_EastF['Team'][0])
        T1wins = T1wins + 1
        
print("Final Result: ", nba_EastF['Team'][0], ": ", T1wins, "  ", nba_EastF['Opponent'][0], ": ", T2wins)

Winner was  MIL
Winner was  MIL
Winner was  TOR
Winner was  TOR
Winner was  MIL
Winner was  TOR
Winner was  MIL
Final Result:  MIL :  4    TOR :  3


In [234]:
T1wins = 0
T2wins = 0
for i in range(0,7): #0-6
    
    if (T1wins == 4 or T2wins == 4):
        break
    outcomes = grid.predict([[nba_WestF['Home'][i], nba_WestF['TeamConf'][i], nba_WestF['OppConf'][i], nba_WestF['TMadeP'][i], 
                             nba_WestF['OppMadeP'][i], nba_WestF['TOdds'][i], nba_WestF['OpOdds'][i], nba_WestF['TeamRating'][i], 
                              nba_WestF['OppRating'][i], nba_WestF['TeamAllStars'][i], 
                             nba_WestF['OppAllStars'][i], nba_WestF['OProjW'][i], nba_WestF['TProjW'][i], 
                             nba_WestF['TSOS'][i], nba_WestF['OSOS'][i]]])

    #print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_WestF['Opponent'][0])
        T2wins = T2wins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_WestF['Team'][0])
        T1wins = T1wins + 1
        
print("Final Result: ", nba_WestF['Team'][0], ": ", T1wins, "  ", nba_WestF['Opponent'][0], ": ", T2wins)

Winner was  GSW
Winner was  GSW
Winner was  DEN
Winner was  DEN
Winner was  GSW
Winner was  DEN
Winner was  GSW
Final Result:  GSW :  4    DEN :  3


## NBA Finals Prediction

In [235]:
# NBA Finals - Who gets Home court advantage -> The better season record

NBA_f1 = {'Team': ["MIL", "MIL", "MIL", "MIL", "MIL", "MIL", "MIL"], 
             'Opponent': ["GSW","GSW","GSW","GSW","GSW","GSW","GSW"],
       'Home': ["Home","Home", "Away", "Away","Home","Away","Home"], 
       'TeamConf': ["East", "East", "East", "East", "East", "East", "East"], 
       'OppConf': ["West", "West", "West", "West", "West", "West", "West"],
      'TeamAllStars': [2, 2, 2, 2, 2, 2, 2], 
      'OppAllStars': [3, 3, 3, 3, 3, 3, 3], 
      'TMadeP': ['Y', 'Y','Y','Y','Y','Y','Y'],
      'OppMadeP': ['Y', 'Y','Y','Y','Y','Y','Y']}

nba_f = pd.DataFrame(NBA_f1)

In [236]:
nba_f1 = pd.merge(Tstats_19, nba_f, on = ['Team'], how='outer')
nba_f2 = pd.merge(Ostats_19, nba_f1, on = ['Opponent'], how='outer')
nba_f2.dropna(how='any', inplace=True)

roster19.rename(columns={'Opponent': 'Team'}, inplace=True)
nba_f3 = pd.merge(roster19, nba_f2, on = ['Team'], how='outer')
roster19.rename(columns={'Team': 'Opponent'}, inplace=True)
nba_f4 = pd.merge(roster19, nba_f3, on = ['Opponent'], how='outer')

nba_ef4.dropna(how='any', inplace=True)
#nba_wf4.dropna(how='any', inplace=True)
#nba_E5 = pd.merge(roster19, nba_E4, on = ['Team'], how='outer')
#nba_EastR1 = pd.merge(Oroster19, nba_E5, on = ['Opponent'], how='outer')
#roster19.rename(columns={'Team': 'Opponent'}, inplace=True)

nba_f5 = pd.merge(TeamOdds_19, nba_f4, on = ['Team'], how='outer')
nba_Finals= pd.merge(OppOdds_19, nba_f5, on = ['Opponent'], how='outer')
nba_Finals.dropna(how='any', inplace=True)

#Reset index
nba_Finals.reset_index(drop=True,inplace=True)

In [237]:
nba_Finals.rename(columns={'Odds_x':'OpOdds', 'Odds_y': 'TOdds', 'SOS_x': 'OSOS', 'SOS_y': 'TSOS', 'PW_x': 'OProjW',
                          'PW_y': 'TProjW', 'Rating_y': 'TeamRating', 'Rating_x': 'OppRating'}, inplace=True)

In [238]:
nba_Finals.loc[:,['Home']] = label_encoder.fit_transform(nba_Finals.loc[:,['Home']]).astype('float64')
nba_Finals.loc[:,['TeamConf']] = label_encoder.fit_transform(nba_Finals.loc[:,['TeamConf']]).astype('float64')
nba_Finals.loc[:,['OppConf']] = label_encoder.fit_transform(nba_Finals.loc[:,['OppConf']]).astype('float64')
nba_Finals.loc[:,['TMadeP']] = label_encoder.fit_transform(nba_Finals.loc[:,['TMadeP']]).astype('float64')
nba_Finals.loc[:,['OppMadeP']] = label_encoder.fit_transform(nba_Finals.loc[:,['OppMadeP']]).astype('float64')

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [239]:
#nba_Finals['TeamStronger'] = nba_Finals['TeamRating'] > nba_Finals['OppRating']

In [240]:
T1wins = 0
T2wins = 0
for i in range(0,7): #0-6
    
    if (T1wins == 4 or T2wins == 4):
        break
    outcomes = grid.predict([[nba_Finals['Home'][i], nba_Finals['TeamConf'][i], nba_Finals['OppConf'][i], nba_Finals['TMadeP'][i], 
                             nba_Finals['OppMadeP'][i], nba_Finals['TOdds'][i], nba_Finals['OpOdds'][i], nba_Finals['TeamRating'][i], 
                              nba_Finals['OppRating'][i], nba_Finals['TeamAllStars'][i], 
                             nba_Finals['OppAllStars'][i], nba_Finals['OProjW'][i], nba_Finals['TProjW'][i],
                             nba_Finals['TSOS'][i], nba_Finals['OSOS'][i]]])

    
    #print("Winner between ", nba_WestR1['Team'][5], "vs", nba_WestR1['Opponent'][5], "was: ")
    if (outcomes == 0):
        print("Winner was ",nba_Finals['Opponent'][0])
        T2wins = T2wins + 1
    elif (outcomes == 1):
        print("Winner was ", nba_Finals['Team'][0])
        T1wins = T1wins + 1
        
print("Final Result: ", nba_Finals['Team'][0], ": ", T1wins, "  ", nba_Finals['Opponent'][0], ": ", T2wins)

Winner was  MIL
Winner was  MIL
Winner was  GSW
Winner was  GSW
Winner was  MIL
Winner was  GSW
Winner was  MIL
Final Result:  MIL :  4    GSW :  3


In [None]:
# Actual Results vs my results -> Look at results document