In [1]:
import seaborn as sns
import pandas as pd
import numpy as np 
import re
pd.set_option('display.max_columns', 500)

In [2]:
#open csv, drop unnecessary columns
nba = pd.read_csv("sched1920.csv")
nba = nba.drop(columns = ["Unnamed: 6", "Start (ET)","Notes","Unnamed: 7","Attend."])


In [3]:
#clean date and turn to date_time type
nba.Date = nba.Date.str.replace("^[A-z]{3}","-")
nba.Date.str.lstrip("- ")
nba.Date = nba.Date.str.replace(" ","-").str.lstrip("-")
nba.Date = pd.to_datetime(nba.Date)

In [4]:
#renaming columns
nba = nba.rename(columns = {"PTS":"AwayPTS", "PTS.1":"HomePTS", "Visitor/Neutral":"Away","Home/Neutral":"Home", "Attend.":"Attend"})

In [5]:
nba.head()

Unnamed: 0,Date,Away,AwayPTS,Home,HomePTS
0,2019-10-22,New Orleans Pelicans,122.0,Toronto Raptors,130.0
1,2019-10-22,Los Angeles Lakers,102.0,Los Angeles Clippers,112.0
2,2019-10-23,Chicago Bulls,125.0,Charlotte Hornets,126.0
3,2019-10-23,Detroit Pistons,119.0,Indiana Pacers,110.0
4,2019-10-23,Cleveland Cavaliers,85.0,Orlando Magic,94.0


In [6]:
#creating Target column
nba["HomeWin"] = np.where(nba["HomePTS"] > nba["AwayPTS"], 1,0)


In [7]:
#copy of original dataframe to be filled with dummies
nbatest = nba[["Away","Home","HomeWin"]]


In [8]:
from sklearn import preprocessing
dummies = pd.get_dummies(nbatest[["Away","Home"]])
nbatest[dummies.columns] = dummies

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


In [9]:
nbatest = nbatest.drop(columns = ["Home","Away"])

In [10]:
#defining functions that check the accuracy score of our predictions using different models
#Random Forests
def RFscore(nbatest):
    X_train = nbatest[:550].drop(columns = "HomeWin")    
    y_train = nbatest["HomeWin"][:550]
    X_test = nbatest[550:596].drop(columns = "HomeWin")
    y_test = nbatest["HomeWin"][550:596]
    
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.datasets import make_classification
    mod = RandomForestClassifier(n_estimators = 100)
    mod.fit(X_train, y_train)
    y_pred = mod.predict(X_test)
    from sklearn.metrics import accuracy_score
    return accuracy_score(y_test,y_pred)


#SVC
def SVCscore(nbatest):
    #train data/ test data
    X_train = nbatest[:550].drop(columns = "HomeWin")    
    y_train = nbatest["HomeWin"][:550]
    X_test = nbatest[550:596].drop(columns = "HomeWin")
    y_test = nbatest["HomeWin"][550:596]
    
    #fit model
    from sklearn.svm import SVC
    svc = SVC(gamma='auto')
    svc.fit(X_train, y_train)
    y_pred = svc.predict(X_test)
    from sklearn.metrics import accuracy_score
    return accuracy_score(y_test,y_pred)


#LogisticRegression
def LRscore(nbatest):
    X_train = nbatest[:550].drop(columns = "HomeWin")    
    y_train = nbatest["HomeWin"][:550]
    X_test = nbatest[550:596].drop(columns = "HomeWin")
    y_test = nbatest["HomeWin"][550:596]
    
    from sklearn.linear_model import LogisticRegression
    mod = LogisticRegression()
    mod.fit(X_train,y_train)
    y_pred = mod.predict(X_test)
    from sklearn.metrics import accuracy_score
    return accuracy_score(y_test,y_pred)

In [11]:
print("Logistic Regression Accuracy Score: ",LRscore(nbatest),"\nSVC Accuracy score:",SVCscore(nbatest),"\nRandom Forests Accuracy score:",RFscore(nbatest))




Logistic Regression Accuracy Score:  0.7391304347826086 
SVC Accuracy score: 0.5434782608695652 
Random Forests Accuracy score: 0.6304347826086957


In [12]:
#Adding "time on the road" feature
nbatest["Timeaway"] = 0
away_counts = {}
for i in nba["Away"].unique():
    away_counts[i] = 0
for i in range(len(nba.Away)):
    away_counts[nba.Away[i]] +=1
    away_counts[nba.Home[i]] = 0
    nbatest.iloc[i,-1] = away_counts[nba.Away[i]]

print("Logistic Regression Accuracy Score: ",LRscore(nbatest),"\nSVC Accuracy score:",SVCscore(nbatest),"\nRandom Forests Accuracy score:",RFscore(nbatest))



Logistic Regression Accuracy Score:  0.7391304347826086 
SVC Accuracy score: 0.5434782608695652 
Random Forests Accuracy score: 0.5869565217391305




In [13]:
nbatest.head()

Unnamed: 0,HomeWin,Away_Atlanta Hawks,Away_Boston Celtics,Away_Brooklyn Nets,Away_Charlotte Hornets,Away_Chicago Bulls,Away_Cleveland Cavaliers,Away_Dallas Mavericks,Away_Denver Nuggets,Away_Detroit Pistons,Away_Golden State Warriors,Away_Houston Rockets,Away_Indiana Pacers,Away_Los Angeles Clippers,Away_Los Angeles Lakers,Away_Memphis Grizzlies,Away_Miami Heat,Away_Milwaukee Bucks,Away_Minnesota Timberwolves,Away_New Orleans Pelicans,Away_New York Knicks,Away_Oklahoma City Thunder,Away_Orlando Magic,Away_Philadelphia 76ers,Away_Phoenix Suns,Away_Portland Trail Blazers,Away_Sacramento Kings,Away_San Antonio Spurs,Away_Toronto Raptors,Away_Utah Jazz,Away_Washington Wizards,Home_Atlanta Hawks,Home_Boston Celtics,Home_Brooklyn Nets,Home_Charlotte Hornets,Home_Chicago Bulls,Home_Cleveland Cavaliers,Home_Dallas Mavericks,Home_Denver Nuggets,Home_Detroit Pistons,Home_Golden State Warriors,Home_Houston Rockets,Home_Indiana Pacers,Home_Los Angeles Clippers,Home_Los Angeles Lakers,Home_Memphis Grizzlies,Home_Miami Heat,Home_Milwaukee Bucks,Home_Minnesota Timberwolves,Home_New Orleans Pelicans,Home_New York Knicks,Home_Oklahoma City Thunder,Home_Orlando Magic,Home_Philadelphia 76ers,Home_Phoenix Suns,Home_Portland Trail Blazers,Home_Sacramento Kings,Home_San Antonio Spurs,Home_Toronto Raptors,Home_Utah Jazz,Home_Washington Wizards,Timeaway
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1


In [14]:
#adding Home win streak and Away win streak features

nbatest["HomeWinStreak"] = 0
nbatest["AwayWinStreak"] = 0
win_counts = {}
for i in nba["Away"].unique():
    win_counts[i] = 0
    
for row in range(len(nba)):
    nbatest.iloc[row,-2] = win_counts[nba["Home"][row]]
    nbatest.iloc[row,-1] = win_counts[nba["Away"][row]]
    if nbatest["HomeWin"][row] == 1:
        win_counts[nba["Home"][row]] +=1
        win_counts[nba["Away"][row]] == 0
    else:
        win_counts[nba["Away"][row]] +=1
        win_counts[nba["Home"][row]] == 0

    
print("Logistic Regression Accuracy Score: ",LRscore(nbatest),"\nSVC Accuracy score:",SVCscore(nbatest),"\nRandom Forests Accuracy score:",RFscore(nbatest))

Logistic Regression Accuracy Score:  0.6521739130434783 
SVC Accuracy score: 0.6304347826086957 
Random Forests Accuracy score: 0.6956521739130435




In [18]:
nbatest.head(40)

Unnamed: 0,HomeWin,Away_Atlanta Hawks,Away_Boston Celtics,Away_Brooklyn Nets,Away_Charlotte Hornets,Away_Chicago Bulls,Away_Cleveland Cavaliers,Away_Dallas Mavericks,Away_Denver Nuggets,Away_Detroit Pistons,Away_Golden State Warriors,Away_Houston Rockets,Away_Indiana Pacers,Away_Los Angeles Clippers,Away_Los Angeles Lakers,Away_Memphis Grizzlies,Away_Miami Heat,Away_Milwaukee Bucks,Away_Minnesota Timberwolves,Away_New Orleans Pelicans,Away_New York Knicks,Away_Oklahoma City Thunder,Away_Orlando Magic,Away_Philadelphia 76ers,Away_Phoenix Suns,Away_Portland Trail Blazers,Away_Sacramento Kings,Away_San Antonio Spurs,Away_Toronto Raptors,Away_Utah Jazz,Away_Washington Wizards,Home_Atlanta Hawks,Home_Boston Celtics,Home_Brooklyn Nets,Home_Charlotte Hornets,Home_Chicago Bulls,Home_Cleveland Cavaliers,Home_Dallas Mavericks,Home_Denver Nuggets,Home_Detroit Pistons,Home_Golden State Warriors,Home_Houston Rockets,Home_Indiana Pacers,Home_Los Angeles Clippers,Home_Los Angeles Lakers,Home_Memphis Grizzlies,Home_Miami Heat,Home_Milwaukee Bucks,Home_Minnesota Timberwolves,Home_New Orleans Pelicans,Home_New York Knicks,Home_Oklahoma City Thunder,Home_Orlando Magic,Home_Philadelphia 76ers,Home_Phoenix Suns,Home_Portland Trail Blazers,Home_Sacramento Kings,Home_San Antonio Spurs,Home_Toronto Raptors,Home_Utah Jazz,Home_Washington Wizards,Timeaway,HomeWinStreak,AwayWinStreak
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
4,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0
5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
6,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
7,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
8,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
9,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0


In [19]:
#adding Home lose streak and Away lose streak features
nbatest["HomeLoseStreak"] = 0
nbatest["AwayLoseStreak"] = 0
loss_counts = {}
for i in nba["Away"].unique():
    loss_counts[i] = 0
    

for row in range(len(nba)):
    nbatest.iloc[row,-2] = loss_counts[nba["Home"][row]]
    nbatest.iloc[row,-1] = loss_counts[nba["Away"][row]]
    if nbatest["HomeWin"][row] == 1:
        loss_counts[nba["Away"][row]] +=1
        loss_counts[nba["Home"][row]] == 0
    else:
        loss_counts[nba["Home"][row]] +=1
        loss_counts[nba["Away"][row]] == 0

    
print("Logistic Regression Accuracy Score: ",LRscore(nbatest),"\nSVC Accuracy score:",SVCscore(nbatest),"\nRandom Forests Accuracy score:",RFscore(nbatest))

Logistic Regression Accuracy Score:  0.6956521739130435 
SVC Accuracy score: 0.5869565217391305 
Random Forests Accuracy score: 0.6956521739130435




In [21]:
# nbatest.head(40)

In [None]:
allstar_count = {}
for i in nba.Home.unique():
    allstar_count[i] = 0
    
#West
allstar_count['Dallas Mavericks'] = 1
allstar_count['Houston Rockets'] = 2
allstar_count[ 'Los Angeles Clippers'] = 2
allstar_count['Los Angeles Lakers'] = 2
allstar_count['Portland Trail Blazers'] = 1
allstar_count['Utah Jazz'] = 1
allstar_count['Minnesota Timberwolves'] = 1
allstar_count['Denver Nuggets'] = 1
allstar_count['Phoenix Suns'] = 1


#East
allstar_count['Atlanta Hawks'] = 1
allstar_count['Boston Celtics'] = 2
allstar_count['Toronto Raptors'] = 1
allstar_count['Milwaukee Bucks'] = 2
allstar_count['Philadelphia 76ers'] = 2
allstar_count['Miami Heat'] = 2
allstar_count['Washington Wizards'] = 1
allstar_count['Indiana Pacers'] = 1







In [None]:
nbatest["HomeAllstars"] = 0
nbatest["AwayAllstars"] = 0
for i in range(len(nba)):
    nbatest.iloc[i,-1] = allstar_count[nba["Away"][i]]
    nbatest.iloc[i,-2] = allstar_count[nba["Home"][i]]


In [None]:
print("Logistic Regression Accuracy Score: ",LRscore(nbatest),"\nSVC Accuracy score:",SVCscore(nbatest),"\nRandom Forests Accuracy score:",RFscore(nbatest))


In [None]:
# from sklearn.metrics import confusion_matrix
# confusion_matrix()

In [None]:
#unused features
'''unused features'''
#unused features
'''unused features'''
#unused features
'''unused features'''
#unused features
'''unused features'''
#unused features
'''unused features'''
#unused features
'''unused features'''
#unused features
'''unused features'''
#unused features
'''unused features'''
#unused features
'''unused features'''

In [None]:
depth = pd.read_csv("depth.csv")
depth["avg"] = (depth["bench1"]+depth["bench2"]+depth["bench3"]+depth["bench4"]+depth["bench5"])/5

# depth

In [None]:
depth.sort_values("avg")

In [None]:
depth_dict ={}
for i in depth.team.unique():
    depth_dict[i] = float(depth[depth["team"] == i]["avg"])
nba.head()
print(depth_dict, nba.head())

In [None]:
nbatest["HomeBenchRating"] = 0
nbatest["AwayBenchRating"] = 0
for i in range(len(nba)):
    nbatest.iloc[i,-1] = depth_dict[nba.Away[i]]
    nbatest.iloc[i,-2] = depth_dict[nba.Home[i]]


In [None]:
print("Logistic Regression Accuracy Score: ",LRscore(nbatest),"\nSVC Accuracy score:",SVCscore(nbatest),"\nRandom Forests Accuracy score:",RFscore(nbatest))


In [None]:
nbatest.head(10)

In [None]:
score_dict = {}
for i in nba.Home.unique():
    score_dict[i] = {"counter" : 0, "totalscore" : 0}
score_dict["Miami Heat"]

In [None]:
nbatest.head(20)

In [None]:
nbatest["AwayAvgScore"] = 0
nbatest["HomeAvgScore"] = 0
for i in range(len(nba)):
    if score_dict[nba.Home[i]]["counter"] ==0 or score_dict[nba.Away[i]]["counter"] == 0:
        nbatest.iloc[i,-1] = 0
        nbatest.iloc[i,-2] = 0
    else: 
        nbatest.iloc[i,-1] = score_dict[nba.Home[i]]["totalscore"] / score_dict[nba.Home[i]]["counter"]
        nbatest.iloc[i,-2] = score_dict[nba.Away[i]]["totalscore"] / score_dict[nba.Away[i]]["counter"]
    #Home team counter and total score
    score_dict[nba.Home[i]]["counter"] +=1 
    score_dict[nba.Home[i]]["totalscore"] += nba.HomePTS[i]
    #Away team counter and score
    score_dict[nba.Away[i]]["counter"]+= 1 
    score_dict[nba.Away[i]]["totalscore"] += nba.AwayPTS[i]

print("Logistic Regression Accuracy Score: ",LRscore(nbatest),"\nSVC Accuracy score:",SVCscore(nbatest),"\nRandom Forests Accuracy score:",RFscore(nbatest))


In [None]:
nbatest.head()

In [None]:
from sklearn import preprocessing
scaler = preprocessing.StandardScaler().fit(nbatest[["AwayAvgScore","HomeAvgScore"]])
scaled = scaler.transform(nbatest[["AwayAvgScore","HomeAvgScore"]])
nbatest[["AwayAvgScore","HomeAvgScore"]]= scaled
print("Logistic Regression Accuracy Score: ",LRscore(nbatest),"\nSVC Accuracy score:",SVCscore(nbatest),"\nRandom Forests Accuracy score:",RFscore(nbatest))


In [None]:
sns.heatmap(nbatest[['Timeaway', 'HomeWinStreak', 'AwayWinStreak',
       'HomeLoseStreak', 'AwayLoseStreak', 'HomeCoachSavage',
       'AwayCoachSavage', 'HomeAllstars', 'AwayAllstars',"HomeBenchRating","AwayBenchRating"]].corr())

In [None]:
#RANKINGS
#Creates dictionary with team and their rankings
feb_ranks = pd.read_csv("feb_ranks.csv")
feb_ranks["Team"] = feb_ranks["Western Conference"]
rankdict = {}
for i in range(len(feb_ranks.Team)):
    rankdict[feb_ranks.Team[i]] = feb_ranks.Rk[i]


In [None]:
#Creates HomeRank and AwayRank for each matchup in nba dataset
nbatest["HomeRank"] = 0
nbatest["AwayRank"] = 0
for i in range(len(nbatest.HomeRank)):
    #Setting AwayRank for row i
    nbatest.iloc[i,-1] = rankdict[nba.Away[i]]
    #Setting HomeRank for row i
    nbatest.iloc[i,-2] = rankdict[nba.Home[i]]

In [None]:
from sklearn import preprocessing
scaler = preprocessing.StandardScaler().fit(nbatest[["HomeRank","AwayRank"]])
scaled = scaler.transform(nbatest[["HomeRank","AwayRank"]])
nbatest[["HomeRank","AwayRank"]]= scaled
print("Logistic Regression Accuracy Score: ",LRscore(nbatest),"\nSVC Accuracy score:",SVCscore(nbatest),"\nRandom Forests Accuracy score:",RFscore(nbatest))


In [None]:
#creates a single column that checks if the home team is ranked higher than away team 
rankspread = nbatest["HomeRank"]-nbatest["AwayRank"]
nbatest["HomeRanksHigher"]= 0
#if rankspread is positive, home is better
#if rankspread is negative, away is better
for i in range(len(nbatest)):
    if rankspread[i] > 0:
        nbatest.iloc[i,-1] = 1
    else:
        nbatest.iloc[i,-1] = 0
        

print("Logistic Regression Accuracy Score: ",LRscore(nbatest),"\nSVC Accuracy score:",SVCscore(nbatest),"\nRandom Forests Accuracy score:",RFscore(nbatest))

In [None]:
#creates  a single column that checks if the home team is significantly better than the away team
rankspread = nbatest["HomeRank"]-nbatest["AwayRank"]

nbatest["HomeisFav"]= 0
for i in range(len(nbatest)):
    if rankspread[i] > 0 and rankspread[i] >5:
        nbatest.iloc[i,-1] = 1
    elif rankspread[i]<0 and abs(rankspread[i])>5:
        nbatest.iloc[i,-1] = 0
    else:
        nbatest.iloc[i,-1] = 0
nbatest.drop(columns = ["HomeRank","AwayRank"], inplace = True) 
print("Logistic Regression Accuracy Score: ",LRscore(nbatest),"\nSVC Accuracy score:",SVCscore(nbatest),"\nRandom Forests Accuracy score:",RFscore(nbatest))

In [None]:

nbatest[["HomeWinStreak","AwayWinStreak"]]

In [None]:
X_train = nbatest[:984].drop(columns = "HomeWin")    
y_train = nbatest["HomeWin"][:984]
X_test = nbatest[984:].drop(columns = "HomeWin")
y_test = nbatest["HomeWin"][984:]
    
from sklearn.linear_model import LogisticRegression
mod = LogisticRegression()
mod.fit(X_train,y_train)
y_pred = mod.predict(X_test)
from sklearn.metrics import accuracy_score




In [None]:
nbatest.iloc[597:,1] = 1

In [None]:
#split data Train and Test 
X_train = nbatest[:500].drop(columns = "HomeWin")    
y_train = nbatest["HomeWin"][:500]
X_test = nbatest[500:550].drop(columns = "HomeWin")
y_test = nbatest["HomeWin"][500:550]
    
#fitting model 
from sklearn.linear_model import LogisticRegression
mod = LogisticRegression()
mod.fit(X_train,y_train)
    
#make predictions on 
y_pred = mod.predict(nbatest.iloc[550:600,1:])
    
    
probs = mod.predict_proba(nbatest.iloc[550:600,1:])
good_list = np.where(probs > .99)[0] 

#remeberthat next line is x>= end of train and x<= end of test-1
test_ready =[x for x in good_list if x >= 550 and x<=599]
test_indices = [i-550 for i in test_ready]
final_preds = [y_pred[i] for i in range(len(y_pred))if i in good_list ]

gamble = nba.iloc[test_ready,[0,1,3]]
gamble['HomeWinPredictions'] = final_preds

In [None]:
nbatest.iloc[597:,0] = 1


In [None]:
nbatest.iloc[:597,0].value_counts()

In [None]:
def real_predict(df, nba, train_upperbound, test_upperbound):
    #split data
    X_train = df[:train_upperbound].drop(columns = "HomeWin") 
    y_train = df['HomeWin'][:train_upperbound]

    
    #fit model
    from sklearn.linear_model import LogisticRegression
    mod = LogisticRegression()
    mod.fit(X_train,y_train)
    
    #make predictions on new data
    y_pred = mod.predict(df.iloc[train_upperbound:test_upperbound, 1:])
    
    #create probabilities of new y_pred predictions
    probabilities = mod.predict_proba(df.iloc[train_upperbound:test_upperbound, 1:])
    
    #make a list of the indices which have a pick with higher than 90% probability 
    solid_picks = np.where(probabilities > .99)
    
    #use these indices to see what the actual picks are 
    indices = [x+train_upperbound for x in solid_picks[0]]
    pick_frame = nba.iloc[indices]
    pick_frame['Predictions'] = y_pred[solid_picks[0]]
    

    return y_pred, probabilities

In [None]:
real_predict(nbatest,nba,597,650)

In [None]:
#full frame with all predictions and probabilities 
#must make return statement y_pred, probabilities 
preds, probs = real_predict(nbatest,nba,597,650)
df = nba.iloc[597:650,:-1]
df['predictions'] = preds
probs = [max(x) for x in probs]
df['probs'] = probs
df[df['predictions'] == 0]

In [None]:
realframe = real_predict(nbatest,nba,550,596)
from sklearn.metrics import accuracy_score
accuracy_score(realframe.HomeWin, realframe.Predictions)