In [160]:
# Model is built with guidance from https://bitedge.com/blog/predicting-winners-using-bitodds-data-and-python/#predicting-winners-and-finding-profitable-bets
# Only uses 3 factors to determine outcome of NBA game: if they were at home, if they were underdogs, and if they won their last game
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import requests
from bs4 import BeautifulSoup
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import numpy as np

In [161]:
# Scrape data
url = 'https://sportsdatabase.com/NBA/query.html?sdql=date%2C+team%2C+points%2C+o%3Ateam%2C+o%3Apoints%2C+money+line%2C+o%3Amoney+line%2C+streak%2C+site+%40+season+%3D+2020&submit=++S+D+Q+L+%21++'
page = requests.get(url)
soup = BeautifulSoup(page.text, 'lxml')
table1 = soup.find('table', id='pyql_id')

headers = []
for i in table1.find_all('th'):
    title = i.text
    headers.append(title)

df = pd.DataFrame(columns = headers)
for j in table1.find_all('tr')[1:]:
    row_data = j.find_all('td')
    row = [i.text for i in row_data]
    length = len(df)
    df.loc[length] = row

df

Unnamed: 0,date,team,points,o:team,o:points,money line,o:money line,streak,site
0,20201222,Lakers,109,Clippers,116,-134,+111,,home
1,20201222,Clippers,116,Lakers,109,+111,-134,,away
2,20201222,Nets,125,Warriors,99,-294,+235,,home
3,20201222,Warriors,99,Nets,125,+235,-294,,away
4,20201223,Bulls,104,Hawks,124,+107,-129,,home
...,...,...,...,...,...,...,...,...,...
2167,20210519,Warriors,100,Lakers,103,+197,-243,6,away
2168,20210520,Wizards,142,Pacers,115,-158,+132,-1,home
2169,20210520,Pacers,115,Wizards,142,+132,-158,2,away
2170,20210521,Warriors,112,Grizzlies,117,-209,+174,-1,home


In [162]:
# is_underdog
df["is_underdog"] = df["money line"] < df["o:money line"]

In [163]:
# is_home
df["is_home"] = df["site"] == "home" 

In [164]:
# is_winner
df["is_winner"] = df["points"] > df["o:points"]

In [165]:
# won_last_game
frames = []

for _, team_selections in df.groupby("team"):
    team_selections = team_selections.sort_values("date")
    team_selections["won_last_game"] = team_selections.is_winner.shift()
    frames.append(team_selections)

df = pd.concat(frames)
df = df.sort_index()

df = df.dropna(subset=["won_last_game"]).copy()
df = df.reset_index(drop=True)

In [166]:
#split train and test
test_size = 0.4
X_train, X_test, y_train, y_test = train_test_split(df[["is_underdog", "is_home", "won_last_game"]], df["is_winner"], test_size=test_size)
#df_train, df_test = np.split(df, 2)
#X_train = df_train[["is_underdog", "is_home", "won_last_game"]]
#X_test = df_test[["is_underdog", "is_home", "won_last_game"]]

#y_train = df_train.is_winner
#y_test = df_test.is_winner
print(X_test)

      is_underdog  is_home won_last_game
2125         True    False         False
185          True    False          True
2035        False    False          True
1262         True     True          True
407          True    False          True
...           ...      ...           ...
1058         True     True          True
2091         True    False          True
1021        False    False          True
975         False    False         False
586         False     True         False

[857 rows x 3 columns]


In [167]:
model = SVC(probability = True).fit(X_train, y_train)
prediction_accuracy = (model.predict(X_test) == y_test).mean() * 100
print(prediction_accuracy)

50.291715285880976


In [170]:
#Backtesting to find model profit in test dataset
predicted_prob = model.predict_proba(X_test)[:, 1]
ind_list = X_test.index.tolist()
test_df = df.iloc[ind_list]
test_df["proba"] = predicted_prob

def team_bet(row):
    # If team predicted to win
    if float(row["proba"]) > 0.5:
        # If they didn't win
        if row["is_winner"] == False:
            return -100
        # If they won
        else:
            # If odd is positive
            if int(row['money line']) > 0:
                return abs(int(row['money line'])) + 100
            else:
                return 10000/abs(int(row['money line'])) + 100
    # team predicted to lose
    else:
        return 0
test_df['profit'] = test_df.apply(lambda row: team_bet(row), axis=1)
total_profit = test_df["profit"].sum()
initial_investment = (test_df["profit"].astype(bool).sum(axis=0))*100
roi = (initial_investment/total_profit)*100
print("With an initial investment of ${}, you would make ${}. You multiplied your bankrool by {}%!".format(initial_investment, int(total_profit), round(roi, 2)))

With an initial investment of $43000, you would make $26990. You multiplied your bankrool by 159.32%!


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df["proba"] = predicted_prob
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['profit'] = test_df.apply(lambda row: team_bet(row), axis=1)
