# Predicting the season
This is where things get extremely arbitrary. Each team is given a rating based purely on my intuition as a basketball fan. In each game a team's rating is then adjusted depending on whether it is playing home or away and on how many days of rest it has had since its last game. There is also an element of randomness in basketball, so when predicting each game I include a random component that accounts for 25% of each team's score for that game (the remaining 75% comes from the adjusted rating).

## Imports

In [354]:
import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import time

import datetime

import random

# Loading the NBA Games

In [355]:
# Load the NBA game data; the path is relative to this notebook's directory.
df = pd.read_excel('../NBA_game_data.xlsx')
# Display (rows, columns) and the per-column dtypes as a quick sanity check.
df.shape, df.dtypes

((1230, 67),
 Date                    datetime64[ns]
 Away_team                       object
 Home_team                       object
 Philadelphia 76ers               int64
 Los Angeles Lakers               int64
                              ...      
 Miami Heat last game    datetime64[ns]
 away_last_date          datetime64[ns]
 home_last_Date          datetime64[ns]
 away_team_dslg                   int64
 home_team_dslg                   int64
 Length: 67, dtype: object)

# Rating each team
Ratings will be floats between 0.0 and 10.0

In [356]:
# List every distinct team name that appears in the Away_team column.
df['Away_team'].unique()

array(['Philadelphia 76ers', 'Los Angeles Lakers', 'Orlando Magic',
       'Washington Wizards', 'Houston Rockets', 'New Orleans Pelicans',
       'New York Knicks', 'Chicago Bulls', 'Cleveland Cavaliers',
       'Oklahoma City Thunder', 'Charlotte Hornets', 'Denver Nuggets',
       'Dallas Mavericks', 'Portland Trail Blazers', 'Milwaukee Bucks',
       'Los Angeles Clippers', 'San Antonio Spurs', 'Toronto Raptors',
       'Boston Celtics', 'Detroit Pistons', 'Memphis Grizzlies',
       'Utah Jazz', 'Phoenix Suns', 'Minnesota Timberwolves',
       'Sacramento Kings', 'Indiana Pacers', 'Brooklyn Nets',
       'Golden State Warriors', 'Atlanta Hawks', 'Miami Heat'],
      dtype=object)

In [357]:
# Subjective strength rating for each team, on a 0.0-10.0 scale.
team_ratings = {
    'Philadelphia 76ers': 8.8,
    'Los Angeles Lakers': 7.9,
    'Orlando Magic': 7.0,
    'Washington Wizards': 7.4,
    'Houston Rockets': 6.0,
    'New Orleans Pelicans': 8.4,
    'New York Knicks': 7.4,
    'Chicago Bulls': 7.9,
    'Cleveland Cavaliers': 8.4,
    'Oklahoma City Thunder': 6.5,
    'Charlotte Hornets': 6.7,
    'Denver Nuggets': 9.1,
    'Dallas Mavericks': 8.7,
    'Portland Trail Blazers': 8.0,
    'Milwaukee Bucks': 9.0,
    'Los Angeles Clippers': 9.0,
    'San Antonio Spurs': 6.0,
    'Toronto Raptors': 8.2,
    'Boston Celtics': 8.8,
    'Detroit Pistons': 6.8,
    'Memphis Grizzlies': 8.4,
    'Utah Jazz': 6.0,
    'Phoenix Suns': 8.6,
    'Minnesota Timberwolves': 8.6,
    'Sacramento Kings': 7.7,
    'Indiana Pacers': 6.6,
    'Brooklyn Nets': 8.7,
    'Golden State Warriors': 8.9,
    'Atlanta Hawks': 8.4,
    'Miami Heat': 8.4,
}

In [358]:
# Sanity check: count how many teams have been given a rating.
len(team_ratings)

30

# Add team ratings to Dataframe

In [359]:
# Look up each side's rating by team name and store it in a new column
# next to the game row (away first, then home).
rating_columns = {
    'Away_team': 'Away_team_rating',
    'Home_team': 'Home_team_rating',
}
for team_col, rating_col in rating_columns.items():
    df[rating_col] = df[team_col].map(team_ratings)

In [360]:
# Preview the first two games to confirm the rating columns were added.
df.head(2)

Unnamed: 0,Date,Away_team,Home_team,Philadelphia 76ers,Los Angeles Lakers,Orlando Magic,Washington Wizards,Houston Rockets,New Orleans Pelicans,New York Knicks,...,Brooklyn Nets last game,Golden State Warriors last game,Atlanta Hawks last game,Miami Heat last game,away_last_date,home_last_Date,away_team_dslg,home_team_dslg,Away_team_rating,Home_team_rating
0,2022-10-18,Philadelphia 76ers,Boston Celtics,1,0,0,0,0,0,0,...,NaT,NaT,NaT,NaT,2022-04-09,2022-04-09,192,192,8.8,8.8
1,2022-10-18,Los Angeles Lakers,Golden State Warriors,0,1,0,0,0,0,0,...,NaT,2022-04-09,NaT,NaT,2022-04-09,2022-04-09,192,192,7.9,8.9


# Simulating games

In [361]:
# Win counter template: every team starts on zero wins. Each simulated season
# works on its own copy of this dict.
team_wins = dict.fromkeys(
    [
        'Philadelphia 76ers',
        'Los Angeles Lakers',
        'Orlando Magic',
        'Washington Wizards',
        'Houston Rockets',
        'New Orleans Pelicans',
        'New York Knicks',
        'Chicago Bulls',
        'Cleveland Cavaliers',
        'Oklahoma City Thunder',
        'Charlotte Hornets',
        'Denver Nuggets',
        'Dallas Mavericks',
        'Portland Trail Blazers',
        'Milwaukee Bucks',
        'Los Angeles Clippers',
        'San Antonio Spurs',
        'Toronto Raptors',
        'Boston Celtics',
        'Detroit Pistons',
        'Memphis Grizzlies',
        'Utah Jazz',
        'Phoenix Suns',
        'Minnesota Timberwolves',
        'Sacramento Kings',
        'Indiana Pacers',
        'Brooklyn Nets',
        'Golden State Warriors',
        'Atlanta Hawks',
        'Miami Heat',
    ],
    0,
)

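Loop through the dataframe: simulate all 1,230 games of the season 1,000 times and record each team's win total for every simulated season.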

In [362]:
# Monte Carlo simulation of the season.
#
# Every game in df is replayed N_SIMULATIONS times. For each game both teams
# get a score made of a deterministic part (their rating, adjusted for playing
# away and for short rest) and a random part; the higher score wins and ties
# go to the home team. The result is wins_df: column 0 holds the team name and
# columns 1..N_SIMULATIONS each hold the win totals of one simulated season.
N_SIMULATIONS = 1000
AWAY_FACTOR = 0.97     # slight disadvantage for playing away
RATING_WEIGHT = 0.75   # weight of the adjusted rating in a game score
RANDOM_WEIGHT = 0.25   # weight of the random integer draw (0 to 10 inclusive)
# Rating multiplier by days since the team's last game; any other number of
# days leaves the rating unchanged.
REST_FACTOR = {1: 0.85, 2: 0.9, 3: 0.95}

teams = df['Away_team'].unique()
wins_df = pd.DataFrame(teams)

# The rating-based part of every game score is identical in every simulation,
# so it is computed once up front rather than once per game per run.
schedule = []
for away, home, away_rating, home_rating, away_rest, home_rest in zip(
        df['Away_team'], df['Home_team'],
        df['Away_team_rating'], df['Home_team_rating'],
        df['away_team_dslg'], df['home_team_dslg']):
    away_base = (away_rating * AWAY_FACTOR
                 * REST_FACTOR.get(away_rest, 1.0) * RATING_WEIGHT)
    home_base = home_rating * REST_FACTOR.get(home_rest, 1.0) * RATING_WEIGHT
    schedule.append((away, home, away_base, home_base))

simulated = {}
for sim in range(1, N_SIMULATIONS + 1):
    tally = team_wins.copy()  # fresh counters; team_wins itself is never mutated
    for away, home, away_base, home_base in schedule:
        # Draw for the away team first, then the home team, so that a seeded
        # run consumes the random stream in a fixed, reproducible order.
        away_score = away_base + random.randint(0, 10) * RANDOM_WEIGHT
        home_score = home_base + random.randint(0, 10) * RANDOM_WEIGHT
        # The away team must be strictly better; a tie goes to the home team.
        winner = away if away_score > home_score else home
        tally[winner] += 1
    # Record the season once it is complete, in the same row order as wins_df.
    simulated[sim] = [tally[team] for team in teams]

# Attach all simulated seasons in a single concat instead of inserting columns
# one at a time, which fragments the DataFrame.
wins_df = pd.concat(
    [wins_df, pd.DataFrame(simulated, index=wins_df.index)], axis=1)

In [363]:
# Display the simulation results: one row per team, one column per simulated season.
wins_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,991,992,993,994,995,996,997,998,999,1000
0,Philadelphia 76ers,56,57,59,60,44,59,54,56,55,...,56,56,60,57,55,50,51,59,56,51
1,Los Angeles Lakers,44,44,44,39,36,45,43,35,47,...,36,43,41,40,43,35,44,38,40,34
2,Orlando Magic,27,22,26,28,34,25,30,31,31,...,23,26,34,24,24,24,22,32,23,31
3,Washington Wizards,26,41,35,36,35,36,32,28,36,...,33,39,25,30,39,27,35,40,38,35
4,Houston Rockets,13,11,11,15,17,13,14,16,14,...,17,14,9,10,10,14,22,13,16,14
5,New Orleans Pelicans,45,47,44,46,60,52,49,60,44,...,51,50,49,45,52,51,48,48,48,51
6,New York Knicks,29,31,35,37,33,36,33,43,33,...,32,30,35,25,24,34,34,40,32,27
7,Chicago Bulls,43,42,40,41,49,43,43,47,40,...,35,41,36,45,38,44,38,41,42,38
8,Cleveland Cavaliers,48,54,49,56,42,51,44,53,44,...,43,40,41,55,47,47,57,48,50,52
9,Oklahoma City Thunder,16,26,15,23,21,21,15,14,17,...,19,13,23,23,26,16,16,18,17,18


# Mean wins

In [364]:
# Average each team's win total over the simulated seasons (the columns at
# positions 1 to 1000; position 0 is the team name) and round to a whole number.
simulated_seasons = wins_df.iloc[:, 1:1001]
wins_df['mean_wins'] = simulated_seasons.mean(axis=1).round()

In [365]:
# Preview the first two teams to confirm the mean_wins column was added.
wins_df.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,992,993,994,995,996,997,998,999,1000,mean_wins
0,Philadelphia 76ers,56,57,59,60,44,59,54,56,55,...,56,60,57,55,50,51,59,56,51,55.0
1,Los Angeles Lakers,44,44,44,39,36,45,43,35,47,...,43,41,40,43,35,44,38,40,34,40.0


# Split by Conference

In [366]:
# Column 0 of wins_df holds the team names; list them to split by conference.
wins_df[0].unique()

array(['Philadelphia 76ers', 'Los Angeles Lakers', 'Orlando Magic',
       'Washington Wizards', 'Houston Rockets', 'New Orleans Pelicans',
       'New York Knicks', 'Chicago Bulls', 'Cleveland Cavaliers',
       'Oklahoma City Thunder', 'Charlotte Hornets', 'Denver Nuggets',
       'Dallas Mavericks', 'Portland Trail Blazers', 'Milwaukee Bucks',
       'Los Angeles Clippers', 'San Antonio Spurs', 'Toronto Raptors',
       'Boston Celtics', 'Detroit Pistons', 'Memphis Grizzlies',
       'Utah Jazz', 'Phoenix Suns', 'Minnesota Timberwolves',
       'Sacramento Kings', 'Indiana Pacers', 'Brooklyn Nets',
       'Golden State Warriors', 'Atlanta Hawks', 'Miami Heat'],
      dtype=object)

In [367]:
# Western Conference teams.
west = [
    'Los Angeles Lakers',
    'Houston Rockets',
    'New Orleans Pelicans',
    'Oklahoma City Thunder',
    'Denver Nuggets',
    'Dallas Mavericks',
    'Portland Trail Blazers',
    'Los Angeles Clippers',
    'San Antonio Spurs',
    'Memphis Grizzlies',
    'Utah Jazz',
    'Phoenix Suns',
    'Minnesota Timberwolves',
    'Sacramento Kings',
    'Golden State Warriors',
]

# Eastern Conference teams.
east = [
    'Philadelphia 76ers',
    'Orlando Magic',
    'Washington Wizards',
    'New York Knicks',
    'Chicago Bulls',
    'Cleveland Cavaliers',
    'Charlotte Hornets',
    'Milwaukee Bucks',
    'Toronto Raptors',
    'Boston Celtics',
    'Detroit Pistons',
    'Indiana Pacers',
    'Brooklyn Nets',
    'Atlanta Hawks',
    'Miami Heat',
]

In [368]:
# One single-column DataFrame per conference; the team names land in column 0.
west_df, east_df = (pd.DataFrame(names) for names in (west, east))

In [369]:
# Lookup table from team name (column 0 of wins_df) to its mean simulated wins.
mean_wins_by_team = wins_df.set_index(0)['mean_wins']
team_win_dict = mean_wins_by_team.to_dict()

In [370]:
# Attach each team's mean simulated wins to both conference tables
# (team names are still in column 0 at this point).
for conference_df in (west_df, east_df):
    conference_df['Wins'] = conference_df[0].map(team_win_dict)

In [371]:
# Give the team-name column (label 0) a readable header in both tables.
west_df, east_df = (
    frame.rename(columns={0: "Team"}) for frame in (west_df, east_df)
)

In [372]:
# Show the Western Conference ordered by mean simulated wins, best first.
# The sorted frame is only displayed; west_df itself is not reassigned.
west_df.sort_values(by=['Wins'], ascending=False).reset_index(drop=True)

Unnamed: 0,Team,Wins
0,Denver Nuggets,59.0
1,Los Angeles Clippers,59.0
2,Golden State Warriors,56.0
3,Dallas Mavericks,55.0
4,Minnesota Timberwolves,54.0
5,Phoenix Suns,52.0
6,Memphis Grizzlies,49.0
7,New Orleans Pelicans,48.0
8,Portland Trail Blazers,43.0
9,Los Angeles Lakers,40.0


In [373]:
# Show the Eastern Conference ordered by mean simulated wins, best first.
# The sorted frame is only displayed; east_df itself is not reassigned.
east_df.sort_values(by=['Wins'], ascending=False).reset_index(drop=True)

Unnamed: 0,Team,Wins
0,Milwaukee Bucks,58.0
1,Philadelphia 76ers,55.0
2,Boston Celtics,55.0
3,Brooklyn Nets,53.0
4,Cleveland Cavaliers,49.0
5,Atlanta Hawks,49.0
6,Miami Heat,49.0
7,Toronto Raptors,46.0
8,Chicago Bulls,41.0
9,Washington Wizards,33.0
