## Imports

In [240]:
import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import time

import datetime

# Loading the NBA Games

In [241]:
# Load the season schedule from the Excel workbook one directory above the notebook.
schedule_path = '../NBA_schedule.xlsx'
df = pd.read_excel(schedule_path)

# Sanity check: (rows, columns) of the loaded frame and the dtype of each column.
df.shape, df.dtypes

((1230, 3),
 Date         datetime64[ns]
 Away team            object
 Home team            object
 dtype: object)

In [242]:
# Replace the spaces in the two team column names with underscores so the
# columns can also be reached with attribute access (e.g. df.Away_team).
column_renames = {'Away team': 'Away_team', 'Home team': 'Home_team'}
df = df.rename(columns=column_renames)

In [243]:
# Preview the first few games in which Dallas is the away team.
df.loc[df['Away_team'].eq('Dallas Mavericks')].head()

Unnamed: 0,Date,Away_team,Home_team
12,2022-10-19,Dallas Mavericks,Phoenix Suns
52,2022-10-25,Dallas Mavericks,New Orleans Pelicans
65,2022-10-27,Dallas Mavericks,Brooklyn Nets
160,2022-11-09,Dallas Mavericks,Orlando Magic
171,2022-11-10,Dallas Mavericks,Washington Wizards


# Adding features

First, I need to add a column for each team that holds 1 if the team played in the game and 0 if it did not.

In [244]:
# List every distinct team name that appears in the away column.
df['Away_team'].unique()

array(['Philadelphia 76ers', 'Los Angeles Lakers', 'Orlando Magic',
       'Washington Wizards', 'Houston Rockets', 'New Orleans Pelicans',
       'New York Knicks', 'Chicago Bulls', 'Cleveland Cavaliers',
       'Oklahoma City Thunder', 'Charlotte Hornets', 'Denver Nuggets',
       'Dallas Mavericks', 'Portland Trail Blazers', 'Milwaukee Bucks',
       'Los Angeles Clippers', 'San Antonio Spurs', 'Toronto Raptors',
       'Boston Celtics', 'Detroit Pistons', 'Memphis Grizzlies',
       'Utah Jazz', 'Phoenix Suns', 'Minnesota Timberwolves',
       'Sacramento Kings', 'Indiana Pacers', 'Brooklyn Nets',
       'Golden State Warriors', 'Atlanta Hawks', 'Miami Heat'],
      dtype=object)

In [245]:
# Team names spelled exactly as they appear in the Away_team / Home_team
# columns (words separated by spaces). The per-team indicator columns and the
# '<team> last game' columns are named after these strings and are later looked
# up via `df['Away_team'] + ' last game'`, so the spelling must match the data.
# An underscore-separated variant of this list would match no rows at all.
teams = [
    'Philadelphia 76ers', 'Los Angeles Lakers', 'Orlando Magic',
    'Washington Wizards', 'Houston Rockets', 'New Orleans Pelicans',
    'New York Knicks', 'Chicago Bulls', 'Cleveland Cavaliers',
    'Oklahoma City Thunder', 'Charlotte Hornets', 'Denver Nuggets',
    'Dallas Mavericks', 'Portland Trail Blazers', 'Milwaukee Bucks',
    'Los Angeles Clippers', 'San Antonio Spurs', 'Toronto Raptors',
    'Boston Celtics', 'Detroit Pistons', 'Memphis Grizzlies',
    'Utah Jazz', 'Phoenix Suns', 'Minnesota Timberwolves',
    'Sacramento Kings', 'Indiana Pacers', 'Brooklyn Nets',
    'Golden State Warriors', 'Atlanta Hawks', 'Miami Heat',
]

In [246]:
for team in teams:
    # A team took part in a game when it is listed on either side of the matchup.
    played = df['Away_team'].eq(team) | df['Home_team'].eq(team)
    # Store the flag as a float column: 1.0 for games played, 0.0 otherwise.
    df[team] = played.astype(float)

In [247]:
# Preview the first two rows to check the per-team indicator columns.
df.head(2)

Unnamed: 0,Date,Away_team,Home_team,Philadelphia 76ers,Los Angeles Lakers,Orlando Magic,Washington Wizards,Houston Rockets,New Orleans Pelicans,New York Knicks,...,Memphis Grizzlies,Utah Jazz,Phoenix Suns,Minnesota Timberwolves,Sacramento Kings,Indiana Pacers,Brooklyn Nets,Golden State Warriors,Atlanta Hawks,Miami Heat
0,2022-10-18,Philadelphia 76ers,Boston Celtics,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2022-10-18,Los Angeles Lakers,Golden State Warriors,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


# Last game played
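Add a column for each team that contains the date of the previous game they played. Rows for games the team did not play in are left empty (NaT), and each team's first game of the season uses 2022-04-09 as a placeholder date.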

In [248]:
# Placeholder "previous game" date assigned to each team's first game in the
# schedule, since no earlier game exists in this data.
# NOTE(review): presumably the end of the prior regular season - confirm.
FIRST_GAME_PLACEHOLDER = pd.Timestamp('2022-04-09')

for team in teams:
    # Dates of the games this team played, still labelled with the rows of df
    # they came from. Assumes df is ordered chronologically, as loaded.
    team_game_dates = df.loc[df[team] == 1, 'Date']

    # Move every date down by one game so each row carries the date of the
    # team's previous game; the first game receives the placeholder.
    previous_game_dates = team_game_dates.shift(1, fill_value=FIRST_GAME_PLACEHOLDER)

    # Assignment aligns on the index, so rows in which the team did not play
    # are left as NaT.
    df[team + ' last game'] = previous_game_dates

In [249]:
# Preview the first two rows to check the '<team> last game' columns.
df.head(2)

Unnamed: 0,Date,Away_team,Home_team,Philadelphia 76ers,Los Angeles Lakers,Orlando Magic,Washington Wizards,Houston Rockets,New Orleans Pelicans,New York Knicks,...,Memphis Grizzlies last game,Utah Jazz last game,Phoenix Suns last game,Minnesota Timberwolves last game,Sacramento Kings last game,Indiana Pacers last game,Brooklyn Nets last game,Golden State Warriors last game,Atlanta Hawks last game,Miami Heat last game
0,2022-10-18,Philadelphia 76ers,Boston Celtics,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
1,2022-10-18,Los Angeles Lakers,Golden State Warriors,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,2022-04-09,NaT,NaT


## Days since last game
Add columns that hold the number of days since the home and away teams' last games.

In [250]:
# For every game, the name of the column that holds the away / home team's
# previous-game date (team name followed by ' last game').
away_team, home_team = (
    df[side_column].add(' last game').array
    for side_column in ('Away_team', 'Home_team')
)

In [251]:
def _lookup_by_row(frame, column_labels):
    """Pick one value per row of `frame`, from the column named for that row.

    Parameters
    ----------
    frame : pd.DataFrame
        Frame to read from.
    column_labels : array-like of str
        One column label per row of `frame`, in row order.

    Returns
    -------
    np.ndarray
        Element k is the value in row k of `frame` under `column_labels[k]`.

    Raises
    ------
    KeyError
        If a label is missing (NaN) or does not name a column of `frame`.
    """
    # Encode each label as an integer code into the array of distinct labels.
    codes, wanted_columns = pd.factorize(np.asarray(column_labels, dtype=object))
    if (codes < 0).any():
        # factorize marks missing labels with -1, which would otherwise
        # silently select the last column.
        raise KeyError('column_labels contains missing values')
    # Selecting with a list raises KeyError for unknown labels, matching the
    # behaviour of a per-row df.loc lookup.
    values = frame[list(wanted_columns)].to_numpy()
    # Row k, column codes[k] - one fancy-indexing pass instead of a Python loop.
    return values[np.arange(len(frame)), codes]


# Previous-game date of the away and home team for every game.
# The capital 'D' in 'home_last_Date' is kept because the days-since-last-game
# cell reads the column under that exact name.
df['away_last_date'] = _lookup_by_row(df, away_team)
df['home_last_Date'] = _lookup_by_row(df, home_team)

In [252]:
# Whole days between each game and the previous game of the away / home team
# ("dslg" = days since last game).
game_dates = df['Date']
df['away_team_dslg'] = game_dates.sub(df['away_last_date']).dt.days
df['home_team_dslg'] = game_dates.sub(df['home_last_Date']).dt.days

In [253]:
# Inspect the last three rows, including the days-since-last-game columns.
df.tail(3)

Unnamed: 0,Date,Away_team,Home_team,Philadelphia 76ers,Los Angeles Lakers,Orlando Magic,Washington Wizards,Houston Rockets,New Orleans Pelicans,New York Knicks,...,Sacramento Kings last game,Indiana Pacers last game,Brooklyn Nets last game,Golden State Warriors last game,Atlanta Hawks last game,Miami Heat last game,away_last_date,home_last_Date,away_team_dslg,home_team_dslg
1227,2023-04-09,Memphis Grizzlies,Oklahoma City Thunder,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,NaT,NaT,NaT,NaT,NaT,NaT,2023-04-07,2023-04-06,2,3
1228,2023-04-09,Los Angeles Clippers,Phoenix Suns,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,NaT,NaT,NaT,NaT,NaT,NaT,2023-04-08,2023-04-07,1,2
1229,2023-04-09,Golden State Warriors,Portland Trail Blazers,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,NaT,NaT,NaT,2023-04-07,NaT,NaT,2023-04-07,2023-04-08,2,1
