In [56]:
import os
import time
import pandas as pd
from nba_api.stats.endpoints import leaguegamefinder

In [None]:
# Root folder for everything this notebook produces (.ipynb, .py, .csv files);
# edit this path to relocate the project.
project_dir = r"D:\NBANextGamePredictor"

# Create the folder on first use, announcing it before doing so.
project_dir_missing = not os.path.exists(project_dir)
if project_dir_missing:
    print(f"Creating: {project_dir}")
    os.makedirs(project_dir)

# Switch the working directory to the project folder and report the result.
os.chdir(project_dir)
print("Working directory:", os.getcwd())

In [58]:
# Folder under the project directory that holds the generated CSV files.
subdirectory = os.path.join(project_dir, "csvs")

csv_dir_missing = not os.path.exists(subdirectory)
if csv_dir_missing:
    print(f"Creating subdirectory for CSVs: {subdirectory}")
    os.makedirs(subdirectory)

In [59]:
# Season labels in "YYYY-YY" form: 2021-22, 2022-23 and 2023-24.
seasons = [f"{start}-{(start + 1) % 100:02d}" for start in range(2021, 2024)]

# The 30 team abbreviations used to filter and split the game data.
teams_abbr = (
    "BOS BKN NYK PHI TOR "
    "CHI CLE DET IND MIL "
    "ATL CHA MIA ORL WAS "
    "DEN MIN OKC POR UTA "
    "GSW LAC LAL PHX SAC "
    "DAL HOU MEM NOP SAS"
).split()

# Starts out as an empty list.
games = list()

In [60]:
# Download the regular-season game rows for every season in `seasons`, keep
# only the teams listed in `teams_abbr`, and save the combined table.
#
# The per-season frames are collected in a cell-local list: `games` is rebound
# to the concatenated DataFrame at the end of this cell, so appending to
# `games` itself would fail the second time the cell is run.
season_frames = []
for season in seasons:
    gameFinder = leaguegamefinder.LeagueGameFinder(
        # "00" is the NBA league id in nba_api; without it the finder is not
        # restricted to one league and rows from other leagues whose team
        # abbreviations collide with `teams_abbr` could pass the filter below.
        league_id_nullable="00",
        season_nullable=season,
        season_type_nullable="Regular Season",
    )
    seasonGames = gameFinder.get_data_frames()[0]
    seasonGames = seasonGames[seasonGames['TEAM_ABBREVIATION'].isin(teams_abbr)]
    season_frames.append(seasonGames)
    # Pause between requests (presumably to avoid hammering the stats endpoint).
    time.sleep(2)

# One row per team per game, across all requested seasons.
games = pd.concat(season_frames, ignore_index=True)
games.to_csv(os.path.join(subdirectory, 'all_games_raw.csv'), index=False)

In [None]:
# Folder that receives one raw CSV per team.
# NOTE(review): the backslash in the relative path is only a separator on
# Windows, which matches the Windows-style project_dir above.
subdirectory = os.path.join(project_dir, r"csvs\team_files_raw")

raw_team_dir_missing = not os.path.exists(subdirectory)
if raw_team_dir_missing:
    print(f"Creating subdirectory for 30 team files: {subdirectory}")
    os.makedirs(subdirectory)

In [63]:
# Split the combined game table into one CSV per team abbreviation, written
# into the current `subdirectory`.
for team in teams_abbr:
    is_team_row = games['TEAM_ABBREVIATION'].eq(team)
    team_games = games.loc[is_team_row]
    team_file_path = os.path.join(subdirectory, team + "_games_stats.csv")
    team_games.to_csv(team_file_path, index=False)

In [None]:
# Folder that receives the rolling-average version of each raw team file.
# NOTE(review): the backslash in the relative path is only a separator on
# Windows, which matches the Windows-style project_dir above.
subdirectory = os.path.join(project_dir, r"csvs\modified_team_files_raw")

modified_raw_dir_missing = not os.path.exists(subdirectory)
if modified_raw_dir_missing:
    print(f"Creating subdirectory for 30 modified team files: {subdirectory}")
    os.makedirs(subdirectory)

In [65]:
# For every CSV in the raw per-team folder, write a "modified_" copy in which
# each game's numeric statistics are replaced by the mean of the same team's
# previous games (at most the 10 most recent ones). The first game has no
# history, so it keeps its own values.
input_dir = os.path.join(project_dir, r"csvs\team_files_raw")
output_dir = os.path.join(project_dir, r"csvs\modified_team_files_raw")

team_files = [f for f in os.listdir(input_dir) if f.endswith(".csv")]

for team in team_files:  # `team` is the CSV file name, not an abbreviation
    team_path = os.path.join(input_dir, team)
    team_data = pd.read_csv(team_path)

    # Chronological order is required for "previous games" to make sense.
    if 'GAME_DATE' in team_data.columns:
        team_data['GAME_DATE'] = pd.to_datetime(team_data['GAME_DATE'])
        team_data = team_data.sort_values('GAME_DATE').reset_index(drop=True)

    # Average only real statistics. Identifier columns such as GAME_ID can be
    # parsed as numbers by read_csv, and averaging them would overwrite each
    # game's identifier with a meaningless mean of earlier identifiers.
    numeric_cols = [
        col
        for col in team_data.select_dtypes(include=['number']).columns
        if not str(col).endswith("_ID")
    ]

    modified_data = team_data.copy()

    if numeric_cols:
        stats = team_data[numeric_cols].astype(float)
        # shift(1) drops the current game from its own window; the rolling
        # window of 10 with min_periods=1 then averages whatever earlier games
        # exist (1 to 10 of them), skipping missing values.
        prior_mean = stats.shift(1).rolling(window=10, min_periods=1).mean()
        if len(stats) > 0:
            # No earlier games for the first row: keep its own statistics.
            prior_mean.iloc[0] = stats.iloc[0]
        modified_data[numeric_cols] = prior_mean

    output_path = os.path.join(output_dir, f"modified_{team}")
    modified_data.to_csv(output_path, index=False)

In [66]:
# Collapse the two per-team rows of each game into a single row: the first row
# found for a GAME_ID becomes the "T1_" columns and the second the "T2_"
# columns. Which team lands in which slot depends only on row order in `games`.
csv_dir = os.path.join(project_dir, r"csvs")

games_one_row = games.groupby("GAME_ID")

# Position of each row inside its game (0 for the first row, 1 for the second).
# Selecting by this position keeps GAME_ID as a regular column on every pandas
# version; GroupBy.nth only does so on versions where it acts as a row filter,
# and the merge below needs the T1_GAME_ID / T2_GAME_ID columns.
row_in_game = games_one_row.cumcount()

team_one = games[row_in_game == 0].add_prefix("T1_")
modified_team_one_raw = team_one.copy()
modified_team_one_raw.to_csv(os.path.join(csv_dir, 'all_games_team_one.csv'), index=False)

team_two = games[row_in_game == 1].add_prefix("T2_")
modified_team_two_raw = team_two.copy()
modified_team_two_raw.to_csv(os.path.join(csv_dir, 'all_games_team_two.csv'), index=False)

# Inner join: a game is kept only when both of its rows are present. The merge
# result already has a fresh 0..n-1 index, so no index reset is needed.
modified_all_games_raw = pd.merge(team_one, team_two, left_on="T1_GAME_ID", right_on="T2_GAME_ID")

# `output_dir` ends up holding the merged file's path, as it did before.
output_dir = os.path.join(csv_dir, 'modified_all_games_raw.csv')
modified_all_games_raw.to_csv(output_dir, index=False)

In [67]:
# Add a rebound total per team slot: RB = OREB + DREB, for T1 and then T2.
for slot in ("T1", "T2"):
    offensive = modified_all_games_raw[f"{slot}_OREB"]
    defensive = modified_all_games_raw[f"{slot}_DREB"]
    modified_all_games_raw[f"{slot}_RB"] = offensive + defensive

In [68]:
# Statistics kept for both team slots, in output order.
stat_fields = ["PTS", "FG_PCT", "FG3_PCT", "FT_PCT", "RB", "AST", "TOV", "PF"]

# Game identity from the T1 side, then T1 stats, then T2 stats, and finally
# the T1 win/loss column.
columns_to_keep = (
    ["T1_GAME_ID", "T1_GAME_DATE", "T1_TEAM_ABBREVIATION"]
    + [f"T1_{field}" for field in stat_fields]
    + [f"T2_{field}" for field in stat_fields]
    + ["T1_WL"]
)

filteredData = modified_all_games_raw[columns_to_keep]

# `output_dir` holds the full path of the exported file.
output_dir = os.path.join(project_dir, r"csvs", 'filtered_all_games.csv')
filteredData.to_csv(output_dir, index=False)

In [None]:
# Folder that receives one CSV per team with the opponent's stats on the same row.
# NOTE(review): the backslash in the relative path is only a separator on
# Windows, which matches the Windows-style project_dir above.
subdirectory = os.path.join(project_dir, r"csvs\team_files_one_row")

one_row_dir_missing = not os.path.exists(subdirectory)
if one_row_dir_missing:
    print(f"Creating subdirectory for 30 team files (+opponents stats): {subdirectory}")
    os.makedirs(subdirectory)

In [None]:
# Write one CSV per team containing the filtered one-row-per-game records in
# which that team occupies the T1 slot.
# NOTE(review): rows where the team appears in the T2 slot are not selected
# here, so each file covers only part of the team's games — confirm intended.
for team in teams_abbr:
    in_t1_slot = filteredData['T1_TEAM_ABBREVIATION'].eq(team)
    team_games_with_opp = filteredData.loc[in_t1_slot]
    team_file_path = os.path.join(subdirectory, team + "_games_wo_stats.csv")
    team_games_with_opp.to_csv(team_file_path, index=False)

In [None]:
# Folder that receives the rolling-average version of each one-row team file.
# NOTE(review): the backslash in the relative path is only a separator on
# Windows, which matches the Windows-style project_dir above.
subdirectory = os.path.join(project_dir, r"csvs\modified_team_files_one_row")

modified_one_row_dir_missing = not os.path.exists(subdirectory)
if modified_one_row_dir_missing:
    print(f"Creating subdirectory for 30 modified team files: {subdirectory}")
    os.makedirs(subdirectory)

In [78]:
# For every one-row-per-game team file, write a "modified_" copy in which the
# T1/T2 statistics of each game are replaced by the mean of the previous games
# in that file (at most the 10 most recent ones). The first game has no
# history, so it keeps its own values. Percentages are then scaled to 0-100
# and all statistics rounded to two decimals.
input_dir = os.path.join(project_dir, r"csvs\team_files_one_row")
output_dir = os.path.join(project_dir, r"csvs\modified_team_files_one_row")

team_files = [f for f in os.listdir(input_dir) if f.endswith(".csv")]

for team in team_files:  # `team` is the CSV file name, not an abbreviation
    team_path = os.path.join(input_dir, team)
    team_data = pd.read_csv(team_path)

    # Chronological order is required for "previous games" to make sense.
    if 'T1_GAME_DATE' in team_data.columns:
        team_data['T1_GAME_DATE'] = pd.to_datetime(team_data['T1_GAME_DATE'])
        team_data = team_data.sort_values('T1_GAME_DATE').reset_index(drop=True)

    # Statistics to average; identifier, date, team and W/L columns are left as-is.
    numeric_cols = [
        "T1_PTS", "T1_FG_PCT", "T1_FG3_PCT", "T1_FT_PCT", "T1_RB", "T1_AST", "T1_TOV", "T1_PF",
        "T2_PTS", "T2_FG_PCT", "T2_FG3_PCT", "T2_FT_PCT", "T2_RB", "T2_AST", "T2_TOV", "T2_PF"
    ]

    stats = team_data[numeric_cols].astype(float)
    # shift(1) drops the current game from its own window; the rolling window
    # of 10 with min_periods=1 then averages whatever earlier games exist
    # (1 to 10 of them), skipping missing values.
    prior_mean = stats.shift(1).rolling(window=10, min_periods=1).mean()
    if len(stats) > 0:
        # No earlier games for the first row: keep its own statistics.
        prior_mean.iloc[0] = stats.iloc[0]

    modified_data = team_data.copy()
    modified_data[numeric_cols] = prior_mean

    # Express shooting percentages on a 0-100 scale instead of 0-1.
    percentage_columns = ["T1_FG_PCT", "T1_FG3_PCT", "T1_FT_PCT", "T2_FG_PCT", "T2_FG3_PCT", "T2_FT_PCT"]
    modified_data[percentage_columns] = modified_data[percentage_columns] * 100

    modified_data[numeric_cols] = modified_data[numeric_cols].round(2)

    output_path = os.path.join(output_dir, f"modified_{team}")
    modified_data.to_csv(output_path, index=False)