In [1]:
import os
import glob
import pandas as pd
import numpy as np

# The data lives in a "TeamStats" folder that sits beside the current working
# directory, i.e. inside the working directory's parent.
main_directory = os.path.normpath(os.sep.join([os.getcwd(), os.pardir]))
data_directory = os.path.join(main_directory, 'TeamStats')

# Names (not full paths) of the regular files in the data folder;
# sub-directories are skipped.
file_names = [
    entry
    for entry in os.listdir(data_directory)
    if os.path.isfile(os.path.join(data_directory, entry))
]



In [2]:
# Separate the CSV files by stat category. A file belongs to a category when
# its name contains that category's suffix. Each list is sorted by file name
# so that the same position in every list can be paired up later.
off_files = sorted(name for name in file_names if 'Off.csv' in name)
def_files = sorted(name for name in file_names if 'Def.csv' in name)
adv_files = sorted(name for name in file_names if 'Adv.csv' in name)
playoff_files = sorted(name for name in file_names if 'Playoffs.csv' in name)


In [15]:

# Dictionary to store one combined DataFrame per season, keyed by the season's
# position in the sorted file lists (0 = first file of each category).
team_stats_dfs = {}

# Number of seasons to process; every file list must hold at least this many files.
NUM_SEASONS = 24

for i in range(NUM_SEASONS):

    # Cleanup defensive stats. The file lists hold bare names produced by
    # os.listdir(data_directory), so join them with data_directory before
    # reading; otherwise the read only works when the working directory
    # happens to be the data folder.
    def_stats = (
        pd.read_csv(os.path.join(data_directory, def_files[i]))
        .drop(columns=['Rk', 'Team▲', 'G'])
        .add_prefix('Opp ')
        .iloc[:-1]  # drop the trailing row (presumably a league-wide summary -- TODO confirm)
    )

    # Cleanup advanced stats (the real header is on the second line of the file)
    adv_stats = (
        pd.read_csv(os.path.join(data_directory, adv_files[i]), header=1)
        .drop(columns=['Rk', 'Team▲', 'Unnamed: 17', 'Unnamed: 22', 'Unnamed: 27', 'Arena'])
        .rename(columns={
            'eFG%.1': 'Opp eFG%',
            'TOV%.1': 'Opp TOV%',
            'FT/FGA.1': 'Opp FT/FGA',
            'Attend.': 'Attend',
            'Attend./G': 'Attend/G',
        })
        .iloc[:-1]  # drop the trailing row, as above
    )

    # Cleanup main team stats (offensive)
    off_stats = (
        pd.read_csv(os.path.join(data_directory, off_files[i]))
        .drop(columns=['Rk'])
        .iloc[:-1]  # drop the trailing row, as above
        .rename(columns={'Team▲': 'Team'})
        # regex=False: '*' must be removed literally; as a regular expression
        # it is invalid, and the default for `regex` differs across pandas versions.
        .assign(Team=lambda df: df['Team'].str.replace('*', '', regex=False))
    )

    # Make one big df of every stat. Rows are combined by position, so this
    # relies on the three files listing the teams in the same order
    # -- NOTE(review): confirm the exports are all sorted by team.
    team_stats = pd.concat([off_stats, def_stats, adv_stats], axis=1)

    # Read in playoff stats: team name plus playoff wins and losses
    playoff = (
        pd.read_csv(os.path.join(data_directory, playoff_files[i]), header=1)
        [['Team', 'W', 'L']]
        .iloc[:-1]  # drop the trailing row, as above
    )

    # Add the teams that missed the playoffs: playoff rows come first, so
    # drop_duplicates keeps their W/L, and the remaining teams get NaN -> 0.
    # Sorting by wins then losses puts non-playoff teams (0-0) first and the
    # team with the most playoff wins last.
    all_teams = (
        pd.concat([playoff, team_stats[['Team']]])
        .drop_duplicates(subset=['Team'])
        .fillna(0)
        .sort_values(by=['W', 'L'], ascending=[True, True])
    )

    # Create the groups; lower number means you got further in the bracket.
    # The last 16 rows are the playoff teams: 8, 4, 2, 1 and 1 teams for
    # groups 5, 4, 3, 2 and 1; everyone before them gets 6.
    num_missed = len(all_teams) - 16
    group_num = [6] * num_missed + [5] * 8 + [4] * 4 + [3] * 2 + [2, 1]

    # Add the "Playoff Success" column to the DataFrame
    all_teams = all_teams.assign(Playoff_Success=group_num)

    # Put the new column in the main df. Match on the team name rather than on
    # row position, so each team gets its own label even if the row order of
    # team_stats differs from an alphabetical sort of the team names.
    team_stats = team_stats.merge(
        all_teams[['Team', 'Playoff_Success']], on='Team', how='left'
    )

    team_stats_dfs[i] = team_stats


In [17]:
# Inspect one season (key 20): print the column names, then show the combined
# table as the cell's output. Only the last expression of a cell is rendered,
# so no bare expression is needed before the print.
print(team_stats_dfs[20].columns)
team_stats_dfs[20]

Index(['Team', 'G', 'MP', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA',
       '2P%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF', 'PTS', 'Opp MP', 'Opp FG', 'Opp FGA', 'Opp FG%', 'Opp 3P',
       'Opp 3PA', 'Opp 3P%', 'Opp 2P', 'Opp 2PA', 'Opp 2P%', 'Opp FT',
       'Opp FTA', 'Opp FT%', 'Opp ORB', 'Opp DRB', 'Opp TRB', 'Opp AST',
       'Opp STL', 'Opp BLK', 'Opp TOV', 'Opp PF', 'Opp PTS', 'Age', 'W', 'L',
       'PW', 'PL', 'MOV', 'SOS', 'SRS', 'ORtg', 'DRtg', 'NRtg', 'Pace', 'FTr',
       '3PAr', 'TS%', 'eFG%', 'TOV%', 'ORB%', 'FT/FGA', 'Opp eFG%', 'Opp TOV%',
       'DRB%', 'Opp FT/FGA', 'Attend', 'Attend/G', 'Playoff_Success'],
      dtype='object')


Unnamed: 0,Team,G,MP,FG,FGA,FG%,3P,3PA,3P%,2P,...,TOV%,ORB%,FT/FGA,Opp eFG%,Opp TOV%,DRB%,Opp FT/FGA,Attend,Attend/G,Playoff_Success
0,Atlanta Hawks,67,243.0,40.6,90.6,0.449,12.0,36.1,0.333,28.6,...,13.8,21.6,0.204,0.543,12.7,74.9,0.233,545453,16043,6
1,Boston Celtics,72,242.1,41.3,89.6,0.461,12.6,34.5,0.364,28.7,...,12.2,23.9,0.207,0.509,13.5,77.4,0.215,610864,19090,3
2,Brooklyn Nets,72,242.8,40.4,90.3,0.448,13.1,38.1,0.343,27.3,...,13.1,23.2,0.199,0.511,11.0,77.8,0.187,524907,16403,5
3,Charlotte Hornets,65,242.3,37.3,85.9,0.434,12.1,34.3,0.352,25.2,...,13.3,23.9,0.188,0.546,13.1,74.4,0.159,478591,15428,6
4,Chicago Bulls,65,241.2,39.6,88.6,0.447,12.2,35.1,0.348,27.4,...,13.7,22.8,0.175,0.546,16.3,75.6,0.239,639352,18804,6
5,Cleveland Cavaliers,65,241.9,40.3,87.9,0.458,11.2,31.8,0.351,29.1,...,14.6,24.6,0.172,0.56,11.7,77.4,0.164,643008,17861,6
6,Dallas Mavericks,75,242.3,41.7,90.3,0.461,15.1,41.3,0.367,26.5,...,11.2,23.2,0.206,0.525,10.6,77.7,0.175,682096,20062,5
7,Denver Nuggets,73,243.1,42.0,88.9,0.473,11.0,30.6,0.359,31.1,...,12.3,24.8,0.183,0.533,12.9,76.8,0.198,633153,19186,3
8,Detroit Pistons,66,241.9,39.3,85.7,0.459,12.0,32.7,0.367,27.3,...,13.8,22.6,0.194,0.541,12.7,75.9,0.186,509469,15294,6
9,Golden State Warriors,65,241.9,38.6,88.2,0.438,10.4,31.3,0.334,28.2,...,13.2,21.5,0.212,0.553,13.7,76.4,0.193,614176,18064,6
