In [41]:
#imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
from itertools import combinations
from collections import OrderedDict

In [13]:
# Load the per-player, per-team season stats from players_teams.csv
player_teams = pd.read_csv('../basketballPlayoffs/players_teams.csv')

# Drop unneeded columns (no inplace mutation, so the cell reads as one lineage)
player_teams = player_teams.drop(columns='lgID')

# Order each player's rows chronologically. 'stint' is included as a tie-breaker
# so that several rows for the same player and year always come out in the same order.
player_teams = player_teams.sort_values(by=['playerID', 'year', 'stint'])

# career_year: 1 for a player's first season in the data, 2 for the next one, ...
# A plain running row count (cumcount) would advance once per ROW, so a player with
# more than one row in the same year would have the career year inflated.
# Instead, advance the counter only when 'year' differs from the player's previous row
# (the first row of each player compares against NaN and therefore starts at 1).
starts_new_season = player_teams['year'].ne(
    player_teams.groupby('playerID')['year'].shift()
)
player_teams['career_year'] = (
    starts_new_season.astype(int).groupby(player_teams['playerID']).cumsum()
)

def compute_percentage(numerator, denominator):
    """Return ``numerator / denominator`` as a percentage rounded to 2 decimals.

    Both arguments are expected to be pandas objects (the body relies on
    ``.divide`` and ``.where``). Positions where ``denominator`` equals 0
    yield ``0.0`` instead of the NaN/inf produced by the raw division.
    """
    ratio = numerator.divide(denominator)
    # Keep the ratio only where the denominator is non-zero; elsewhere use 0.0.
    guarded_ratio = ratio.where(denominator != 0, 0.0)
    return (guarded_ratio * 100).round(2)

# Each entry is (new percentage column, numerator column, denominator column).
# The list order is the order in which the columns are appended to the frame.
percentage_specs = [
    # Regular season
    ('ft%', 'ftMade', 'ftAttempted'),
    ('fg%', 'fgMade', 'fgAttempted'),
    ('three%', 'threeMade', 'threeAttempted'),
    ('gs%', 'GS', 'GP'),
    # Playoffs
    ('Postft%', 'PostftMade', 'PostftAttempted'),
    ('Postfg%', 'PostfgMade', 'PostfgAttempted'),
    ('Postthree%', 'PostthreeMade', 'PostthreeAttempted'),
    ('Postgs%', 'PostGS', 'PostGP'),
]

for pct_col, numerator_col, denominator_col in percentage_specs:
    # compute_percentage returns 0.0 wherever the denominator column is 0
    player_teams[pct_col] = compute_percentage(
        player_teams[numerator_col], player_teams[denominator_col]
    )

# Last expression of the cell: preview the enriched frame
player_teams.head()

Unnamed: 0,playerID,year,stint,tmID,GP,GS,minutes,points,oRebounds,dRebounds,...,PostDQ,career_year,ft%,fg%,three%,gs%,Postft%,Postfg%,Postthree%,Postgs%
0,abrossv01w,2,0,MIN,26,23,846,343,43,131,...,0,1,72.73,38.91,25.0,88.46,0.0,0.0,0.0,0.0
1,abrossv01w,3,0,MIN,27,27,805,314,45,101,...,0,2,48.28,37.66,33.33,100.0,0.0,0.0,0.0,0.0
2,abrossv01w,4,0,MIN,30,25,792,318,44,97,...,0,3,70.41,39.3,30.49,83.33,100.0,27.27,42.86,100.0
3,abrossv01w,5,0,MIN,22,11,462,146,17,57,...,0,4,60.87,35.25,37.74,50.0,50.0,34.78,25.0,100.0
4,abrossv01w,6,0,MIN,31,31,777,304,29,78,...,0,5,72.6,39.49,40.24,100.0,0.0,0.0,0.0,0.0


In [40]:
# Load the coaches data from coaches.csv and drop the unneeded 'lgID' column
coaches = pd.read_csv('../basketballPlayoffs/coaches.csv').drop(columns='lgID')

# Win percentage over all games (won + lost)
games_total = coaches['won'] + coaches['lost']
coaches['total_games'] = games_total
coaches['W%'] = compute_percentage(coaches['won'], games_total)

# Win percentage over post_wins + post_losses; compute_percentage yields 0.0
# for rows where that total is 0
post_games_total = coaches['post_wins'] + coaches['post_losses']
coaches['total_p_games'] = post_games_total
coaches['postW%'] = compute_percentage(coaches['post_wins'], post_games_total)

coaches.head()

Unnamed: 0,coachID,year,tmID,stint,won,lost,post_wins,post_losses,total_games,W%,total_p_games,postW%
0,adamsmi01w,5,WAS,0,17,17,1,2,34,50.0,3,33.33
1,adubari99w,1,NYL,0,20,12,4,3,32,62.5,7,57.14
2,adubari99w,2,NYL,0,21,11,3,3,32,65.62,6,50.0
3,adubari99w,3,NYL,0,18,14,4,4,32,56.25,8,50.0
4,adubari99w,4,NYL,0,16,18,0,0,34,47.06,0,0.0


In [44]:
#teams match up results (only post is available)
series_post = pd.read_csv('../basketballPlayoffs/series_post.csv')


# Get all unique teams that appear on either side of a series
unique_teams = set(series_post['tmIDWinner']).union(series_post['tmIDLoser'])
print(f"Total number of unique teams: {len(unique_teams)}")

# Create all possible combinations of matchups (excluding self-matchups).
# Keys are "A vs B" with A before B alphabetically, so each pair has exactly one key.
matchups = [f"{teams[0]} vs {teams[1]}" for teams in combinations(sorted(unique_teams), 2)]

# Initialize the records for all matchups.
# 'won' / 'lost' are counted from the point of view of the FIRST team in the key.
records = OrderedDict((matchup, {'won': 0, 'lost': 0}) for matchup in matchups)

# Update win-loss counts.
# 'W' and 'L' in series_post belong to the series winner (tmIDWinner). Because the
# key is alphabetically ordered, the winner may be either the first or the second
# team of the key; when it is the second one the two counts must be swapped,
# otherwise the totals would not be attributable to either team.
for winner, loser, winner_wins, winner_losses in zip(
    series_post['tmIDWinner'],
    series_post['tmIDLoser'],
    series_post['W'],
    series_post['L'],
):
    first_team, second_team = sorted([winner, loser])
    record = records[f"{first_team} vs {second_team}"]
    if winner == first_team:
        record['won'] += winner_wins
        record['lost'] += winner_losses
    else:
        record['won'] += winner_losses
        record['lost'] += winner_wins

# Convert the OrderedDict to a DataFrame (one row per matchup, in key order)
team_matchups = pd.DataFrame.from_dict(records, orient='index').reset_index()
team_matchups.columns = ['matchup', 'total_wins', 'total_losses']

print(team_matchups.head())


Total number of unique teams: 18
      matchup  total_wins  total_losses
0  ATL vs CHA           0             0
1  ATL vs CLE           0             0
2  ATL vs CON           0             0
3  ATL vs DET           2             0
4  ATL vs HOU           0             0
