# Extracting trade data from player dataset

In this notebook we extract information on trades from the player dataset (refer to the `downloading_player_data` notebook). To do this, we compare consecutive rows of the same player's data (the two rows may belong to the same season or to different seasons) and look for pairs in which the team name differs. A difference in team name suggests that the player was traded to the team in the second of the two rows. We extract all such trades and store them in a separate CSV file.

In [1]:
import pandas as pd 
import os

Merging the player data into one DataFrame:

In [2]:
player_data_dir = "../data/raw/players_data"

# os.listdir returns entries in arbitrary, filesystem-dependent order. Sorting the
# listing makes the row order of the combined frame reproducible between runs.
player_csv_files = sorted(
    filename for filename in os.listdir(player_data_dir) if filename.endswith(".csv")
)
if not player_csv_files:
    # pd.concat would fail on an empty list with a generic
    # "No objects to concatenate"; name the actual problem instead.
    raise ValueError(f"No player CSV files found in {player_data_dir!r}")

player_data_frames = []
for filename in player_csv_files:
    file_path = os.path.join(player_data_dir, filename)
    # The player's name is the part of the filename before the first underscore.
    player_name = filename.split("_")[0]
    df = pd.read_csv(file_path)
    # Tag every row with the player it belongs to so the frames can be stacked.
    df['player_name'] = player_name
    player_data_frames.append(df)

# One frame for all players, with a fresh 0..n-1 index.
player_data_combined = pd.concat(player_data_frames, ignore_index=True)


player_data_combined

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,player_name
0,920,1985-86,0,1610612747,LAL,22.0,82,1.0,1542.0,209,...,160.0,221.0,381.0,54,49.0,49.0,99.0,229,521,A.C. Green
1,920,1986-87,0,1610612747,LAL,23.0,79,72.0,2240.0,316,...,210.0,405.0,615.0,84,70.0,80.0,102.0,171,852,A.C. Green
2,920,1987-88,0,1610612747,LAL,24.0,82,64.0,2636.0,322,...,245.0,465.0,710.0,93,87.0,45.0,120.0,204,937,A.C. Green
3,920,1988-89,0,1610612747,LAL,25.0,82,82.0,2510.0,401,...,258.0,481.0,739.0,103,94.0,55.0,119.0,172,1088,A.C. Green
4,920,1989-90,0,1610612747,LAL,26.0,82,82.0,2709.0,385,...,262.0,450.0,712.0,90,66.0,50.0,116.0,207,1061,A.C. Green
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29870,980,2008-09,0,1610612739,CLE,34.0,65,65.0,1765.0,342,...,157.0,333.0,490.0,64,28.0,84.0,90.0,183,838,Zydrunas Ilgauskas
29871,980,2009-10,0,1610612739,CLE,35.0,64,6.0,1339.0,194,...,114.0,231.0,345.0,48,14.0,50.0,63.0,183,474,Zydrunas Ilgauskas
29872,980,2010-11,0,1610612748,MIA,36.0,72,51.0,1145.0,162,...,108.0,179.0,287.0,26,23.0,58.0,52.0,185,360,Zydrunas Ilgauskas
29873,1629597,2019-20,0,1610612740,NOP,24.0,4,0.0,51.0,6,...,3.0,6.0,9.0,3,1.0,1.0,4.0,10,12,Zylan Cheatham


In [3]:
team_data_dir = "../data/raw/teams_regSeason_data"
team_id_name_map = {}

# Build a lookup from numeric team id to team name using the team CSV filenames.
for filename in os.listdir(team_data_dir):
    if not filename.endswith(".csv"):
        continue
    # Text before the first underscore is the team name; the rest is the id plus extension.
    name_part, id_part = filename.split("_", maxsplit=1)
    # Keep only what precedes the first dot, then store the id as an integer key.
    team_id_name_map[int(id_part.partition(".")[0])] = name_part


team_id_name_map

{1610612737: 'Atlanta Hawks',
 1610612738: 'Boston Celtics',
 1610612751: 'Brooklyn Nets',
 1610612766: 'Charlotte Hornets',
 1610612741: 'Chicago Bulls',
 1610612739: 'Cleveland Cavaliers',
 1610612742: 'Dallas Mavericks',
 1610612743: 'Denver Nuggets',
 1610612765: 'Detroit Pistons',
 1610612744: 'Golden State Warriors',
 1610612745: 'Houston Rockets',
 1610612754: 'Indiana Pacers',
 1610612746: 'Los Angeles Clippers',
 1610612747: 'Los Angeles Lakers',
 1610612763: 'Memphis Grizzlies',
 1610612748: 'Miami Heat',
 1610612749: 'Milwaukee Bucks',
 1610612750: 'Minnesota Timberwolves',
 1610612740: 'New Orleans Pelicans',
 1610612752: 'New York Knicks',
 1610612760: 'Oklahoma City Thunder',
 1610612753: 'Orlando Magic',
 1610612755: 'Philadelphia 76ers',
 1610612756: 'Phoenix Suns',
 1610612757: 'Portland Trail Blazers',
 1610612758: 'Sacramento Kings',
 1610612759: 'San Antonio Spurs',
 1610612761: 'Toronto Raptors',
 1610612762: 'Utah Jazz',
 1610612764: 'Washington Wizards'}

In [4]:
# Translate each row's TEAM_ID into a team name. IDs that have no entry in
# team_id_name_map become NaN in the new 'team_name' column.
player_data_combined['team_name'] = player_data_combined['TEAM_ID'].map(team_id_name_map)

In [5]:
# List the columns of the combined frame, which now include 'team_name'.
player_data_combined.columns

Index(['PLAYER_ID', 'SEASON_ID', 'LEAGUE_ID', 'TEAM_ID', 'TEAM_ABBREVIATION',
       'PLAYER_AGE', 'GP', 'GS', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A',
       'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
       'BLK', 'TOV', 'PF', 'PTS', 'player_name', 'team_name'],
      dtype='object')

In [6]:
# function to extract all trades from the player dataset using the logic mentioned above
def extract_trades(df):
    """Extract team changes ("trades") from the combined player dataset.

    Each player's rows are walked in the order they appear in ``df``, and a
    trade is recorded whenever a row's ``team_name`` differs from the team in
    that player's previous row. Rows are not re-sorted, so they are expected
    to already be in chronological order for each player.

    Parameters
    ----------
    df : pandas.DataFrame
        Player data containing at least the columns ``PLAYER_ID``,
        ``player_name``, ``SEASON_ID`` and ``team_name``.

    Returns
    -------
    pandas.DataFrame
        One row per detected team change, with the columns ``player_id``,
        ``player_name``, ``trade_year``, ``team_traded_from`` and
        ``team_traded_to``. The columns are present even when no trade is
        found, so column lookups on the result never fail.

    Notes
    -----
    A missing (NaN) ``team_name`` compares unequal to every value, including
    another NaN, so rows with a missing team produce entries whose
    ``team_traded_from`` / ``team_traded_to`` is NaN. They are kept on purpose
    and are expected to be handled when the trade data is cleaned.
    """
    trade_columns = [
        'player_id',
        'player_name',
        'trade_year',
        'team_traded_from',
        'team_traded_to',
    ]
    trades = []
    # Group on PLAYER_ID as well as the name: different players can share a
    # name, and grouping on the name alone would merge their histories and
    # invent a trade between one career and the next. Keeping the name as the
    # first key preserves the name-sorted order of the result.
    for (player_name, player_id), player_data in df.groupby(['player_name', 'PLAYER_ID']):
        prev_team = None
        # Iterate only over the two columns that are needed instead of
        # building a full Series per row with iterrows().
        for season, team in zip(player_data['SEASON_ID'], player_data['team_name']):
            if prev_team is not None and team != prev_team:  # check if team has changed
                trades.append({
                    'player_id': player_id,
                    'player_name': player_name,
                    'trade_year': season,
                    'team_traded_from': prev_team,
                    'team_traded_to': team
                })
            prev_team = team
    return pd.DataFrame(trades, columns=trade_columns)


# Run the extraction over every player in the combined frame.
trade_df = extract_trades(player_data_combined)

In [7]:
# Display the extracted trades.
trade_df

Unnamed: 0,player_id,player_name,trade_year,team_traded_from,team_traded_to
0,920,A.C. Green,1993-94,Los Angeles Lakers,Phoenix Suns
1,920,A.C. Green,1996-97,Phoenix Suns,Dallas Mavericks
2,920,A.C. Green,1996-97,Dallas Mavericks,
3,920,A.C. Green,1997-98,,Dallas Mavericks
4,920,A.C. Green,1999-00,Dallas Mavericks,Los Angeles Lakers
...,...,...,...,...,...
12522,1985,Zendon Hamilton,2005-06,Philadelphia 76ers,
12523,204054,Zoran Dragic,2014-15,Phoenix Suns,Miami Heat
12524,204054,Zoran Dragic,2014-15,Miami Heat,
12525,980,Zydrunas Ilgauskas,2010-11,Cleveland Cavaliers,Miami Heat


Teams that `'Kyrie Irving'` has been traded to over the years:

In [8]:
# Keep only the rows whose player_name is exactly 'Kyrie Irving'.
trade_df.loc[trade_df['player_name'].eq('Kyrie Irving')]

Unnamed: 0,player_id,player_name,trade_year,team_traded_from,team_traded_to
7372,202681,Kyrie Irving,2017-18,Cleveland Cavaliers,Boston Celtics
7373,202681,Kyrie Irving,2019-20,Boston Celtics,Brooklyn Nets
7374,202681,Kyrie Irving,2022-23,Brooklyn Nets,Dallas Mavericks
7375,202681,Kyrie Irving,2022-23,Dallas Mavericks,
7376,202681,Kyrie Irving,2023-24,,Dallas Mavericks


As we can see, some rows have `NaN` in `team_traded_from` or `team_traded_to`. These values come from player rows whose `TEAM_ID` has no entry in `team_id_name_map`, so such rows do not represent actual trades. We will take this into account when cleaning the data (refer to the `clean_trade_data` notebook).

Let's save this data to a CSV file:

In [9]:
# Build the output folder and file paths from their components.
output_dir = os.path.join("..", "data", "raw", "trade_data")
output_filename = os.path.join(output_dir, "trade_data.csv")

# Create the folder if needed; exist_ok=True keeps re-runs from failing.
os.makedirs(output_dir, exist_ok=True)

# index=False leaves the DataFrame's row index out of the file.
trade_df.to_csv(path_or_buf=output_filename, index=False)