In [1]:
import pandas as pd
import pymysql

game_details = pd.read_csv('../data/games_details.csv')
games = pd.read_csv('../data/games.csv')
players = pd.read_csv('../data/players.csv')
ranking = pd.read_csv('../data/ranking.csv')
teams = pd.read_csv('../data/teams.csv')

## Database connection

In [2]:
connection = pymysql.connect(host='localhost', user='auth_user', password='Aauth123', db='nba_stats')
cursor = connection.cursor()

## Arenas
Let's create the clean data for ARENA's Table

In [3]:
unique_arenas = teams.groupby(['ARENA']).size().reset_index(name='Count').sort_values(by='ARENA', ascending=False)
unique_arenas

Unnamed: 0,ARENA,Count
28,Wells Fargo Center,1
27,Vivint Smart Home Arena,1
26,United Center,1
25,Toyota Center,1
24,Target Center,1
23,Talking Stick Resort Arena,1
22,TD Garden,1
21,State Farm Arena,1
20,Staples Center,2
19,Spectrum Center,1


There don't seem to be syntax errors in the unique arenas.
In this case only the staple stadium has 2 records, let's see if arena's capacity matches.

In [4]:
filtered_rows = teams[teams['ARENA'] == 'Staples Center']['ARENACAPACITY']
filtered_rows

7    19060.0
8    19060.0
Name: ARENACAPACITY, dtype: float64

In [5]:
for _, row in teams.drop_duplicates(subset=['ARENA', 'ARENACAPACITY'])[['ARENA', 'ARENACAPACITY']].fillna(0).iterrows():
    cursor.execute("""
        INSERT INTO arenas (ARENA_NAME, ARENACAPACITY) 
        VALUES (%s, %s) 
    """, (row['ARENA'], row['ARENACAPACITY']))

connection.commit()


## TEAMS & TEAM STAFF

In [6]:
unique_teams = teams.groupby(['TEAM_ID', 'NICKNAME']).size().reset_index(name='Count').sort_values(by='NICKNAME', ascending=False)
print(len(unique_teams))
unique_teams

30


Unnamed: 0,TEAM_ID,NICKNAME,Count
27,1610612764,Wizards,1
7,1610612744,Warriors,1
20,1610612757,Trail Blazers,1
13,1610612750,Timberwolves,1
23,1610612760,Thunder,1
19,1610612756,Suns,1
22,1610612759,Spurs,1
8,1610612745,Rockets,1
24,1610612761,Raptors,1
28,1610612765,Pistons,1


In [7]:
unique_teams = teams.groupby(['TEAM_ID', 'ABBREVIATION']).size().reset_index(name='Count').sort_values(by='ABBREVIATION', ascending=False)
print(len(unique_teams))
unique_teams

30


Unnamed: 0,TEAM_ID,ABBREVIATION,Count
27,1610612764,WAS,1
25,1610612762,UTA,1
24,1610612761,TOR,1
22,1610612759,SAS,1
21,1610612758,SAC,1
20,1610612757,POR,1
19,1610612756,PHX,1
18,1610612755,PHI,1
16,1610612753,ORL,1
23,1610612760,OKC,1


In [8]:
def value_in_other_rows(row, df):
    for value in row:
        if df.drop(index=row.name).isin([value]).sum().sum() > 0:
            return True
    return False

teams['value_in_other_rows'] = teams[['OWNER', 'GENERALMANAGER', 'HEADCOACH', 'DLEAGUEAFFILIATION']].apply(value_in_other_rows, axis=1, args=(teams,))
teams[['OWNER', 'GENERALMANAGER', 'HEADCOACH', 'DLEAGUEAFFILIATION', 'value_in_other_rows']]

Unnamed: 0,OWNER,GENERALMANAGER,HEADCOACH,DLEAGUEAFFILIATION,value_in_other_rows
0,Tony Ressler,Travis Schlenk,Lloyd Pierce,Erie Bayhawks,False
1,Wyc Grousbeck,Danny Ainge,Brad Stevens,Maine Red Claws,False
2,Tom Benson,Trajan Langdon,Alvin Gentry,No Affiliate,True
3,Jerry Reinsdorf,Gar Forman,Jim Boylen,Windy City Bulls,False
4,Mark Cuban,Donnie Nelson,Rick Carlisle,Texas Legends,False
5,Stan Kroenke,Tim Connelly,Michael Malone,No Affiliate,True
6,Tilman Fertitta,Daryl Morey,Mike D'Antoni,Rio Grande Valley Vipers,False
7,Steve Ballmer,Michael Winger,Doc Rivers,Agua Caliente Clippers of Ontario,False
8,Jerry Buss Family Trust,Rob Pelinka,Frank Vogel,South Bay Lakers,False
9,Micky Arison,Pat Riley,Erik Spoelstra,Sioux Falls Skyforce,False


Now I can see that team that is unique and is safe to be migrated to the database. This applies to team staff as no name is repeated across roles or teams.

In [9]:
for _, row in teams.drop_duplicates(subset=['TEAM_ID']).fillna(0).iterrows():
    # I'm retrieving the arena_id as it was autogenerated in the previous step
    cursor.execute("SELECT ARENA_ID FROM arenas WHERE ARENA_NAME = %s", (row['ARENA'],))
    arena_id_result = cursor.fetchone()
    
    if arena_id_result:
        arena_id = arena_id_result[0]
        cursor.execute("INSERT INTO teams (TEAM_ID, LEAGUE_ID, MIN_YEAR, MAX_YEAR, ABBREVIATION, NICKNAME, YEARFOUNDED, CITY, ARENA_ID) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)", 
                   (row['TEAM_ID'], row['LEAGUE_ID'], row['MIN_YEAR'], row['MAX_YEAR'], row['ABBREVIATION'], row['NICKNAME'], row['YEARFOUNDED'], row['CITY'], arena_id))
    else:
        print(f"Error: Arena named {row['ARENA']} not found.")
    cursor.execute("INSERT INTO team_staff VALUES (%s, %s, %s, %s, %s)", 
                (row['TEAM_ID'], row['OWNER'], row['GENERALMANAGER'], 
                row['HEADCOACH'], row['DLEAGUEAFFILIATION']))
connection.commit()


## PLAYERS &  TEAM Associations

In [10]:
unique_teams = players.groupby(['PLAYER_ID', 'PLAYER_NAME', 'SEASON']).size().reset_index(name='Count').sort_values(by='Count', ascending=False)
print(len(unique_teams))
unique_teams.head(30)

6305


Unnamed: 0,PLAYER_ID,PLAYER_NAME,SEASON,Count
2917,202178,Sundiata Gaines,2010,4
5889,1628502,Nigel Hayes,2017,4
4757,203930,Sean Kilpatrick,2017,4
3113,202343,Elliot Williams,2014,4
6124,1629150,Emanuel Terry,2018,4
4479,203516,James Ennis III,2015,3
4245,203462,Archie Goodwin,2016,3
4091,203144,Alexey Shved,2014,3
2841,202066,Garrett Temple,2010,3
5340,1627362,Briante Weber,2017,3


In [11]:
filtered_rows = players[players['PLAYER_NAME'] == 'Sundiata Gaines']
filtered_rows

Unnamed: 0,PLAYER_NAME,TEAM_ID,PLAYER_ID,SEASON
5388,Sundiata Gaines,1610612754,202178,2012
5675,Sundiata Gaines,1610612751,202178,2011
6419,Sundiata Gaines,1610612751,202178,2010
6508,Sundiata Gaines,1610612761,202178,2010
6519,Sundiata Gaines,1610612750,202178,2010
6592,Sundiata Gaines,1610612762,202178,2010
6704,Sundiata Gaines,1610612762,202178,2009


In [12]:
unique_teams = players.groupby(['TEAM_ID', 'PLAYER_ID', 'PLAYER_NAME', 'SEASON']).size().reset_index(name='Count').sort_values(by='PLAYER_NAME', ascending=False)
print(len(unique_teams))
unique_teams

7228


Unnamed: 0,TEAM_ID,PLAYER_ID,PLAYER_NAME,SEASON,Count
1035,1610612740,1629597,Zylan Cheatham,2019,1
2714,1610612748,980,Zydrunas Ilgauskas,2010,1
496,1610612739,980,Zydrunas Ilgauskas,2009,1
2903,1610612748,204054,Zoran Dragic,2014,1
4785,1610612756,204054,Zoran Dragic,2014,1
...,...,...,...,...,...
4225,1610612754,201985,AJ Price,2010,1
6635,1610612764,201985,AJ Price,2012,1
4226,1610612754,201985,AJ Price,2011,1
2922,1610612748,1627773,AJ Hammons,2017,1


According to this, considering that a player might change from team after a season the players table has no duplicates and can safely be inserted into players table. But for a season a player might have been o several teams... For example, Sundiata Gaines who had 10days contract in 2010 and helped several teams.

In [13]:
# Inserting just players
for _, row in players.drop_duplicates(subset=['PLAYER_ID', 'PLAYER_NAME'])[['PLAYER_ID', 'PLAYER_NAME']].fillna(0).iterrows():
    cursor.execute("INSERT INTO players VALUES (%s, %s)", 
                   (row['PLAYER_ID'], row['PLAYER_NAME']))
connection.commit()


In [14]:
# Inserting jthe relationship
for _, row in players.drop_duplicates(subset=['PLAYER_ID', 'TEAM_ID', 'SEASON'])[['PLAYER_ID', 'TEAM_ID', 'SEASON']].fillna(0).iterrows():
    cursor.execute("INSERT INTO player_team_associations (PLAYER_ID, TEAM_ID, SEASON)  VALUES (%s, %s,  %s)", 
                   (row['PLAYER_ID'], row['TEAM_ID'], row['SEASON']))
connection.commit()


## GAME and TEAM STATS (not details)

In [15]:
filtered_df = games[games['HOME_TEAM_ID'] != games['TEAM_ID_home']]
filtered_df

Unnamed: 0,GAME_DATE_EST,GAME_ID,GAME_STATUS_TEXT,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,TEAM_ID_home,PTS_home,FG_PCT_home,FT_PCT_home,...,AST_home,REB_home,TEAM_ID_away,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS


In [16]:
filtered_df = games[games['VISITOR_TEAM_ID'] != games['TEAM_ID_away']]
filtered_df

Unnamed: 0,GAME_DATE_EST,GAME_ID,GAME_STATUS_TEXT,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,TEAM_ID_home,PTS_home,FG_PCT_home,FT_PCT_home,...,AST_home,REB_home,TEAM_ID_away,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS


This implies that HOME/away TEAM ID and TEAM ID HOME/Away are the same

In [17]:
unique_games = games.groupby(['GAME_ID', 'HOME_TEAM_ID', 'VISITOR_TEAM_ID', 'SEASON']).size().reset_index(name='Count').sort_values(by='Count', ascending=False)
unique_games

Unnamed: 0,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,Count
0,10300001,1610612762,1610612742,2003,1
15687,21400797,1610612742,1610612762,2014,1
15685,21400795,1610612760,1610612763,2014,1
15684,21400794,1610612740,1610612754,2014,1
15683,21400793,1610612750,1610612744,2014,1
...,...,...,...,...,...
7838,20800087,1610612746,1610612742,2008,1
7837,20800086,1610612752,1610612762,2008,1
7836,20800085,1610612766,1610612761,2008,1
7835,20800084,1610612757,1610612750,2008,1


In [18]:
unique_games = games.groupby(['GAME_ID', 'HOME_TEAM_ID']).size().reset_index(name='Count').sort_values(by='Count', ascending=False)
unique_games

Unnamed: 0,GAME_ID,HOME_TEAM_ID,Count
0,10300001,1610612762,1
15687,21400797,1610612742,1
15685,21400795,1610612760,1
15684,21400794,1610612740,1
15683,21400793,1610612750,1
...,...,...,...
7838,20800087,1610612746,1
7837,20800086,1610612752,1
7836,20800085,1610612766,1
7835,20800084,1610612757,1


In [19]:
unique_games = games.groupby(['GAME_ID', 'VISITOR_TEAM_ID']).size().reset_index(name='Count').sort_values(by='Count', ascending=False)
unique_games

Unnamed: 0,GAME_ID,VISITOR_TEAM_ID,Count
0,10300001,1610612742,1
15687,21400797,1610612762,1
15685,21400795,1610612763,1
15684,21400794,1610612754,1
15683,21400793,1610612744,1
...,...,...,...
7838,20800087,1610612742,1
7837,20800086,1610612762,1
7836,20800085,1610612761,1
7835,20800084,1610612750,1


In [20]:
unique_games = games.groupby(['GAME_ID', 'SEASON']).size().reset_index(name='Count').sort_values(by='Count', ascending=False)
unique_games

Unnamed: 0,GAME_ID,SEASON,Count
0,10300001,2003,1
15687,21400797,2014,1
15685,21400795,2014,1
15684,21400794,2014,1
15683,21400793,2014,1
...,...,...,...
7838,20800087,2008,1
7837,20800086,2008,1
7836,20800085,2008,1
7835,20800084,2008,1


This means that at these grouping there are no duplicates, which is the idea... meaning gameID and either home/Visitor id are unique and also across season

In [21]:
for _, row in games.drop_duplicates(subset=['GAME_ID', 'HOME_TEAM_ID', 'VISITOR_TEAM_ID', 'SEASON']).fillna(0).iterrows():
    cursor.execute("INSERT INTO games VALUES (%s, %s, %s, %s, %s)", 
                   (row['GAME_ID'], row['GAME_DATE_EST'], row['GAME_STATUS_TEXT'], row['SEASON'], row['HOME_TEAM_WINS']))

    # Home Team
    cursor.execute("INSERT INTO team_stats VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)", 
                   (row['GAME_ID'], row['TEAM_ID_home'], 'HOME', row['PTS_home'], row['FG_PCT_home'], 
                    row['FT_PCT_home'], row['FG3_PCT_home'], row['AST_home'], row['REB_home']))

    # Away team
    cursor.execute("INSERT INTO team_stats VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)", 
                   (row['GAME_ID'], row['TEAM_ID_away'], 'AWAY', row['PTS_away'], row['FG_PCT_away'], 
                    row['FT_PCT_away'], row['FG3_PCT_away'], row['AST_away'], row['REB_away']))
    connection.commit()



## GAME DETAILS (per player)

In [22]:
unique_gd = game_details.groupby(['GAME_ID', 'TEAM_ID', 'PLAYER_ID']).size().reset_index(name='Count').sort_values(by='Count', ascending=False)
unique_gd

Unnamed: 0,GAME_ID,TEAM_ID,PLAYER_ID,Count
0,10300001,1610612742,696,1
390532,21400900,1610612761,101161,1
390526,21400900,1610612739,202681,1
390527,21400900,1610612739,202684,1
390528,21400900,1610612739,202697,1
...,...,...,...,...
195258,20800180,1610612760,101130,1
195257,20800180,1610612760,2863,1
195256,20800180,1610612760,2741,1
195255,20800180,1610612760,2555,1


In [25]:
# Let's check that all player IDs are in the players CSV
missing_ids = game_details[~game_details['PLAYER_ID'].isin(players['PLAYER_ID'])]['PLAYER_ID'].unique()
missing_rows = game_details[~game_details['PLAYER_ID'].isin(players['PLAYER_ID'])]
missing_rows

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS
5,12000047,1610612766,CHA,Charlotte,1630163,LaMelo Ball,,,24:27,1.0,...,2.0,3.0,5.0,4.0,0.0,1.0,4.0,1.0,4.0,-1.0
11,12000047,1610612766,CHA,Charlotte,1630270,Xavier Sneed,,,4:34,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-1.0
12,12000047,1610612766,CHA,Charlotte,1630208,Nick Richards,,,3:53,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,-4.0
13,12000047,1610612766,CHA,Charlotte,1629602,Javin DeLaurier,,,3:18,1.0,...,1.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0
14,12000047,1610612766,CHA,Charlotte,1630232,Kahlil Whitney,,,1:46,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,-7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224080,10300001,1610612742,DAL,Dallas,2412,Jiri Welsch,,,5,2.0,...,0.0,4.0,4.0,2.0,0.0,0.0,3.0,3.0,8.0,
224082,10300001,1610612742,DAL,Dallas,1711,Raef LaFrentz,,,5,4.0,...,1.0,4.0,5.0,0.0,1.0,1.0,1.0,2.0,10.0,
224083,10300001,1610612742,DAL,Dallas,762,Shawn Bradley,,,5,3.0,...,2.0,2.0,4.0,1.0,1.0,1.0,0.0,1.0,7.0,
224085,10300001,1610612742,DAL,Dallas,1505,Tariq Abdul-Wahad,,,5,1.0,...,1.0,4.0,5.0,3.0,0.0,0.0,0.0,2.0,2.0,


As there are some missing players... let's add them to the players table and players junction table

In [26]:
# Let's get the players full info - in game details there's no season but we can query it
merged_data = pd.merge(missing_rows, games[['GAME_ID', 'SEASON']], on='GAME_ID', how='left')
players_missing = merged_data[['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'SEASON']].drop_duplicates()
players_missing

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,SEASON
0,1630163,LaMelo Ball,1610612766,2020
1,1630270,Xavier Sneed,1610612766,2020
2,1630208,Nick Richards,1610612766,2020
3,1629602,Javin DeLaurier,1610612766,2020
4,1630232,Kahlil Whitney,1610612766,2020
...,...,...,...,...
58945,2697,Antonio Meeking,1610612749,2003
58950,2144,Ademola Okulaja,1610612762,2003
58953,2650,Demetrius Alexander,1610612762,2003
58955,2651,Jermaine Boyette,1610612762,2003


In [27]:
# Inserting just players
for _, row in players_missing.drop_duplicates(subset=['PLAYER_ID', 'PLAYER_NAME'])[['PLAYER_ID', 'PLAYER_NAME']].fillna(0).iterrows():
    cursor.execute("INSERT INTO players VALUES (%s, %s)", 
                   (row['PLAYER_ID'], row['PLAYER_NAME']))
connection.commit()

In [28]:
# Inserting the relationship
for _, row in players_missing.drop_duplicates(subset=['PLAYER_ID', 'TEAM_ID', 'SEASON'])[['PLAYER_ID', 'TEAM_ID', 'SEASON']].fillna(0).iterrows():
    cursor.execute("INSERT INTO player_team_associations (PLAYER_ID, TEAM_ID, SEASON)  VALUES (%s, %s,  %s)", 
                   (row['PLAYER_ID'], row['TEAM_ID'], row['SEASON']))
connection.commit()


In [29]:
# Let's check that all team IDs are in the team CSV
missing_ids = game_details[~game_details['TEAM_ID'].isin(teams['TEAM_ID'])]['TEAM_ID'].unique()
missing_rows = game_details[~game_details['TEAM_ID'].isin(teams['TEAM_ID'])]
missing_rows


Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS


In [30]:
# Let's check that all game  IDs are in the game CSV
missing_ids = game_details[~game_details['GAME_ID'].isin(games['GAME_ID'])]['GAME_ID'].unique()
missing_rows = game_details[~game_details['GAME_ID'].isin(games['GAME_ID'])]
missing_rows

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS


In [31]:
# Check why there are some players not there
filtered_rows = players[players['PLAYER_ID'] == 'Shawn']
filtered_rows

Unnamed: 0,PLAYER_NAME,TEAM_ID,PLAYER_ID,SEASON


In [23]:
# Now let's check that each game and player ID just has 1 existence
unique_gd = game_details.groupby(['GAME_ID', 'PLAYER_ID']).size().reset_index(name='Count').sort_values(by='Count', ascending=False)
unique_gd

Unnamed: 0,GAME_ID,PLAYER_ID,Count
0,10300001,696,1
390532,21400900,202335,1
390526,21400900,101236,1
390527,21400900,201567,1
390528,21400900,201880,1
...,...,...,...
195258,20800180,200756,1
195257,20800180,200754,1
195256,20800180,101130,1
195255,20800180,101108,1


Okay, so apparently this CSV doesn't have any duplicates...

In [33]:
for _, row in game_details.drop_duplicates(subset=['GAME_ID', 'TEAM_ID', 'PLAYER_ID']).fillna(0).iterrows():
    cursor.execute("INSERT INTO game_details VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", 
                   (row['GAME_ID'], row['PLAYER_ID'], row['START_POSITION'], row['COMMENT'], row['MIN'], 
                    row['FGM'], row['FGA'], row['FG_PCT'], row['FG3M'], row['FG3A'], row['FG3_PCT'], 
                    row['FTM'], row['FTA'], row['FT_PCT'], row['OREB'], row['DREB'], row['REB'], row['AST'], 
                    row['STL'], row['BLK'], row['TO'], row['PF'], row['PTS'], row['PLUS_MINUS']))

connection.commit()

## Ranking

In [39]:
unique_gd = ranking.groupby(['TEAM_ID', 'SEASON_ID', 'STANDINGSDATE']).size().reset_index(name='Count').sort_values(by='Count', ascending=False)
unique_gd

Unnamed: 0,TEAM_ID,SEASON_ID,STANDINGSDATE,Count
0,1610612737,12003,2003-10-05,1
125582,1610612756,22019,2020-03-20,1
125584,1610612756,22019,2020-03-22,1
125585,1610612756,22019,2020-03-23,1
125586,1610612756,22019,2020-03-24,1
...,...,...,...,...
62796,1610612746,22019,2020-08-09,1
62797,1610612746,22019,2020-08-10,1
62798,1610612746,22019,2020-08-11,1
62799,1610612746,22019,2020-08-12,1


So, this table shows the ranking apparently daily for each NBA team which would imply that a unique record is TEAM_ID, SEASON ID and the STANDINGDAte, and according to this there're no duplicates.

In [38]:
# Check why there are some players not there
filtered_rows = ranking[ranking['TEAM'] == 'L.A. Lakers']
filtered_rows

Unnamed: 0,TEAM_ID,LEAGUE_ID,SEASON_ID,STANDINGSDATE,CONFERENCE,TEAM,G,W,L,W_PCT,HOME_RECORD,ROAD_RECORD,RETURNTOPLAY
0,1610612747,0,12020,2020-12-21,West,L.A. Lakers,4,4,0,1.000,2-0,2-0,
15,1610612747,0,12020,2020-12-20,West,L.A. Lakers,4,4,0,1.000,2-0,2-0,
30,1610612747,0,12020,2020-12-19,West,L.A. Lakers,4,4,0,1.000,2-0,2-0,
45,1610612747,0,12020,2020-12-18,West,L.A. Lakers,4,4,0,1.000,2-0,2-0,
60,1610612747,0,12020,2020-12-17,West,L.A. Lakers,3,3,0,1.000,2-0,1-0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99716,1610612747,0,22013,2014-09-05,West,L.A. Lakers,82,27,55,0.329,14-27,13-28,
99731,1610612747,0,22013,2014-09-04,West,L.A. Lakers,82,27,55,0.329,14-27,13-28,
99746,1610612747,0,22013,2014-09-03,West,L.A. Lakers,82,27,55,0.329,14-27,13-28,
99761,1610612747,0,22013,2014-09-02,West,L.A. Lakers,82,27,55,0.329,14-27,13-28,


In [41]:
for _, row in ranking.drop_duplicates(subset=['TEAM_ID', 'SEASON_ID', 'STANDINGSDATE']).fillna(0).iterrows():
    cursor.execute("INSERT INTO ranking VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", 
                   (row['TEAM_ID'], row['LEAGUE_ID'], row['SEASON_ID'], row['STANDINGSDATE'], 
                    row['CONFERENCE'], row['TEAM'], row['G'], row['W'], row['L'], row['W_PCT'], 
                    row['HOME_RECORD'], row['ROAD_RECORD'], row['RETURNTOPLAY']))
connection.commit()    

In [43]:
cursor.close()
connection.close()