In [1]:
# Dependencies
import pandas as pd

In [2]:
# Resource Files
# All paths are relative to the notebook's working directory.
data1819_path = "Resources/2018-2019 NBA Player Stats.xlsx"
data1920_path = "Resources/2019-2020 NBA Player Stats.xlsx"
data2021_path = "Resources/2020-2021 NBA Player Stats.xlsx"
data2122_path = "Resources/2021-2022 NBA Player Stats.xlsx"
data2223_path = "Resources/2022-2023 NBA Player Stats.csv"
# No trailing whitespace after the extension: on POSIX filesystems a stray space
# is part of the file name and the read fails with FileNotFoundError.
abbrv_path = "Resources/NBA Team Abbreviations.xlsx"

# Read data into data frames
# header=1 makes pandas take the column labels from the second row (0-indexed
# row 1) of each season workbook; the CSV and the abbreviation sheet use the
# default header row.
data1819 = pd.read_excel(data1819_path, header=1)
data1920 = pd.read_excel(data1920_path, header=1)
data2021 = pd.read_excel(data2021_path, header=1)
data2122 = pd.read_excel(data2122_path, header=1)
data2223 = pd.read_csv(data2223_path)
abbrv = pd.read_excel(abbrv_path)

In [3]:
# CLEANING DATA

# Normalise the abbreviations: str.capitalize() upper-cases the first character
# and lower-cases the rest (presumably to match the TEAM values used as the
# join key in the merge cell -- verify against the raw data).
abbrv["Abbreviation/acronym"] = abbrv["Abbreviation/acronym"].str.capitalize()

# Unified column labels applied to every season frame; the last label names the
# season column appended by the helper below.
columns = [
    "RANK", "FULL NAME", "TEAM", "POS", "AGE", "GP", "MPG", "MIN%", "USG%", "Tor%",
    "FTA", "FT%", "2PA", "2P%", "3PA", "3P%", "eFG%", "TS%", "PPG", "RPG",
    "TRB%", "APG", "AST%", "SPG", "BPG", "TOPG", "VI", "ORTG", "DRTG", "YEAR",
]


def _add_season_and_relabel(frame, season):
    """Append the season label as a new last column, then relabel all columns.

    The frame is modified in place when the "Year" column is added; the
    relabelled frame returned by set_axis is what the caller rebinds.
    Labels are assigned purely by position, so the frame must already have its
    columns in the same order as `columns`.
    """
    frame["Year"] = season
    return frame.set_axis(columns, axis=1)


# NOTE(review): the relabel is positional. The displayed 2022-2023 rows show
# values such as SPG 9.20 / BPG 8.60, which suggests the CSV's column order may
# differ from the Excel workbooks -- confirm the CSV header before trusting
# per-column stats for that season.
data1819 = _add_season_and_relabel(data1819, "2018-2019")
data1920 = _add_season_and_relabel(data1920, "2019-2020")
data2021 = _add_season_and_relabel(data2021, "2020-2021")
data2122 = _add_season_and_relabel(data2122, "2021-2022")
data2223 = _add_season_and_relabel(data2223, "2022-2023")

In [4]:
# Stack the five season frames on top of each other
merged_df = pd.concat([data1819, data1920, data2021, data2122, data2223])

# Bring in the abbreviation table's columns by matching TEAM against the
# abbreviation. merge() defaults to an inner join, so rows whose TEAM has no
# matching abbreviation are not kept.
merged_df = merged_df.merge(
    abbrv,
    left_on="TEAM",
    right_on="Abbreviation/acronym",
)

# Remove the two join-key columns now that the merge is done
merged_df = merged_df.drop(columns=["TEAM", "Abbreviation/acronym"])

merged_df

Unnamed: 0,RANK,FULL NAME,POS,AGE,GP,MPG,MIN%,USG%,Tor%,FTA,...,APG,AST%,SPG,BPG,TOPG,VI,ORTG,DRTG,YEAR,Franchise
0,1,Marc Gasol,C,34.37,24,30.6,63.8,13.6,9.5,46.0,...,3.0,14.7,0.88,1.08,0.88,7.4,118.4,97.2,2018-2019,Toronto Raptors
1,2,Danny Green,G-F,31.98,24,28.5,59.4,12.6,13.6,23.0,...,1.1,5.4,1.29,0.46,1.08,4.2,97.4,101.6,2018-2019,Toronto Raptors
2,3,Serge Ibaka,F-C,29.74,24,20.8,43.4,21.4,10.6,42.0,...,0.9,7.4,0.46,0.96,1.04,7.1,105.5,91.9,2018-2019,Toronto Raptors
3,4,Kawhi Leonard,F,27.96,24,39.1,81.6,32.0,11.1,216.0,...,3.9,19.3,1.67,0.71,3.08,10.5,118.9,102.7,2018-2019,Toronto Raptors
4,5,Kyle Lowry,G,33.22,24,37.5,78.2,18.4,14.5,81.0,...,6.6,27.5,1.29,0.29,2.21,8.4,116.8,100.1,2018-2019,Toronto Raptors
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1358,154,Austin Rivers,G,30.90,4,11.6,9.0,11.1,0.0,0.0,...,0.0,0.3,3.80,2.80,4.00,3.4,115.4,118.1,2022-2023,Minnesota Timberwolves
1359,177,Nathan Knight,F-C,25.70,2,2.4,49.2,20.0,0.0,0.0,...,0.0,0.5,1.50,1.00,1.50,0.0,,,2022-2023,Minnesota Timberwolves
1360,208,Jordan McLaughlin,G,27.20,2,7.1,13.1,0.0,0.0,0.0,...,0.0,0.0,1.00,1.00,2.00,0.0,33.0,104.4,2022-2023,Minnesota Timberwolves
1361,210,Josh Minott,F,20.50,1,6.3,14.7,50.0,0.0,0.0,...,1.0,1.0,0.00,0.00,0.00,0.0,0.0,88.3,2022-2023,Minnesota Timberwolves


In [9]:
# Keep only complete rows: a row is removed if any of its columns is NA
merged_df = merged_df.dropna(axis="index", how="any")
merged_df

Unnamed: 0,RANK,FULL NAME,POS,AGE,GP,MPG,MIN%,USG%,Tor%,FTA,...,APG,AST%,SPG,BPG,TOPG,VI,ORTG,DRTG,YEAR,Franchise
0,1,Marc Gasol,C,34.37,24,30.6,63.8,13.6,9.5,46.000,...,3.0,14.7,0.88,1.08,0.88,7.4,118.4,97.2,2018-2019,Toronto Raptors
1,2,Danny Green,G-F,31.98,24,28.5,59.4,12.6,13.6,23.000,...,1.1,5.4,1.29,0.46,1.08,4.2,97.4,101.6,2018-2019,Toronto Raptors
2,3,Serge Ibaka,F-C,29.74,24,20.8,43.4,21.4,10.6,42.000,...,0.9,7.4,0.46,0.96,1.04,7.1,105.5,91.9,2018-2019,Toronto Raptors
3,4,Kawhi Leonard,F,27.96,24,39.1,81.6,32.0,11.1,216.000,...,3.9,19.3,1.67,0.71,3.08,10.5,118.9,102.7,2018-2019,Toronto Raptors
4,5,Kyle Lowry,G,33.22,24,37.5,78.2,18.4,14.5,81.000,...,6.6,27.5,1.29,0.29,2.21,8.4,116.8,100.1,2018-2019,Toronto Raptors
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1356,95,Taurean Prince,F,29.20,5,20.0,19.4,13.0,9.0,0.778,...,0.2,1.0,9.20,8.60,10.00,4.1,101.1,111.5,2022-2023,Minnesota Timberwolves
1357,142,Jaylen Nowell,G,23.90,5,12.4,21.5,7.1,2.0,0.500,...,0.0,0.4,4.20,3.80,4.80,4.0,73.4,116.8,2022-2023,Minnesota Timberwolves
1358,154,Austin Rivers,G,30.90,4,11.6,9.0,11.1,0.0,0.000,...,0.0,0.3,3.80,2.80,4.00,3.4,115.4,118.1,2022-2023,Minnesota Timberwolves
1360,208,Jordan McLaughlin,G,27.20,2,7.1,13.1,0.0,0.0,0.000,...,0.0,0.0,1.00,1.00,2.00,0.0,33.0,104.4,2022-2023,Minnesota Timberwolves


In [11]:
# Write the combined frame to disk; index=False leaves the row index out of the file
merged_df.to_csv(
    path_or_buf="Resources/Combined NBA Player Stats.csv",
    index=False,
)