In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st

In [2]:
# Salary data files (relative paths, resolved against the notebook's working directory)
nba_salaries_path = "NBA_Salaries.csv"
wnba_salaries_path = "WNBA_salaries.csv"

In [3]:
# Read the NBA and WNBA salary CSV files into DataFrames
nba_salaries = pd.read_csv(nba_salaries_path)
wnba_salaries = pd.read_csv(wnba_salaries_path)

In [4]:
nba_salaries.columns

In [5]:
# Remove the dollar sign from the salary strings.
# regex=False is explicit on purpose: "$" is a regex end-of-string anchor and the
# default value of `regex` is not the same across pandas versions, so a literal
# match is requested instead of relying on the default.
nba_salaries["Salary"] = nba_salaries["Salary"].str.replace("$", "", regex=False)

In [6]:
nba_salaries.dtypes

In [7]:
# Remove thousands separators so the salary strings can be converted to numbers.
nba_salaries["Salary"] = nba_salaries["Salary"].str.replace(",", "", regex=False)

In [8]:
# Convert the cleaned salary strings to floating-point numbers.
nba_salaries["Salary"] = nba_salaries["Salary"].astype("float64")

In [9]:
nba_salaries.dtypes

In [10]:
# Independent (deep) copy of the cleaned NBA frame; copy() is deep by default.
nba_complete = nba_salaries.copy()

In [11]:
nba_complete.head()

In [12]:
# The 18 rows with the highest Salary; a player can appear more than once here
# (a later cell removes duplicate players).
top_eighteen_best_paid_nba = (
    nba_salaries
    .sort_values("Salary", ascending=False)
    .head(18)
)
top_eighteen_best_paid_nba

In [13]:
# Ten best-paid NBA players, one row per player (the row kept for each player is
# the one with that player's highest Salary, because rows are sorted descending
# before duplicates are dropped).
# Duplicates are removed *before* taking the first ten rows, so the result no
# longer depends on the top 18 rows happening to contain exactly ten names.
# nba_complete is the source because its Salary column stays in dollars; the bar
# chart further down divides these values by 1e6 itself.
top_ten_best_paid_nba = (
    nba_complete
    .sort_values(by="Salary", ascending=False)
    .drop_duplicates(["Player"])
    .head(10)
)

In [14]:
# One integer bar position per row of the top-ten frame (0, 1, 2, ...).
x_axis = np.arange(len(top_ten_best_paid_nba))

In [15]:
x_axis

In [16]:
# Player names of the top-ten frame; used later as x tick labels of the bar chart.
Player = top_ten_best_paid_nba['Player']
Player

In [17]:
# Salaries of the top-ten frame; the bar chart further down divides these by 1e6,
# so they are expected to still be in dollars here.
Salary = top_ten_best_paid_nba['Salary']

## NBA Summary Stats

In [18]:
# Average NBA salary for each value of Year.
mean = nba_salaries.groupby('Year')['Salary'].agg('mean')
mean

In [19]:
# Median NBA salary for each value of Year.
median = nba_salaries.groupby('Year')['Salary'].agg('median')
median

In [20]:
# Variance of NBA salaries for each value of Year.
variance = nba_salaries.groupby('Year')['Salary'].agg('var')
# Global pandas display option: floats are rendered without decimals from here on
# (it is not scoped to this cell).
pd.set_option('display.float_format', '{:.0f}'.format)
variance

In [21]:
# Standard deviation of NBA salaries for each value of Year.
std = nba_salaries.groupby('Year')['Salary'].agg('std')
# Same global display option as set for the variance output (no decimals).
pd.set_option('display.float_format', '{:.0f}'.format)
std

In [22]:
# Standard error of the mean of NBA salaries for each value of Year.
sem = nba_salaries.groupby('Year')['Salary'].agg('sem')
sem

In [23]:
# All per-Year NBA salary statistics in a single table (one column per statistic).
summary_stats = (
    nba_salaries
    .groupby('Year')['Salary']
    .agg(['mean', 'median', 'var', 'std', 'sem'])
)
display(summary_stats)

## NBA Graphs

In [24]:
# describe() snapshot of the NBA frame. This cell sits before the one that rescales
# Salary to millions, so when cells run top to bottom these figures are in dollars.
# `df` is reused later for the NBA vs WNBA comparison table.
df = nba_salaries.describe()

In [25]:
df

In [26]:
# df['Salary'].plot(kind = "bar", color = "blue")

# plt.title("NBA Summary")
# plt.xlabel("Summary Stats")
# #plt.xticks(wnba_x_axis,wnba_player,rotation=45)
# #plt.ylim(210000,236000)
# plt.ylabel("Salary ($)")
# plt.tight_layout()
# plt.ticklabel_format(style='plain', useOffset=False, axis='y')
# plt.savefig("graph_outputs/NBA_summary.png")
# plt.show()

In [27]:
# Express NBA salaries in millions of dollars for the plots that follow.
# The values are derived from nba_complete (the copy whose Salary stays in dollars)
# rather than from nba_salaries itself, so re-running this cell cannot divide the
# column by a million a second time.
nba_salaries['Salary'] = nba_complete['Salary'] / 1000000
nba_salaries

In [28]:

# Distribution of NBA salaries per Year.
# figsize is handed to boxplot directly: with by=..., pandas opens its own figure,
# so a preceding plt.figure(figsize=...) only leaves an empty figure behind and the
# plot (and the saved PNG) would come out at the default size.
nba_salaries.boxplot(by='Year', column=['Salary'], grid=False, figsize=(12, 6))
plt.ylabel("Salary (Million $)")

# Plain tick labels on the y axis (no scientific notation, no offset).
plt.ticklabel_format(style='plain', useOffset=False, axis='y')

plt.savefig("graph_outputs/NBA_Salary_2019_2023.png")
plt.show()

In [29]:
# Distribution of NBA salaries per Team (all years together).
# figsize goes to boxplot itself; with by=..., pandas creates its own figure, so a
# separate plt.figure(figsize=...) would be left empty and have no effect on the plot.
nba_salaries.boxplot(by='Team', column=['Salary'], grid=False, figsize=(12, 6))
plt.xlabel("Team")
plt.ylabel("Salary (Million $)")
plt.xticks(rotation='vertical')
# Plain tick labels on the y axis (no scientific notation, no offset).
plt.ticklabel_format(style='plain', useOffset=False, axis='y')

plt.show()


In [30]:
# Rows of the NBA frame whose Year is 2023.
nba_2023 = nba_salaries.loc[nba_salaries['Year'] == 2023]
nba_2023.head()

In [31]:
#nba_salaries['Salary'] = nba_2023['Salary']/1000000

In [32]:


# Salary spread per team for the 2023 rows, with outliers drawn as blue circles.
# NOTE(review): the title says "Top 5" but nothing in this cell limits the teams - confirm.
nba_2023.boxplot(
    column=['Salary'],
    by='Team',
    grid=False,
    flierprops={
        'marker': 'o',
        'markerfacecolor': 'blue',
        'markersize': 7,
        'linestyle': 'none',
        'markeredgecolor': 'black',
    },
)
plt.title('2023 NBA Top 5 Team Salary')
plt.xlabel('Teams')
plt.ylabel('Salary ($ Million)')
plt.xticks(rotation='vertical')
# Plain tick labels on the y axis (no scientific notation, no offset).
plt.ticklabel_format(style='plain', useOffset=False, axis='y')
plt.tight_layout()

plt.savefig("graph_outputs/NBA_Top5_Teams_Salary_2023.png")
plt.show()

In [33]:
nba_salaries

In [34]:
# Rows of the dollar-valued NBA copy whose Year is 2019.
# The mask is built from nba_complete itself (not from a different frame), and
# .copy() makes the result an independent frame so that later column assignments
# on nba_2019 are unambiguous and do not raise SettingWithCopyWarning.
nba_2019 = nba_complete[nba_complete['Year'] == 2019].copy()
nba_2019.head()

In [35]:
# Express the 2019 salaries in millions of dollars.
# assign() returns a new frame instead of writing into a filtered slice, which
# avoids pandas' SettingWithCopyWarning.
# Note: like the original, running this cell twice divides twice; re-run the cell
# that creates nba_2019 first.
nba_2019 = nba_2019.assign(Salary=nba_2019['Salary'] / 1000000)
nba_2019.head()

In [36]:
# Salary spread per team for the 2019 rows (default outlier markers).
# NOTE(review): the title says "Top 5" but nothing in this cell limits the teams - confirm.
nba_2019.boxplot(column=['Salary'], by='Team', grid=False)
plt.title('2019 NBA Top 5 Team Salary')
plt.xlabel('Teams')
plt.ylabel('Salary (Million $)')
plt.xticks(rotation='vertical')
# Plain tick labels on the y axis (no scientific notation, no offset).
plt.ticklabel_format(style='plain', useOffset=False, axis='y')
plt.tight_layout()

plt.savefig("graph_outputs/NBA_Top5_Teams_Salary_2019.png")
plt.show()

In [37]:
list(Salary)

In [38]:
# Bar chart of the best-paid NBA players, salaries shown in millions of dollars.
plt.figure(figsize=(10, 6))
bars = plt.bar(x_axis, Salary / 1e6, facecolor="blue")

plt.title("Top Ten Best Paid NBA Players")
plt.xlabel("Player")
plt.ylabel("Salary (Million $)")
plt.xticks(x_axis, Player, rotation=45)
# The y range is hard-coded to zoom in on the salaries being plotted.
plt.ylim(35, 53)

# Write each bar's value (rounded to 2 decimals) slightly above the bar.
for bar in bars:
    height = bar.get_height()
    plt.text(x=bar.get_x(), y=0.3 + height, s=str(round(height, 2)))

plt.tight_layout()
# Plain tick labels on the y axis (no scientific notation, no offset).
plt.ticklabel_format(style='plain', useOffset=False, axis='y')

plt.savefig("graph_outputs/TopTen_NBA.png")
plt.show()

In [39]:
nba_salaries.columns

## WNBA Summary Stats

In [40]:
wnba_salaries.dtypes

In [41]:
wnba_salaries.columns

In [42]:
# Trim leading/trailing whitespace from every column name.
wnba_salaries = wnba_salaries.rename(str.strip, axis="columns")

In [43]:
wnba_salaries

In [44]:
wnba_salaries.dtypes

In [45]:
#wnba_salaries["Salary"] = wnba_salaries[" Salary "].str.replace(",","")

In [46]:
# Remove the dollar sign from the salary strings.
# regex=False is explicit on purpose: "$" is a regex end-of-string anchor and the
# default value of `regex` is not the same across pandas versions, so a literal
# match is requested instead of relying on the default.
wnba_salaries["Salary"] = wnba_salaries["Salary"].str.replace("$", "", regex=False)

In [47]:
# Remove thousands separators so the salary strings can be converted to numbers.
wnba_salaries["Salary"] = wnba_salaries["Salary"].str.replace(",", "", regex=False)

In [48]:
#wnba_salaries.dtypes

In [49]:
# Convert the cleaned salary strings to floating-point numbers.
wnba_salaries["Salary"] = wnba_salaries["Salary"].astype("float64")

In [50]:
wnba_salaries.dtypes

In [51]:
wnba_salaries.sample(10)

In [52]:
# Average WNBA salary for each value of Year (rebinds the `mean` name used for the NBA).
mean = wnba_salaries.groupby('Year')['Salary'].agg('mean')
mean

In [53]:
# Median WNBA salary for each value of Year (rebinds the `median` name used for the NBA).
median = wnba_salaries.groupby('Year')['Salary'].agg('median')
median

In [54]:
# Variance of WNBA salaries for each value of Year (rebinds `variance`).
variance = wnba_salaries.groupby('Year')['Salary'].agg('var')
variance

In [55]:
# Standard deviation of WNBA salaries for each value of Year (rebinds `std`).
std = wnba_salaries.groupby('Year')['Salary'].agg('std')
std

In [56]:
# Standard error of the mean of WNBA salaries for each value of Year (rebinds `sem`).
sem = wnba_salaries.groupby('Year')['Salary'].agg('sem')
sem

In [57]:
# All per-Year WNBA salary statistics in a single table (one column per statistic).
wnba_summary_stats = (
    wnba_salaries
    .groupby('Year')['Salary']
    .agg(['mean', 'median', 'var', 'std', 'sem'])
)
wnba_summary_stats

## WNBA Graphs

In [58]:
# Distribution of WNBA salaries per Year (values in dollars).
wnba_salaries.boxplot(by='Year', column=['Salary'], grid=False)
plt.ylabel('Salary ($)')

# File name corrected from "..._2019_2013.png" to match the NBA counterpart
# ("NBA_Salary_2019_2023.png"); update any document that links to the old name.
plt.savefig("graph_outputs/WNBA_Salary_2019_2023.png")
plt.show()

In [59]:
# Distribution of WNBA salaries per Team (all years together); not saved to disk.
wnba_salaries.boxplot(column=['Salary'], by='Team', grid=False)
plt.xticks(rotation='vertical')
plt.show()

In [60]:
# Rows of the WNBA frame whose Year is 2023.
wnba_2023 = wnba_salaries.loc[wnba_salaries['Year'] == 2023]
wnba_2023.head()

In [61]:
# Salary spread per team for the 2023 rows, with outliers drawn as orange diamonds.
# NOTE(review): the title says "Top 5" but nothing in this cell limits the teams - confirm.
wnba_2023.boxplot(
    column=['Salary'],
    by='Team',
    grid=False,
    flierprops={
        'marker': 'D',
        'markerfacecolor': 'orange',
        'markersize': 7,
        'linestyle': 'none',
        'markeredgecolor': 'black',
    },
)
plt.title('2023 WNBA Top 5 Team Salary')
plt.xlabel('Teams')
plt.ylabel('Salary ($)')
plt.xticks(rotation=60)
# Plain tick labels on the y axis (no scientific notation, no offset).
plt.ticklabel_format(style='plain', useOffset=False, axis='y')
plt.tight_layout()

plt.savefig("graph_outputs/WNBA_Top5_Teams_Salary_2023.png")
plt.show()

In [62]:
# Rows of the WNBA frame whose Year is 2019.
wnba_2019 = wnba_salaries.loc[wnba_salaries['Year'] == 2019]
wnba_2019.head()

In [63]:
# Salary spread per team for the 2019 rows, with outliers drawn as pink diamonds.
# NOTE(review): the title says "Top 5" but nothing in this cell limits the teams - confirm.
wnba_2019.boxplot(
    column=['Salary'],
    by='Team',
    grid=False,
    flierprops={
        'marker': 'D',
        'markerfacecolor': 'pink',
        'markersize': 7,
        'linestyle': 'none',
        'markeredgecolor': 'black',
    },
)
plt.title('2019 WNBA Top 5 Team Salary')
plt.xlabel('Teams')
plt.ylabel('Salary ($)')
plt.xticks(rotation=75)
# Plain tick labels on the y axis (no scientific notation, no offset).
plt.ticklabel_format(style='plain', useOffset=False, axis='y')
plt.tight_layout()

plt.savefig("graph_outputs/WNBA_Top5_Teams_Salary_2019.png")
plt.show()

In [64]:
# The 20 rows with the highest Salary; a player can appear more than once here.
best_paid_wnba = (
    wnba_salaries
    .sort_values("Salary", ascending=False)
    .head(20)
)
best_paid_wnba

In [65]:
# Ten best-paid WNBA players, one row per player (the row kept for each player is
# the one with that player's highest Salary, because rows are sorted descending
# before duplicates are dropped).
# Duplicates are removed *before* taking the first ten rows, so the result no
# longer depends on the top 20 rows happening to contain exactly ten names.
wnba_top_ten = (
    wnba_salaries
    .sort_values(by="Salary", ascending=False)
    .drop_duplicates(["Player"])
    .head(10)
)
display(wnba_top_ten)

In [66]:
# One integer bar position per row of the WNBA top-ten frame (0, 1, 2, ...).
wnba_x_axis = np.arange(len(wnba_top_ten))

In [67]:
# Player names of the WNBA top-ten frame; used as x tick labels of the bar chart.
wnba_player = wnba_top_ten["Player"]

In [68]:
# Salaries of the WNBA top-ten frame; plotted as bar heights without rescaling.
wnba_salary = wnba_top_ten['Salary']

In [69]:
# Bar chart of the best-paid WNBA players (bar heights in dollars, default figure size).
bars = plt.bar(wnba_x_axis, wnba_salary, facecolor="orange")

plt.title("Top Ten Best Paid WNBA Players")
plt.xlabel("Player")
plt.ylabel("Salary ($)")
plt.xticks(wnba_x_axis, wnba_player, rotation=45)
# The y range is hard-coded to zoom in on the salaries being plotted.
plt.ylim(210000, 237000)

# Label each bar just above its top; the label text is the height divided by 1000
# (rounded to 2 decimals), while the axis itself stays in dollars.
for bar in bars:
    height = bar.get_height()
    plt.text(
        x=0.1 + bar.get_x(),
        y=100 + height,
        s=str(round(height / 1000, 2)),
        fontsize=8,
    )

plt.tight_layout()

plt.savefig("graph_outputs/TopTen_WNBA.png")
plt.show()

In [70]:
# describe() snapshot of the WNBA frame; `df2` is reused below for the
# NBA vs WNBA comparison table.
df2 = wnba_salaries.describe()
df2

In [71]:
# Salary column of the WNBA describe() table. In the visible notebook it is only
# referenced by the commented-out summary plot that follows.
df3 = df2["Salary"]

In [72]:
# #df2['Salary'].plot(kind = "bar", color = "orange")
# #df3.plot()
# df3.plot(kind = "bar", color = "orange")
# plt.title("WNBA Summary")
# plt.xlabel("Summary Stats")
# #plt.xticks(wnba_x_axis,wnba_player,rotation=45)
# #plt.ylim(210000,236000)
# plt.ylabel("Salary ($)")
# plt.tight_layout()
# #fig, ax = plt.subplots()
# #bar_container = ax.par(wnba_salaries.describe(),df2["Salary"])
# #ax.bar_label(bar_container, fmt='{:, .0f}')
# #for i, v in enumerate(df2["Salary"]):
#     #ax.text(i,v + 10000, str(v), ha="center", va="bottom")

# plt.savefig("graph_outputs/WNBA_summary.png")
# plt.show()

## NBA and WNBA Graphs

In [73]:
# Side-by-side describe() figures for Salary: one column per league.
# `df` was computed before NBA salaries were rescaled to millions, so when cells
# run in order both columns are in dollars.
nba_vs_wnba = pd.DataFrame(dict(nba=df['Salary'], wnba=df2['Salary']))
nba_vs_wnba

In [74]:
# Grouped bar chart comparing the describe() figures of both leagues.
nba_vs_wnba.plot(kind="bar")
plt.title("NBA vs WNBA Summary Salary Stats")
plt.xlabel("Summary Stats")
plt.ylabel("Salary ($)")
# Plain tick labels on the y axis (no scientific notation, no offset).
plt.ticklabel_format(style='plain', useOffset=False, axis='y')
plt.savefig("graph_outputs/NBAvsWNBA1.png")
# Finish the cell the same way as every other plotting cell in the notebook.
plt.show()