# Basketball Playoffs Qualification

In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

awards_players = pd.read_csv('../data/basketballPlayoffs/awards_players.csv')
coaches = pd.read_csv('../data/basketballPlayoffs/coaches.csv')
players = pd.read_csv('../data/basketballPlayoffs/players.csv')
players_teams = pd.read_csv('../data/basketballPlayoffs/players_teams.csv')
series_post = pd.read_csv('../data/basketballPlayoffs/series_post.csv')
teams = pd.read_csv('../data/basketballPlayoffs/teams.csv')
teams_post = pd.read_csv('../data/basketballPlayoffs/teams_post.csv')

# Set display options to show all rows and adjust width

In [28]:
pd.set_option('display.max_rows', None)  # Show all rows in output
pd.set_option('display.width', 1000)  # Set display width

# Display the first few rows of each DataFrame to understand the data structure

In [29]:
print("Awards Players Data:")
print(awards_players)
print("\nCoaches Data:")
print(coaches)
print("\nPlayers Data:")
print(players)
print("\nPlayers Teams Data:")
print(players_teams)
print("\nSeries Post Data:")
print(series_post)
print("\nTeams Data:")
print(teams)
print("\nTeams Post Data:")
print(teams_post)

Awards Players Data:
      playerID                                   award  year  lgID
0   thompti01w      All-Star Game Most Valuable Player     1  WNBA
1   leslili01w      All-Star Game Most Valuable Player     2  WNBA
2   leslili01w      All-Star Game Most Valuable Player     3  WNBA
3   teaslni01w      All-Star Game Most Valuable Player     4  WNBA
4   swoopsh01w      All-Star Game Most Valuable Player     6  WNBA
5   douglka01w      All-Star Game Most Valuable Player     7  WNBA
6    fordch01w      All-Star Game Most Valuable Player     8  WNBA
7    cashsw01w      All-Star Game Most Valuable Player    10  WNBA
8   coopemi01w                       Coach of the Year     1  WNBA
9   hugheda99w                       Coach of the Year     2  WNBA
10  stanlma99w                       Coach of the Year     3  WNBA
11  laimbbi01w                       Coach of the Year     4  WNBA
12  mcconsu01w                       Coach of the Year     5  WNBA
13  whisejo99w                       Coac

Check for Missing Data

In [30]:
print(awards_players.isnull().sum())
print(coaches.isnull().sum())
print(players.isnull().sum())
print(players_teams.isnull().sum())
print(series_post.isnull().sum())
print(teams.isnull().sum())
print(teams_post.isnull().sum())

playerID    0
award       0
year        0
lgID        0
dtype: int64
coachID        0
year           0
tmID           0
lgID           0
stint          0
won            0
lost           0
post_wins      0
post_losses    0
dtype: int64
bioID             0
pos              78
firstseason       0
lastseason        0
height            0
weight            0
college         167
collegeOther    882
birthDate         0
deathDate         0
dtype: int64
playerID              0
year                  0
stint                 0
tmID                  0
lgID                  0
GP                    0
GS                    0
minutes               0
points                0
oRebounds             0
dRebounds             0
rebounds              0
assists               0
steals                0
blocks                0
turnovers             0
PF                    0
fgAttempted           0
fgMade                0
ftAttempted           0
ftMade                0
threeAttempted        0
threeMade             0


Summary Statistics

In [31]:
print(players_teams.describe())
print(teams.describe())


              year        stint           GP           GS      minutes       points    oRebounds    dRebounds     rebounds      assists  ...   PostBlocks  PostTurnovers       PostPF  PostfgAttempted   PostfgMade  PostftAttempted   PostftMade  PostthreeAttempted  PostthreeMade       PostDQ
count  1876.000000  1876.000000  1876.000000  1876.000000  1876.000000  1876.000000  1876.000000  1876.000000  1876.000000  1876.000000  ...  1876.000000    1876.000000  1876.000000      1876.000000  1876.000000      1876.000000  1876.000000         1876.000000    1876.000000  1876.000000
mean      5.326226     0.113539    24.320896    12.438166   501.269190   176.261727    24.388060    54.334755    78.722814    39.031983  ...     0.759062       2.623134     3.735075        12.282516     5.149254         3.672708     2.822495            2.924307       1.019723     0.026652
std       2.905475     0.422574    10.460614    13.641697   359.566117   161.983839    23.325974    48.347088    69.210226    40.1

Check Data Types

In [32]:
print(teams.dtypes)


year            int64
lgID           object
tmID           object
franchID       object
confID         object
divID         float64
rank            int64
playoff        object
seeded          int64
firstRound     object
semis          object
finals         object
name           object
o_fgm           int64
o_fga           int64
o_ftm           int64
o_fta           int64
o_3pm           int64
o_3pa           int64
o_oreb          int64
o_dreb          int64
o_reb           int64
o_asts          int64
o_pf            int64
o_stl           int64
o_to            int64
o_blk           int64
o_pts           int64
d_fgm           int64
d_fga           int64
d_ftm           int64
d_fta           int64
d_3pm           int64
d_3pa           int64
d_oreb          int64
d_dreb          int64
d_reb           int64
d_asts          int64
d_pf            int64
d_stl           int64
d_to            int64
d_blk           int64
d_pts           int64
tmORB           int64
tmDRB           int64
tmTRB     

# Remove unnecessary columns from the awards_players DataFrame (example)

In [33]:
# awards_players = awards_players.drop(columns=['lgID'])

# Save the cleaned DataFrame back to the same CSV file

In [34]:
# awards_players.to_csv('../data/basketballPlayoffs/awards_players.csv', index=False)