In [1]:
import pandas as pd
wnba = pd.read_csv('wnba.csv')
wnba.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 143 entries, 0 to 142
Data columns (total 32 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Name          143 non-null    object 
 1   Team          143 non-null    object 
 2   Pos           143 non-null    object 
 3   Height        143 non-null    int64  
 4   Weight        142 non-null    float64
 5   BMI           142 non-null    float64
 6   Birth_Place   143 non-null    object 
 7   Birthdate     143 non-null    object 
 8   Age           143 non-null    int64  
 9   College       143 non-null    object 
 10  Experience    143 non-null    object 
 11  Games Played  143 non-null    int64  
 12  MIN           143 non-null    int64  
 13  FGM           143 non-null    int64  
 14  FGA           143 non-null    int64  
 15  FG%           143 non-null    float64
 16  15:00         143 non-null    int64  
 17  3PA           143 non-null    int64  
 18  3P%           143 non-null    

In [5]:
pd.set_option('display.max_columns',None)
wnba.head()

Unnamed: 0,Name,Team,Pos,Height,Weight,BMI,Birth_Place,Birthdate,Age,College,Experience,Games Played,MIN,FGM,FGA,FG%,15:00,3PA,3P%,FTM,FTA,FT%,OREB,DREB,REB,AST,STL,BLK,TO,PTS,DD2,TD3
0,Aerial Powers,DAL,F,183,71.0,21.200991,US,"January 17, 1994",23,Michigan State,2,8,173,30,85,35.3,12,32,37.5,21,26,80.8,6,22,28,12,3,6,12,93,0,0
1,Alana Beard,LA,G/F,185,73.0,21.329438,US,"May 14, 1982",35,Duke,12,30,947,90,177,50.8,5,18,27.8,32,41,78.0,19,82,101,72,63,13,40,217,0,0
2,Alex Bentley,CON,G,170,69.0,23.875433,US,"October 27, 1990",26,Penn State,4,26,617,82,218,37.6,19,64,29.7,35,42,83.3,4,36,40,78,22,3,24,218,0,0
3,Alex Montgomery,SAN,G/F,185,84.0,24.543462,US,"December 11, 1988",28,Georgia Tech,6,31,721,75,195,38.5,21,68,30.9,17,21,81.0,35,134,169,65,20,10,38,188,2,0
4,Alexis Jones,MIN,G,175,78.0,25.469388,US,"August 5, 1994",23,Baylor,R,24,137,16,50,32.0,7,20,35.0,11,12,91.7,3,9,12,12,7,0,14,50,0,0


In [6]:
freq_distro_pos = wnba['Pos'].value_counts()
freq_distro_height = wnba['Height'].value_counts()

In [7]:
freq_distro_pos

G      60
F      33
C      25
G/F    13
F/C    12
Name: Pos, dtype: int64

In [8]:
freq_distro_height 

188    20
193    18
175    16
185    15
191    11
183    11
173    11
196     9
178     8
180     7
170     6
198     5
201     2
168     2
206     1
165     1
Name: Height, dtype: int64

In [9]:
age_ascending = wnba['Age'].value_counts().sort_index(ascending=True)
age_ascending

21     2
22    10
23    15
24    16
25    15
26    12
27    13
28    14
29     8
30     9
31     8
32     8
33     3
34     5
35     4
36     1
Name: Age, dtype: int64

In [10]:
age_descending= wnba['Age'].value_counts().sort_index(ascending=False)
age_descending

36     1
35     4
34     5
33     3
32     8
31     8
30     9
29     8
28    14
27    13
26    12
25    15
24    16
23    15
22    10
21     2
Name: Age, dtype: int64

In [11]:
def make_pts_ordinal(row):
    if row['PTS'] <= 20:
        return 'very few points'
    if (20 < row['PTS'] <=  80):
        return 'few points'
    if (80 < row['PTS'] <=  150):
        return 'many, but below average'
    if (150 < row['PTS'] <= 300):
        return 'average number of points'
    if (300 < row['PTS'] <=  450):
        return 'more than average'
    else:
        return 'much more than average'

In [14]:
wnba['PTS_ordinal_scale'] = wnba.apply(make_pts_ordinal,axis=1)
wnba['PTS_ordinal_scale'].value_counts()

average number of points    45
few points                  27
many, but below average     25
more than average           21
much more than average      13
very few points             12
Name: PTS_ordinal_scale, dtype: int64

In [16]:
wnba['PTS_ordinal_scale'].value_counts().iloc[[4,0,2,1,5]] 

much more than average      13
average number of points    45
many, but below average     25
few points                  27
very few points             12
Name: PTS_ordinal_scale, dtype: int64

In [20]:
percentages = wnba['Age'].value_counts(normalize=True).sort_index()*100
percentages

21     1.398601
22     6.993007
23    10.489510
24    11.188811
25    10.489510
26     8.391608
27     9.090909
28     9.790210
29     5.594406
30     6.293706
31     5.594406
32     5.594406
33     2.097902
34     3.496503
35     2.797203
36     0.699301
Name: Age, dtype: float64

In [21]:
#25yrs old proportion
proportion_25 = percentages[25]/100
proportion_25 

0.1048951048951049

In [23]:
percentage_30 = percentages[30]
percentage_30

6.293706293706294

In [26]:
#% over 30
percentage_over_30 = percentages.loc[30:].sum()
percentage_over_30

26.573426573426573

In [27]:
#% below 23
percentage_below_23 = percentages.loc[:23].sum()
percentage_below_23

18.88111888111888

In [29]:
wnba['Age'].describe()

count    143.000000
mean      27.076923
std        3.679170
min       21.000000
25%       24.000000
50%       27.000000
75%       30.000000
max       36.000000
Name: Age, dtype: float64

In [28]:
percentiles = wnba['Age'].describe(percentiles=[.5,.75,.95])
percentiles

count    143.000000
mean      27.076923
std        3.679170
min       21.000000
50%       27.000000
75%       30.000000
95%       34.000000
max       36.000000
Name: Age, dtype: float64

In [32]:
age_upper_quartile = percentiles['75%']
age_upper_quartile

30.0

In [33]:
age_middle_quartile = percentiles['50%']
age_middle_quartile

27.0

In [35]:
age_95th_quartile = percentiles['95%']
age_95th_quartile

34.0

In [36]:
grouped_freq_table = wnba['PTS'].value_counts(bins=10, normalize=True).sort_index(ascending=False)*100
grouped_freq_table 

(525.8, 584.0]     3.496503
(467.6, 525.8]     2.797203
(409.4, 467.6]     5.594406
(351.2, 409.4]     6.993007
(293.0, 351.2]     5.594406
(234.8, 293.0]    11.888112
(176.6, 234.8]    13.986014
(118.4, 176.6]    11.888112
(60.2, 118.4]     16.783217
(1.417, 60.2]     20.979021
Name: PTS, dtype: float64

In [37]:
interval1 = wnba['MIN'].value_counts(bins=1).sort_index()
interval2 = wnba['MIN'].value_counts(bins=2).sort_index()
interval3 = wnba['MIN'].value_counts(bins=3).sort_index()
interval10 = wnba['MIN'].value_counts(bins=10).sort_index()
interval40 = wnba['MIN'].value_counts(bins=40).sort_index()

In [38]:
interval1


(10.993, 1018.0]    143
Name: MIN, dtype: int64

In [39]:
interval2

(10.993, 515.0]    74
(515.0, 1018.0]    69
Name: MIN, dtype: int64

In [40]:
interval10

(10.993, 112.6]    19
(112.6, 213.2]     10
(213.2, 313.8]     17
(313.8, 414.4]     15
(414.4, 515.0]     13
(515.0, 615.6]     12
(615.6, 716.2]     15
(716.2, 816.8]     14
(816.8, 917.4]     19
(917.4, 1018.0]     9
Name: MIN, dtype: int64

In [42]:
interval40

(10.993, 37.15]     5
(37.15, 62.3]       7
(62.3, 87.45]       3
(87.45, 112.6]      4
(112.6, 137.75]     4
(137.75, 162.9]     2
(162.9, 188.05]     1
(188.05, 213.2]     3
(213.2, 238.35]     7
(238.35, 263.5]     4
(263.5, 288.65]     4
(288.65, 313.8]     2
(313.8, 338.95]     2
(338.95, 364.1]     4
(364.1, 389.25]     6
(389.25, 414.4]     3
(414.4, 439.55]     1
(439.55, 464.7]     5
(464.7, 489.85]     3
(489.85, 515.0]     4
(515.0, 540.15]     3
(540.15, 565.3]     1
(565.3, 590.45]     3
(590.45, 615.6]     5
(615.6, 640.75]     4
(640.75, 665.9]     3
(665.9, 691.05]     5
(691.05, 716.2]     3
(716.2, 741.35]     5
(741.35, 766.5]     5
(766.5, 791.65]     1
(791.65, 816.8]     3
(816.8, 841.95]     6
(841.95, 867.1]     6
(867.1, 892.25]     3
(892.25, 917.4]     4
(917.4, 942.55]     3
(942.55, 967.7]     4
(967.7, 992.85]     0
(992.85, 1018.0]    2
Name: MIN, dtype: int64

In [44]:
intervals = pd.interval_range(start=0, end=600, freq=60)
gr_freq_table = pd.Series([0,0,0,0,0,0,0,0,0,0], index=intervals)
gr_freq_table

(0, 60]       0
(60, 120]     0
(120, 180]    0
(180, 240]    0
(240, 300]    0
(300, 360]    0
(360, 420]    0
(420, 480]    0
(480, 540]    0
(540, 600]    0
dtype: int64

In [46]:
for value in wnba['PTS']:
    for interval in intervals:
        if value in interval:
            gr_freq_table.loc[interval] +=1
gr_freq_table           

(0, 60]       30
(60, 120]     25
(120, 180]    17
(180, 240]    22
(240, 300]    15
(300, 360]     7
(360, 420]    11
(420, 480]     7
(480, 540]     4
(540, 600]     5
dtype: int64