In [19]:
# H0 : The average intrinsic ability of the champion team's players in the last two seasons is the same as in the previous two seasons.
# H1 : The average intrinsic ability of the champion team's players in the last two seasons is greater than in the previous two seasons.

In [20]:
import pandas as pd
from scipy.stats import shapiro, mannwhitneyu

In [21]:
# Load the dataset
data = pd.read_csv("D:/Bootcamp/mini project/top_team_players_data.csv")
data.head()

Unnamed: 0,player_name,player_id,team_id,year,retired,experience,birthdate,Age,intrinsic_ability
0,Giannis Antetokounmpo,a/antetgi01,MIL,2020,False,6,12/6/1994,30,0.2
1,Thanasis Antetokounmpo,a/antetth01,MIL,2020,False,1,7/18/1992,32,0.03125
2,Dragan Bender,b/bendedr01,MIL,2020,True,3,11/17/1997,27,0.111111
3,Eric Bledsoe,b/bledser01,MIL,2020,True,9,12/9/1989,35,0.257143
4,Sterling Brown,b/brownst02,MIL,2020,False,2,2/10/1995,29,0.068966


In [22]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 191 entries, 0 to 190
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   player_name        191 non-null    object 
 1   player_id          191 non-null    object 
 2   team_id            191 non-null    object 
 3   year               191 non-null    int64  
 4   retired            191 non-null    bool   
 5   experience         191 non-null    int64  
 6   birthdate          191 non-null    object 
 7   Age                191 non-null    int64  
 8   intrinsic_ability  191 non-null    float64
dtypes: bool(1), float64(1), int64(3), object(4)
memory usage: 12.3+ KB


In [23]:
# Create a copy of the dataset
data_copy = data.copy()

In [24]:
# Divide data into two periods
period_1 = data_copy[data_copy['year'].between(2021, 2022)]  # 2020-2021 to 2021-2022
period_1.head()

Unnamed: 0,player_name,player_id,team_id,year,retired,experience,birthdate,Age,intrinsic_ability
35,Udoka Azubuike,a/azubuud01,UTA,2021,False,0,9/17/1999,25,0.0
36,Bojan Bogdanović,b/bogdabo02,UTA,2021,False,6,4/18/1989,35,0.171429
37,Jarrell Brantley,b/brantja01,UTA,2021,False,1,6/7/1996,28,0.035714
38,Jordan Clarkson,c/clarkjo01,UTA,2021,False,6,6/7/1992,32,0.1875
39,Mike Conley,c/conlemi01,UTA,2021,False,13,10/11/1987,37,0.351351


In [25]:
period_2 = data_copy[data_copy['year'].between(2023, 2024)]  # 2022-2023 to 2023-2024
period_2.head()

Unnamed: 0,player_name,player_id,team_id,year,retired,experience,birthdate,Age,intrinsic_ability
116,Grayson Allen,a/allengr01,MIL,2023,False,4,10/8/1995,29,0.137931
117,Giannis Antetokounmpo,a/antetgi01,MIL,2023,False,9,12/6/1994,30,0.3
118,Thanasis Antetokounmpo,a/antetth01,MIL,2023,False,4,7/18/1992,32,0.125
119,MarJon Beauchamp,b/beaucma01,MIL,2023,False,0,10/12/2000,24,0.0
120,Jevon Carter,c/carteje01,MIL,2023,False,4,9/14/1995,29,0.137931


In [26]:
# Calculate mean intrinsic_ability for both periods
mean_intrinsic_ability_1 = period_1['intrinsic_ability'].mean()
print(f"mean for intrinsic_ability in Period1: {mean_intrinsic_ability_1}")
mean_intrinsic_ability_2 = period_2['intrinsic_ability'].mean()
print(f"mean for intrinsic_ability in Period2: {mean_intrinsic_ability_2}")

mean for intrinsic_ability in Period1: 0.11732189254320988
mean for intrinsic_ability in Period2: 0.17959903813333333


In [27]:
# Check if the intrinsic ability data is normally distributed for both periods
shapiro_1 = shapiro(period_1['intrinsic_ability'])
print(f"Shapiro-Wilk for Period 1:{shapiro_1}")
shapiro_2 = shapiro(period_2['intrinsic_ability'])
print(f"Shapiro-Wilk for Period 2:", shapiro_2)

Shapiro-Wilk for Period 1:ShapiroResult(statistic=np.float64(0.9084076142851828), pvalue=np.float64(2.52362453786993e-05))
Shapiro-Wilk for Period 2: ShapiroResult(statistic=np.float64(0.9463447845580649), pvalue=np.float64(0.003158702743104995))


In [28]:
# Display the Shapiro-Wilk test results
print(f"Shapiro-Wilk Test for Period 1:")

print(f"  Statistic: {shapiro_1.statistic:.4f}")

print(f"  P-value: {shapiro_1.pvalue:.4f}")

print(f"  Normality: {'Accepted' if shapiro_1.pvalue > 0.05 else 'Rejected'}")

print(f"\nShapiro-Wilk Test for Period 2:")

print(f"  Statistic: {shapiro_2.statistic:.4f}")

print(f"  P-value: {shapiro_2.pvalue:.4f}")

print(f"  Normality: {'Accepted' if shapiro_2.pvalue > 0.05 else 'Rejected'}")

Shapiro-Wilk Test for Period 1:
  Statistic: 0.9084
  P-value: 0.0000
  Normality: Rejected

Shapiro-Wilk Test for Period 2:
  Statistic: 0.9463
  P-value: 0.0032
  Normality: Rejected


In [29]:
# Use Mann-Whitney U test to compare intrinsic ability between the two periods
stat, p_value = mannwhitneyu(period_1['intrinsic_ability'], period_2['intrinsic_ability'])

In [30]:
# Display the Mann-Whitney U test results
print(f"\nMann-Whitney U Test:")
print(f"  Statistic: {stat}")
print(f"  P-value: {p_value}")


Mann-Whitney U Test:
  Statistic: 2133.0
  P-value: 0.001314756512552457


In [31]:
# Evaluate the null hypothesis
if p_value < 0.05:
    print("Null hypothesis rejected: The average intrinsic ability is significantly different.")
else:
    print("Null hypothesis not rejected: No significant difference in average intrinsic ability.")

Null hypothesis rejected: The average intrinsic ability is significantly different.
