In [11]:
import pandas as pd
from scipy.stats import chi2_contingency

In [19]:
match=pd.read_csv('../datasets/matches.csv')
deliveries=pd.read_csv('../datasets/deliveries.csv')

In [13]:
print(match.columns)

Index(['id', 'season', 'city', 'date', 'match_type', 'player_of_match',
       'venue', 'team1', 'team2', 'toss_winner', 'toss_decision', 'winner',
       'result', 'result_margin', 'target_runs', 'target_overs', 'super_over',
       'method', 'umpire1', 'umpire2'],
      dtype='object')


In [17]:
match.head(2)

Unnamed: 0,id,season,city,date,match_type,player_of_match,venue,team1,team2,toss_winner,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
0,335982,2007/08,Bangalore,2008-04-18,League,BB McCullum,M Chinnaswamy Stadium,Royal Challengers Bangalore,Kolkata Knight Riders,Royal Challengers Bangalore,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
1,335983,2007/08,Chandigarh,2008-04-19,League,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",Kings XI Punjab,Chennai Super Kings,Chennai Super Kings,bat,Chennai Super Kings,runs,33.0,241.0,20.0,N,,MR Benson,SL Shastri


In [29]:
df1=match[['id', 'player_of_match', 'winner']]
df2=deliveries[['match_id', 'batting_team', 'batter']]

In [30]:
df1.head(2)

Unnamed: 0,id,player_of_match,winner
0,335982,BB McCullum,Kolkata Knight Riders
1,335983,MEK Hussey,Chennai Super Kings


In [31]:
df2.head(2)

Unnamed: 0,match_id,batting_team,batter
0,335982,Kolkata Knight Riders,SC Ganguly
1,335982,Kolkata Knight Riders,BB McCullum


## Data Cleaning and Preparing

In [16]:
match_columns=match[['match_id', 'match_winner', 'manofmach']].copy()
player_columns=player[['match_id','player_name','player_team']].copy()

KeyError: "None of [Index(['match_id', 'match_winner', 'manofmach'], dtype='object')] are in the [columns]"

In [36]:
match_columns.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 637 entries, 0 to 636
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   match_id      637 non-null    int64 
 1   match_winner  634 non-null    object
 2   manofmach     633 non-null    object
dtypes: int64(1), object(2)
memory usage: 15.1+ KB


Let's remove all the rows that has null values in manofmach column

In [37]:
match_columns=match_columns.dropna(subset=['manofmach'])

In [38]:
match_columns['match_winner'].value_counts()

match_winner
Mumbai Indians                 91
Chennai Super Kings            79
Kolkata Knight Riders          77
Royal Challengers Bangalore    73
Kings XI Punjab                70
Rajasthan Royals               63
Delhi Daredevils               62
Sunrisers Hyderabad            42
Deccan Chargers                29
Rising Pune Supergiants        15
Gujarat Lions                  13
Pune Warriors                  12
Kochi Tuskers Kerala            6
tied                            1
Name: count, dtype: int64

There is one match that ended in a tie; however, the "Man of the Match" trophy was awarded to a specific player. We will remove this as it has no impact on the match result.

In [39]:
match_columns=match_columns.copy()
match_columns.drop(match_columns[match_columns['match_winner'] == 'tied'].index, inplace=True)

In [40]:
match_player = pd.merge(
    match_columns,
    player_columns[['match_id', 'player_name', 'player_team']],  
    left_on=['match_id', 'manofmach'],  
    right_on=['match_id', 'player_name'],  
    how='left' 
)

In [41]:
match_player

Unnamed: 0,match_id,match_winner,manofmach,player_name,player_team
0,335987,Kolkata Knight Riders,BB McCullum,BB McCullum,Kolkata Knight Riders
1,335988,Chennai Super Kings,MEK Hussey,MEK Hussey,Chennai Super Kings
2,335989,Delhi Daredevils,MF Maharoof,MF Maharoof,Delhi Daredevils
3,335990,Royal Challengers Bangalore,MV Boucher,MV Boucher,Royal Challengers Bangalore
4,335991,Kolkata Knight Riders,DJ Hussey,DJ Hussey,Kolkata Knight Riders
...,...,...,...,...,...
627,1082646,Royal Challengers Bangalore,HV Patel,HV Patel,Royal Challengers Bangalore
628,1082647,Rising Pune Supergiants,Wasington Sundar,,
629,1082648,Kolkata Knight Riders,NM Coulter-Nile,NM Coulter-Nile,Kolkata Knight Riders
630,1082649,Mumbai Indians,KV Sharma,KV Sharma,Mumbai Indians


In [42]:
match_player.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 632 entries, 0 to 631
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   match_id      632 non-null    int64 
 1   match_winner  632 non-null    object
 2   manofmach     632 non-null    object
 3   player_name   630 non-null    object
 4   player_team   630 non-null    object
dtypes: int64(1), object(4)
memory usage: 24.8+ KB


In [43]:
match_player[match_player['player_name'].isna()]

Unnamed: 0,match_id,match_winner,manofmach,player_name,player_team
584,1082601,Kolkata Knight Riders,Narine,,
628,1082647,Rising Pune Supergiants,Wasington Sundar,,


In [44]:
player_columns[player_columns['match_id'] == 1082601 ]
player_columns[player_columns['match_id'] == 1082647 ]

Unnamed: 0,match_id,player_name,player_team
13904,1082647,PA Patel,Mumbai Indians
13905,1082647,MS Dhoni,Rising Pune Supergiants
13906,1082647,RG Sharma,Mumbai Indians
13907,1082647,AM Rahane,Rising Pune Supergiants
13908,1082647,MK Tiwary,Rising Pune Supergiants
13909,1082647,KV Sharma,Mumbai Indians
13910,1082647,SL Malinga,Mumbai Indians
13911,1082647,AT Rayudu,Mumbai Indians
13912,1082647,KA Pollard,Mumbai Indians
13913,1082647,DT Christian,Rising Pune Supergiants


this was due to the different naming in 2 tables. Now let's add the value of player_name and player_team column with match_id=1082601 and 1082647

In [45]:
match_player.loc[match_player['match_id'] == 1082601, ['player_name', 'player_team']] = ['Narine', 'Kolkata Knight Riders']
match_player.loc[match_player['match_id'] == 1082647, ['player_name', 'player_team']] = ['Wasington Sundar', 'Rising Pune Supergiants']

In [46]:
match_player

Unnamed: 0,match_id,match_winner,manofmach,player_name,player_team
0,335987,Kolkata Knight Riders,BB McCullum,BB McCullum,Kolkata Knight Riders
1,335988,Chennai Super Kings,MEK Hussey,MEK Hussey,Chennai Super Kings
2,335989,Delhi Daredevils,MF Maharoof,MF Maharoof,Delhi Daredevils
3,335990,Royal Challengers Bangalore,MV Boucher,MV Boucher,Royal Challengers Bangalore
4,335991,Kolkata Knight Riders,DJ Hussey,DJ Hussey,Kolkata Knight Riders
...,...,...,...,...,...
627,1082646,Royal Challengers Bangalore,HV Patel,HV Patel,Royal Challengers Bangalore
628,1082647,Rising Pune Supergiants,Wasington Sundar,Wasington Sundar,Rising Pune Supergiants
629,1082648,Kolkata Knight Riders,NM Coulter-Nile,NM Coulter-Nile,Kolkata Knight Riders
630,1082649,Mumbai Indians,KV Sharma,KV Sharma,Mumbai Indians


In [47]:
match_player['player_impact']=(match_player['match_winner'] == match_player['player_team']).astype(int)

In [48]:
match_player

Unnamed: 0,match_id,match_winner,manofmach,player_name,player_team,player_impact
0,335987,Kolkata Knight Riders,BB McCullum,BB McCullum,Kolkata Knight Riders,1
1,335988,Chennai Super Kings,MEK Hussey,MEK Hussey,Chennai Super Kings,1
2,335989,Delhi Daredevils,MF Maharoof,MF Maharoof,Delhi Daredevils,1
3,335990,Royal Challengers Bangalore,MV Boucher,MV Boucher,Royal Challengers Bangalore,1
4,335991,Kolkata Knight Riders,DJ Hussey,DJ Hussey,Kolkata Knight Riders,1
...,...,...,...,...,...,...
627,1082646,Royal Challengers Bangalore,HV Patel,HV Patel,Royal Challengers Bangalore,1
628,1082647,Rising Pune Supergiants,Wasington Sundar,Wasington Sundar,Rising Pune Supergiants,1
629,1082648,Kolkata Knight Riders,NM Coulter-Nile,NM Coulter-Nile,Kolkata Knight Riders,1
630,1082649,Mumbai Indians,KV Sharma,KV Sharma,Mumbai Indians,1


## Hypothesis Testing

**Null Hypothesis (H₀):** 
The "Man of the Match" award is equally likely to be given to a player from either the winning team or the losing team. (There is no relationship between the MoM and the winning team status.)

**Alternative Hypothesis (H₁):**
The "Man of the Match" award is more likely to be given to a player from the winning team. (There is a relationship between the MoM and the winning team status.)

In [49]:
# Count occurrences of 1s and 0s in the 'player_impact' column
contingency_table = pd.crosstab(index=match_player['player_impact'], columns='count')

# Perform Chi-Square Test
chi2, p, dof, expected = chi2_contingency(contingency_table)

# Print the results
print(f"Chi-Square Statistic: {chi2}")
print(f"P-Value: {p}")
print(f"Degrees of Freedom: {dof}")
print(f"Expected Frequencies:\n{expected}")

# Interpret the result
alpha = 0.05
if p < alpha:
    print("Reject the null hypothesis: The MoM is more likely to be from the winning team.")
else:
    print("Fail to reject the null hypothesis: No significant association between MoM and match outcome.")


Chi-Square Statistic: 0.0
P-Value: 1.0
Degrees of Freedom: 0
Expected Frequencies:
[[ 14.]
 [618.]]
Fail to reject the null hypothesis: No significant association between MoM and match outcome.


*Since we failed to reject the null hypothesis, we conclude that there is no significant evidence of a relationship between the Man of the Match award and the team’s winning status.*