<a href="https://www.kaggle.com/code/avtnshm/cricket-world-cup-2023-analysis?scriptVersionId=151408688" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
#Loading the file
import pandas as pd
import numpy as np
import warnings
warnings.simplefilter("ignore")
# Location of the match-statistics CSV in the Kaggle input directory
# (copy the path of the file from the sidebar).
CWC2023_CSV_PATH = '/kaggle/input/cwc2023-match-statistics-updated-daily/CWC2023.csv'

# Read every match into the notebook-wide frame used by the cells below.
cwc2023 = pd.read_csv(CWC2023_CSV_PATH)

# Peek at the two most recent rows as a quick sanity check of the load.
cwc2023.tail(2)

Unnamed: 0,Match_No,Team_1,Team_2,Venue,Toss,Choice,Innings1_Run,Innings1_Balls,Innings1_Wickets,Innings2_Run,Innings2_Balls,Innings2_Wickets,Winner,Margin_Runs_or_Wickets
46,47,Australia,South Africa,Kolkata,South Africa,Bat,212,298,10,215,284,7,Australia,3W
47,48,Australia,India,Ahemedabad,Australia,Ball,240,300,10,241,258,4,Australia,6W


# Teams playing in the CWC2023

In [2]:
import plotly.express as px
import geopandas as gpd
# Collect every side that appears in a fixture. Reading the 'Winner' column
# alone would silently leave out any team that has not won a match yet.
team_names = pd.unique(cwc2023[['Team_1', 'Team_2']].to_numpy().ravel())
team_data = pd.DataFrame({'Team Name': team_names})

# Country outlines bundled with geopandas.
# NOTE(review): `gpd.datasets` is deprecated in recent geopandas releases —
# confirm the installed version still provides 'naturalearth_lowres'.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Left merge keeps every country of the map; countries that are not in
# `team_data` end up with a missing 'Team Name' and are left uncoloured.
merged_data = world.merge(team_data, left_on='name', right_on='Team Name', how='left')

# One colour per participating team, located through the ISO alpha-3 code.
fig = px.choropleth(
    merged_data,
    locations="iso_a3",
    color="Team Name",
    hover_name="name",
    title="CWC2023 Teams",
)

fig.show()

In [3]:
# The dataset was updated daily until 19th November 2023, so explore it, gain
# insights and, if you are a cricket fan, enjoy the matches too.
#
# Add the 'Margin' and 'Loser' columns with vectorised column operations
# instead of a row-by-row `apply`.
first_innings_runs = cwc2023['Innings1_Run']
second_innings_runs = cwc2023['Innings2_Run']

# "<n>R": run difference, used when the first innings total is the larger one.
margin_in_runs = (first_innings_runs - second_innings_runs).astype(str) + 'R'
# "<n>W": 10 minus the wickets lost in the second innings, used otherwise.
margin_in_wickets = (10 - cwc2023['Innings2_Wickets']).astype(str) + 'W'

cwc2023['Margin'] = np.where(
    first_innings_runs > second_innings_runs,
    margin_in_runs,
    margin_in_wickets,
)

# The loser is whichever of the two listed teams is not the winner.
cwc2023['Loser'] = np.where(
    cwc2023['Winner'] == cwc2023['Team_1'],
    cwc2023['Team_2'],
    cwc2023['Team_1'],
)

# Show the last two rows with the newly added columns.
cwc2023.tail(2)

Unnamed: 0,Match_No,Team_1,Team_2,Venue,Toss,Choice,Innings1_Run,Innings1_Balls,Innings1_Wickets,Innings2_Run,Innings2_Balls,Innings2_Wickets,Winner,Margin_Runs_or_Wickets,Margin,Loser
46,47,Australia,South Africa,Kolkata,South Africa,Bat,212,298,10,215,284,7,Australia,3W,3W,South Africa
47,48,Australia,India,Ahemedabad,Australia,Ball,240,300,10,241,258,4,Australia,6W,6W,India


# Winning the toss and losing the game

In [4]:
from scipy.stats import chi2_contingency
# Cross-tabulate the toss-winning team against the losing team and run a
# chi-square test of independence on the resulting table.
# NOTE(review): both axes are team names and the toss winner is always one of
# the two sides in the match, so part of any association may be structural,
# and many cells are probably sparse — confirm the chi-square assumptions
# hold before relying on this p-value.
contingency_table_choice = pd.crosstab(cwc2023['Toss'], cwc2023['Loser'])

# Only the statistic and the p-value are needed from the test result.
chi2, p = chi2_contingency(contingency_table_choice)[:2]

# Significance threshold for the test.
alpha = 0.05
verdict = "a" if p < alpha else "no"
print(f"There is {verdict} statistically significant association between 'Toss' and 'Loser'.")
print("The p-value is:", p)

There is a statistically significant association between 'Toss' and 'Loser'.
The p-value is: 2.490386026380861e-10


In [5]:
# Share of matches in which the team that won the toss did not win the game.
#
# Team spellings are not consistent across columns: 'Toss' contains
# 'SriLanka' while the team columns use 'Sri Lanka'. Comparing the raw
# strings would count a match where that side won both the toss and the game
# as a toss-winner loss, so both columns are compared on a space-free,
# lower-cased key instead.
toss_winner_key = cwc2023['Toss'].str.replace(' ', '', regex=False).str.lower()
match_winner_key = cwc2023['Winner'].str.replace(' ', '', regex=False).str.lower()

total_matches = len(cwc2023)
matches_where_toss_winner_is_loser = (toss_winner_key != match_winner_key).sum()
probability_toss_winner_is_loser = matches_where_toss_winner_is_loser / total_matches
print(f"The probability of the team winning the toss also being the losing team is: {probability_toss_winner_is_loser:.2%}")


The probability of the team winning the toss also being the losing team is: 60.42%


# Teams Qualified for Semis

In [6]:
# Number of matches won by each team.
team_wins = cwc2023['Winner'].value_counts()

# Teams with at least five wins; this section treats them as the semi-finalists.
teams_with_5_or_more_wins = team_wins.loc[team_wins.ge(5)].index

# Keep only the matches that were won by one of those teams.
won_by_selected_team = cwc2023['Winner'].isin(teams_with_5_or_more_wins)
matches_of_interest = cwc2023.loc[won_by_selected_team]

In [7]:
# Win tally of the selected teams as a two-column frame. The team column is
# called 'name' so that it can be merged on the map's 'name' column.
country_wins = (
    matches_of_interest['Winner']
    .value_counts()
    .rename_axis('name')
    .reset_index(name='Wins')
)

# Country outlines, left-merged so that every country of the map is kept.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
merged_data = world.merge(country_wins, how='left', on='name')

# Colour each country by its number of wins.
choropleth_options = dict(
    locations="iso_a3",
    color="Wins",
    hover_name="name",
    title="Teams Qualified for Semi-Finals",
)
fig = px.choropleth(merged_data, **choropleth_options)

# Print the tally as plain text, then render the map.
print(country_wins)
fig.show()


           name  Wins
0         India    10
1     Australia     9
2  South Africa     7
3   New Zealand     5


# Road to the Semi-Finals

In [8]:
# Teams with at least five wins, taken from the per-team win counts.
top_four_teams = team_wins.loc[team_wins.ge(5)].index

# A match belongs to the "road" if one of those teams either won or lost it.
won_by_top_four = cwc2023['Winner'].isin(top_four_teams)
lost_by_top_four = cwc2023['Loser'].isin(top_four_teams)
matches_of_top_four_teams = cwc2023[won_by_top_four | lost_by_top_four]

# Short alias for the filtered frame, displayed as the cell output.
df1 = matches_of_top_four_teams
df1

Unnamed: 0,Match_No,Team_1,Team_2,Venue,Toss,Choice,Innings1_Run,Innings1_Balls,Innings1_Wickets,Innings2_Run,Innings2_Balls,Innings2_Wickets,Winner,Margin_Runs_or_Wickets,Margin,Loser
0,1,New Zealand,United Kingdom,Ahemedabad,New Zealand,Ball,282,300,9,283,218,1,New Zealand,9W,9W,United Kingdom
3,4,South Africa,Sri Lanka,New Delhi,SriLanka,Ball,428,300,5,326,269,10,South Africa,102R,102R,Sri Lanka
4,5,India,Australia,Chennai,Australia,Bat,199,297,10,201,248,4,India,6W,6W,Australia
5,6,New Zealand,Netherlands,Hyderabad,Netherlands,Ball,322,300,7,223,279,10,New Zealand,99R,99R,Netherlands
8,9,India,Afghanistan,New Delhi,Afghanistan,Bat,272,300,8,273,210,2,India,8W,8W,Afghanistan
9,10,South Africa,Australia,Lucknow,Australia,Ball,311,300,7,177,345,10,South Africa,134R,134R,Australia
10,11,New Zealand,Bangladesh,Chennai,New Zealand,Ball,245,300,9,248,257,2,New Zealand,8W,8W,Bangladesh
11,12,India,Pakistan,Ahemedabad,India,Ball,191,257,10,192,183,3,India,7W,7W,Pakistan
13,14,Australia,Sri Lanka,Lucknow,SriLanka,Bat,209,261,10,215,212,5,Australia,5W,5W,Sri Lanka
14,15,Netherlands,South Africa,Dharamshala,South Africa,Ball,245,258,8,207,257,10,Netherlands,38R,38R,South Africa


### Australia won the CWC2023 on 19/11/2023 by 6 wickets in Ahmedabad