In [3]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sn

import plotly.express as px

In [4]:
batsmans = pd.read_csv('cleaned_batsman_data.csv')
bowlers = pd.read_csv('cleaned_bowlers_data.csv')
players = pd.read_csv('world_cup_players_info.csv')
matches = pd.read_csv('match_schedule_results.csv')

In [5]:
most_run = batsmans.groupby('Batsman_Name')['Runs'].sum(
).sort_values(ascending=False).reset_index()

most_run

Unnamed: 0,Batsman_Name,Runs
0,Virat Kohli,765
1,Quinton de Kock,706
2,Rohit Sharma,597
3,Rachin Ravindra,578
4,David Warner,577
...,...,...
141,Dushan Hemantha,4
142,Lahiru Kumara,4
143,Fazalhaq Farooqi,2
144,Alex Carey,0


In [6]:
most_run_25 = most_run.head(25)

fig = px.bar(most_run_25, x='Batsman_Name', y='Runs', title='Most Runs',

             labels={'Runs': 'Total Runs', 'Batsman_Name': 'Batsman_Name'}, color='Runs', color_continuous_scale='viridis',)

fig.update_layout(xaxis_title='Batsman Name',
                  yaxis_title='Most Runs', height=600, width=800)
fig.update_layout(template='ggplot2')
fig.update_traces(texttemplate="%{y}", textposition="outside")
fig.show()

In [7]:
four = batsmans.groupby('Batsman_Name')['4s'].sum().sort_values(
    ascending=False).reset_index()
four_25 = four.head(25)
fig = px.line(four_25, x='Batsman_Name', y='4s', title='Most Fours',
              labels={'Batsman_Name': 'Batsman_Name', 'Fours': '4s'},
              line_shape="linear",
              markers=True,
              color='4s'
              )
fig.update_layout(xaxis_title='Batsman Names', yaxis_title='Fours')
fig.update_layout(template='ggplot2')
fig.update_traces(marker=dict(size=12))
fig.show()

In [8]:
sixes = batsmans.groupby('Batsman_Name')['6s'].sum(
).sort_values(ascending=False).reset_index()
sixes_25 = sixes.head(25)
fig = px.line(sixes_25, x='Batsman_Name', y='6s', title='Most Sixes',
              labels={'Batsman_Name': 'Batsman_Name', 'Sixes': '6s'},
              line_shape="linear",
              markers=True,
              color='6s'
              )
fig.update_layout(xaxis_title='Batsman Names', yaxis_title='Sixes')
fig.update_layout(template='ggplot2')
fig.update_traces(marker=dict(size=12))

fig.show()

In [9]:
boundaries = four.merge(sixes, on="Batsman_Name", how="outer")
boundaries["Total_Boundaries"] = boundaries["4s"] + boundaries["6s"]
boundaries = boundaries.sort_values(
    by="Total_Boundaries", ascending=False).reset_index()
boundaries_25 = boundaries.head(25)
boundaries_25.head(5)

Unnamed: 0,index,Batsman_Name,4s,6s,Total_Boundaries
0,113,Rohit Sharma,66,31,97
1,103,Quinton de Kock,65,26,91
2,21,David Warner,53,28,81
3,143,Virat Kohli,68,9,77
4,104,Rachin Ravindra,55,17,72


In [10]:
fig = px.line(boundaries_25, x='Batsman_Name', y='Total_Boundaries', title='Most Boundaries',
              labels={'Batsman_Name': 'Batsman_Name', 'Total_Boundaries': 'Total_Boundaries'},
              line_shape="linear",
              markers=True,
              color='Total_Boundaries'
              )
fig.update_layout(xaxis_title='Batsman Names', yaxis_title='Most Boundaries')
fig.update_layout(template='ggplot2')
fig.update_traces(marker=dict(size=12))

fig.show()

In [11]:
country_wise_runs = batsmans.groupby('Team_Innings')['Runs'].sum(
).sort_values(ascending=False).reset_index()

country_wise_runs

Unnamed: 0,Team_Innings,Runs
0,Australia,3303
1,South Africa,3272
2,India,3038
3,New Zealand,2712
4,Pakistan,2220
5,England,2135
6,Afghanistan,1990
7,Bangladesh,1944
8,Sri Lanka,1942
9,Netherlands,1728


In [13]:
fig = px.bar(country_wise_runs, x='Team_Innings', y='Runs', title='Most Runs Country Wise',

             labels={'Runs': 'Total Runs', 'Team_Innings': 'Team_Innings'}, color='Runs', color_continuous_scale='viridis',)

fig.update_layout(xaxis_title='Team Name',
                  yaxis_title='Most Runs', height=600, width=800)
fig.update_layout(template='ggplot2')
fig.update_traces(texttemplate="%{y}", textposition="outside")
fig.show()

In [14]:
country_wise_balls = batsmans.groupby('Team_Innings')['Balls'].sum(
).sort_values(ascending=False).reset_index()

country_wise_balls

Unnamed: 0,Team_Innings,Balls
0,Australia,3578
1,South Africa,3388
2,India,2985
3,New Zealand,2627
4,Bangladesh,2459
5,Afghanistan,2395
6,Pakistan,2309
7,Netherlands,2300
8,England,2275
9,Sri Lanka,2268


In [18]:
fig = px.bar(country_wise_balls, x='Team_Innings', y='Balls', title='Most Bowls Faced Country Wise',

             labels={'Balls': 'Total Balls', 'Team_Innings': 'Team_Innings'}, color='Balls', color_continuous_scale='viridis',)

fig.update_layout(xaxis_title='Team Name',
                  yaxis_title='Most Balls', height=600, width=800)
fig.update_layout(template='ggplot2')
fig.update_traces(texttemplate="%{y}", textposition="outside")
fig.show()

In [28]:
total_runs_per_batsman = batsmans.groupby(['Team_Innings', 'Batsman_Name'])[
    'Runs'].sum().reset_index()

top_scorers = total_runs_per_batsman.loc[total_runs_per_batsman.groupby(
    "Team_Innings")["Runs"].idxmax()]

top_scorers = top_scorers.sort_values(
    by="Runs", ascending=False).reset_index(drop=True)

top_scorers

Unnamed: 0,Team_Innings,Batsman_Name,Runs
0,India,Virat Kohli,765
1,South Africa,Quinton de Kock,706
2,New Zealand,Rachin Ravindra,578
3,Australia,David Warner,577
4,England,Dawid Malan,404
5,Pakistan,Mohammad Rizwan,395
6,Afghanistan,Ibrahim Zadran,376
7,Sri Lanka,Sadeera Samarawickrama,373
8,Bangladesh,Mahmudullah,328
9,Netherlands,Sybrand Engelbrecht,300


In [49]:
fig = px.bar(top_scorers, x='Team_Innings', y='Runs', title='Most Runs for each country',

             labels={'Runs': 'Total Runs', 'Team_Innings': 'Team_Innings'}, color='Runs', color_continuous_scale='viridis',text="Batsman_Name")

fig.update_layout(xaxis_title='Team Name',
                  yaxis_title='Most Runs', height=600, width=800)
fig.update_layout(template='ggplot2')
fig.update_traces(
    text=[f"{Batsman_Name}({Runs})" for Batsman_Name, Runs in zip(
        top_scorers["Batsman_Name"], top_scorers["Runs"])],
    texttemplate="%{text}",
    textposition="outside"
)
fig.show()

In [65]:
batsman_strike_rate = batsmans.groupby("Batsman_Name").agg(
    {"Runs": "sum", "Balls": "sum"}).reset_index()
batsman_strike_rate["Strike_Rate"] = (
    batsman_strike_rate["Runs"] / batsman_strike_rate["Balls"]) * 100
batsman_strike_rate = batsman_strike_rate.sort_values(
    by="Strike_Rate", ascending=False)
batsman_strike_rate_valid = batsman_strike_rate[batsman_strike_rate["Balls"]>=60]
batsman_strike_rate_valid

Unnamed: 0,Batsman_Name,Runs,Balls,Strike_Rate
34,Glenn Maxwell,404,288,140.277778
76,Mark Wood,85,64,132.812500
139,Travis Head,391,306,127.777778
83,Mitchell Santner,103,81,127.160494
43,Heinrich Klaasen,449,355,126.478873
...,...,...,...,...
71,Maheesh Theekshana,100,185,54.054054
133,Taskin Ahmed,49,93,52.688172
94,Mustafizur Rahman,44,92,47.826087
70,Mahedi Hasan,33,73,45.205479


In [66]:
fig = px.bar(batsman_strike_rate_valid.head(25), x='Batsman_Name', y='Strike_Rate', title='Highest SR (More than 10 overs faced)',

             labels={'Strike Rate': 'Strike_Rate', 'Batsman Name': 'Batsman_Name'}, color='Strike_Rate', color_continuous_scale='viridis',)

fig.update_layout(xaxis_title='Batsman Name',
                  yaxis_title='Strike Rate', height=600, width=800)
fig.update_layout(template='ggplot2')
fig.update_traces(texttemplate="%{y}", textposition="outside")
fig.show()

In [73]:
batsmans_avg = batsmans
batsmans_avg["Is_Out"] = (batsmans_avg["Dismissal"]!="not out").astype(int)
batsmans_avg.head(10)

Unnamed: 0,Match_no,Match_Between,Team_Innings,Batsman_Name,Batting_Position,Dismissal,Runs,Balls,4s,6s,Strike_Rate,Is_Out
0,1,England vs New Zealand,England,Jonny Bairstow,1,c Daryl Mitchell b Mitchell Santner,33,35,4,1,94.3,1
1,1,England vs New Zealand,England,Dawid Malan,2,c Tom Latham b Matt Henry,14,24,2,0,58.3,1
2,1,England vs New Zealand,England,Joe Root,3,b Glenn Phillips,77,86,4,1,89.5,1
3,1,England vs New Zealand,England,Harry Brook,4,c Devon Conway b Rachin Ravindra,25,16,4,1,156.3,1
4,1,England vs New Zealand,England,Moeen Ali,5,b Glenn Phillips,11,17,1,0,64.7,1
5,1,England vs New Zealand,England,Jos Buttler,6,c Tom Latham b Matt Henry,43,42,2,2,102.4,1
6,1,England vs New Zealand,England,Liam Livingstone,7,c Matt Henry b Trent Boult,20,22,3,0,90.9,1
7,1,England vs New Zealand,England,Sam Curran,8,c Tom Latham b Matt Henry,14,19,0,0,73.7,1
8,1,England vs New Zealand,England,Chris Woakes,9,c Will Young b Mitchell Santner,11,12,1,0,91.7,1
9,1,England vs New Zealand,England,Adil Rashid,10,not out,15,13,0,1,115.4,0


In [None]:
batsman_avg = batsmans_avg.groupby("Batsman_Name").agg(
    {"Runs": "sum", "Is_Out": "sum"}).reset_index()
def calcavg(Runs,Is_Out):
    return Runs/Is_Out if Is_Out >= 1 else Runs
batsman_avg["Batting_Average"] = batsman_avg.apply(
    lambda x: calcavg(x["Runs"], x["Is_Out"]), axis=1)
batsman_avg = batsman_avg.sort_values(by="Batting_Average", ascending=False).reset_index(drop=True)
batsman_avg

Unnamed: 0,Batsman_Name,Runs,Is_Out,Batting_Average
0,Virat Kohli,765,8,95.625000
1,KL Rahul,452,6,75.333333
2,Fakhar Zaman,220,3,73.333333
3,Azmatullah Omarzai,353,5,70.600000
4,Daryl Mitchell,552,8,69.000000
...,...,...,...,...
141,Fazalhaq Farooqi,2,1,2.000000
142,Josh Hazlewood,6,3,2.000000
143,Dushmantha Chameera,6,4,1.500000
144,Alex Carey,0,1,0.000000


In [None]:
fig = px.bar(batsman_avg.head(25),
             x='Batsman_Name',
             y='Batting_Average',
             title='Highest Batting Averages',
             labels={'Batting_Average': 'Batting Average',
                     'Batsman_Name': 'Batsman Name'},
             color='Batting_Average',
             text = 'Is_Out',
             color_continuous_scale='viridis')

fig.update_layout(xaxis_title='Batsman Name',
                  yaxis_title='Batting Average',
                  height=600, width=800)

fig.update_layout(template='ggplot2')
fig.update_traces(texttemplate="%{y:.2f}", textposition="outside")
fig.show()

In [106]:
catch_data = batsmans["Dismissal"].str.extract(r'c ([\w\s]+) b')
catch_data.dropna()
catch_count = catch_data.value_counts().reset_index()
catch_count.columns = ["Fielder", "Catches"]
catch_count

Unnamed: 0,Fielder,Catches
0,Quinton de Kock,22
1,Josh Inglis,17
2,KL Rahul,16
3,Scott Edwards,12
4,Daryl Mitchell,11
...,...,...
120,Sam Curran,1
121,Shoriful Islam,1
122,Suryakumar Yadav,1
123,Vikramjit Singh,1


In [103]:
fig = px.bar(catch_count.head(25),
             x='Fielder',
             y='Catches',
             title='Top 25 Players with Most Catches in CWC 2023',
             labels={'Catches': 'Total Catches', 'Fielder': 'Player'},
             color='Catches',
             text='Catches',  # Displaying catch count as text
             color_continuous_scale='viridis')

fig.update_layout(xaxis_title='Player Name',
                  yaxis_title='Total Catches',
                  height=600, width=800)

fig.update_layout(template='ggplot2')
# Show exact values
fig.update_traces(texttemplate="%{y}", textposition="outside")
fig.show()