# Zidio Project 

# Load and Preprocess Data

In [1]:
import pandas as pd

In [2]:
matches_df = pd.read_csv(r"matches.csv")
deliveries_df = pd.read_csv(r"deliveries.csv")

Fill Missing Values

In [3]:
matches_df['city'].fillna('Unknown', inplace=True)
matches_df['player_of_match'].fillna('No Award', inplace=True)
matches_df['winner'].fillna('No Result', inplace=True)
matches_df['result_margin'].fillna(0, inplace=True)
matches_df.drop(columns=['method'], inplace=True)

Convert date column to datetime

In [4]:
matches_df['date'] = pd.to_datetime(matches_df['date'])

Standardize Team names

In [5]:
team_replacements = {
    "Delhi Daredevils": "Delhi Capitals",
    "Kings XI Punjab": "Punjab Kings",
    "Rising Pune Supergiant": "Rising Pune Supergiants",
}
matches_df.replace({"team1": team_replacements, "team2": team_replacements, 
                    "toss_winner": team_replacements, "winner": team_replacements}, inplace=True)
deliveries_df.replace({"batting_team": team_replacements, "bowling_team": team_replacements}, inplace=True)

# Match and Team Performance Analysis

In [6]:
matches_played = matches_df['team1'].value_counts() + matches_df['team2'].value_counts()
team_wins = matches_df['winner'].value_counts()
win_percentage = (team_wins / matches_played * 100).round(2)

In [7]:
team_performance_df = pd.DataFrame({
    'Matches Played': matches_played,
    'Wins': team_wins,
    'Win Percentage': win_percentage
}).fillna(0).astype(int)

In [15]:
print("Team Performance Analysis:\n", team_performance_df)

Team Performance Analysis:
                              Matches Played  Wins  Win Percentage
Chennai Super Kings                     238   138              57
Deccan Chargers                          75    29              38
Delhi Capitals                          252   115              45
Gujarat Lions                            30    13              43
Gujarat Titans                           45    28              62
Kochi Tuskers Kerala                     14     6              42
Kolkata Knight Riders                   251   131              52
Lucknow Super Giants                     44    24              54
Mumbai Indians                          261   144              55
No Result                                 0     5               0
Pune Warriors                            46    12              26
Punjab Kings                            246   112              45
Rajasthan Royals                        221   112              50
Rising Pune Supergiants                  30    1

# Player Statistics and Rankings

In [8]:
top_batsmen = deliveries_df.groupby('batter')['batsman_runs'].sum().sort_values(ascending=False).head(10)
valid_wickets = deliveries_df[deliveries_df['dismissal_kind'] != 'None']
top_bowlers = valid_wickets['bowler'].value_counts().head(10)

In [9]:
batsman_balls_faced = deliveries_df.groupby('batter')['ball'].count()
batsman_strike_rate = ((top_batsmen / batsman_balls_faced) * 100).dropna().sort_values(ascending=False).head(10)

In [10]:
bowler_runs_conceded = deliveries_df.groupby('bowler')['total_runs'].sum()
bowler_overs_bowled = deliveries_df.groupby('bowler')['over'].count() / 6
bowler_economy_rate = (bowler_runs_conceded / bowler_overs_bowled).dropna().sort_values().head(10)

In [16]:
print("Top Batsmen:\n", top_batsmen)
print("Top Bowlers:\n", top_bowlers)
print("Batsman Strike Rates:\n", batsman_strike_rate)
print("Bowler Economy Rates:\n", bowler_economy_rate)

Top Batsmen:
 batter
V Kohli           8014
S Dhawan          6769
RG Sharma         6630
DA Warner         6567
SK Raina          5536
MS Dhoni          5243
AB de Villiers    5181
CH Gayle          4997
RV Uthappa        4954
KD Karthik        4843
Name: batsman_runs, dtype: int64
Top Bowlers:
 R Ashwin           4679
SP Narine          4146
B Kumar            4060
PP Chawla          3895
RA Jadeja          3895
YS Chahal          3628
Harbhajan Singh    3496
A Mishra           3444
DJ Bravo           3296
UT Yadav           3190
Name: bowler, dtype: int64
Batsman Strike Rates:
 batter
AB de Villiers    148.580442
CH Gayle          142.121729
DA Warner         135.429986
MS Dhoni          132.835065
SK Raina          132.535312
KD Karthik        131.353404
V Kohli           128.511867
RG Sharma         127.918194
RV Uthappa        126.152279
S Dhawan          123.454313
dtype: float64
Bowler Economy Rates:
 bowler
AC Gilchrist     0.000000
R Ravindra       3.500000
NB Singh         4

# Venue and Toss Impact Analysis

In [11]:
venue_wins = matches_df.groupby('venue')['winner'].value_counts(normalize=True).unstack().fillna(0) * 100
toss_win_match_win_percentage = ((matches_df[matches_df['toss_winner'] == matches_df['winner']].shape[0]) / matches_df.shape[0]) * 100
toss_decision_wins = matches_df.groupby('toss_decision')['winner'].count()

In [17]:
print("Venue Win Percentages:\n", venue_wins.head(10))
print("Toss Win Impact: ", toss_win_match_win_percentage, "% of teams winning the toss also won the match.")
print("Toss Decision Wins:\n", toss_decision_wins)

Venue Win Percentages:
 winner                                              Chennai Super Kings  \
venue                                                                     
Arun Jaitley Stadium                                           7.142857   
Arun Jaitley Stadium, Delhi                                   12.500000   
Barabati Stadium                                               0.000000   
Barsapara Cricket Stadium, Guwahati                            0.000000   
Bharat Ratna Shri Atal Bihari Vajpayee Ekana Cr...             0.000000   
Brabourne Stadium                                             10.000000   
Brabourne Stadium, Mumbai                                      0.000000   
Buffalo Park                                                  33.333333   
De Beers Diamond Oval                                         33.333333   
Dr DY Patil Sports Academy                                    17.647059   

winner                                              Deccan Chargers  \
venu

#  Head to Head Team Comaprisons

In [12]:
matches_df['team_pair'] = matches_df.apply(lambda row: tuple(sorted([row['team1'], row['team2']])), axis=1)
head_to_head = matches_df.groupby(['team_pair', 'winner']).size().unstack(fill_value=0)
one_sided_rivalries = (head_to_head.max(axis=1) - head_to_head.min(axis=1)).sort_values(ascending=False).head(10)

In [18]:
print("Head-to-Head Records:\n", head_to_head.head(10))
print("Most One-Sided Rivalries:\n", one_sided_rivalries)

Head-to-Head Records:
 winner                                        Chennai Super Kings  \
team_pair                                                           
(Chennai Super Kings, Deccan Chargers)                          6   
(Chennai Super Kings, Delhi Capitals)                          19   
(Chennai Super Kings, Gujarat Titans)                           3   
(Chennai Super Kings, Kochi Tuskers Kerala)                     1   
(Chennai Super Kings, Kolkata Knight Riders)                   19   
(Chennai Super Kings, Lucknow Super Giants)                     1   
(Chennai Super Kings, Mumbai Indians)                          17   
(Chennai Super Kings, Pune Warriors)                            4   
(Chennai Super Kings, Punjab Kings)                            16   
(Chennai Super Kings, Rajasthan Royals)                        16   

winner                                        Deccan Chargers  Delhi Capitals  \
team_pair                                                          

# Win Prediction and Trend Analysis

In [13]:
win_trends = matches_df[['season', 'winner', 'team1', 'team2', 'toss_winner', 'toss_decision', 'venue']]
season_wins = win_trends.groupby(['season', 'winner']).size().unstack(fill_value=0)
dominant_teams = season_wins.idxmax(axis=1)
team_win_trends = season_wins.sum(axis=0).sort_values(ascending=False)
toss_trends = win_trends.groupby(['season', 'toss_decision'])['winner'].count().unstack(fill_value=0)

In [19]:
print("Season-Wise Wins:\n", season_wins.tail(5))
print("Dominant Teams per Season:\n", dominant_teams.tail(5))
print("Overall Team Win Trends:\n", team_win_trends.head(10))
print("Toss Trends Over Seasons:\n", toss_trends.tail(5))

Season-Wise Wins:
 winner   Chennai Super Kings  Deccan Chargers  Delhi Capitals  Gujarat Lions  \
season                                                                         
2020/21                    6                0               9              0   
2021                      11                0              10              0   
2022                       4                0               7              0   
2023                      10                0               5              0   
2024                       7                0               7              0   

winner   Gujarat Titans  Kochi Tuskers Kerala  Kolkata Knight Riders  \
season                                                                 
2020/21               0                     0                      7   
2021                  0                     0                      9   
2022                 12                     0                      6   
2023                 11                     0               

In [14]:
cleaned_matches_file = "cleaned_matches.csv"
cleaned_deliveries_file = "cleaned_deliveries.csv"
matches_df.to_csv(cleaned_matches_file, index=False)
deliveries_df.to_csv(cleaned_deliveries_file, index=False)