In [None]:
import pandas as pd

In [None]:
# Directory that holds the two CSV inputs. Kept in one place so the notebook can
# be pointed at another location by editing a single line.
# NOTE(review): "/content" is presumably the hosted-notebook upload directory — confirm.
DATA_DIR = "/content"

# Deliveries table (columns include over/ball/batter/bowler) and the matches table.
deliveries_data = pd.read_csv(f"{DATA_DIR}/deliveries.csv")
matches_data = pd.read_csv(f"{DATA_DIR}/matches.csv")

In [None]:
# Show the column labels of the deliveries table to see which fields are available.
print(deliveries_data.columns)

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batter', 'bowler', 'non_striker', 'batsman_runs', 'extra_runs',
       'total_runs', 'extras_type', 'is_wicket', 'player_dismissed',
       'dismissal_kind', 'fielder'],
      dtype='object')


In [None]:
# Add up the runs credited to each batter over all deliveries.
# as_index=False keeps "batter" as an ordinary column rather than the index,
# so the result is a two-column frame: batter, batsman_runs.
batsman_total_runs = (
    deliveries_data
    .groupby("batter", as_index=False)["batsman_runs"]
    .sum()
)


In [None]:
# Rename columns for better readability.
# Assigning to .columns relabels by position: the first column (the batter name)
# becomes "Batsman" and the second (the summed runs) becomes "Total Runs".
batsman_total_runs.columns = ["Batsman", "Total Runs"]

In [None]:
# Order batters from most to fewest total runs and keep only the first row,
# i.e. the batsman with the highest run total.
highest_scorer = (
    batsman_total_runs
    .sort_values("Total Runs", ascending=False)
    .iloc[:1]
)
print(highest_scorer)

     Batsman  Total Runs
631  V Kohli        8014


In [None]:
# Find the batsman with the lowest total runs (an ascending sort puts the minimum first).
# The result gets its own name so that `highest_scorer` from the previous cell is
# not silently replaced by the lowest scorer.
# NOTE: several batters can be tied on the minimum; head(1) shows only one of them.
lowest_scorer = batsman_total_runs.sort_values(by="Total Runs", ascending=True).head(1)
print(lowest_scorer)

       Batsman  Total Runs
225  IC Pandey           0


In [None]:
# Central-tendency summary of the runs scored off the bat on each delivery.
# mode() returns a Series (there may be several modes); the first entry is used.
runs_stats = dict(
    Mean=deliveries_data['batsman_runs'].mean(),
    Median=deliveries_data['batsman_runs'].median(),
    Mode=deliveries_data['batsman_runs'].mode()[0],
)
print("Runs Statistics:", runs_stats)


Runs Statistics: {'Mean': 1.265000766518473, 'Median': 1.0, 'Mode': 0}


In [None]:
# Dismissal kinds that are not credited to the bowler. Counting them would
# inflate a bowler's tally with wickets they did not take (e.g. run outs).
# NOTE(review): labels are assumed to match the values in `dismissal_kind` —
# confirm with deliveries_data['dismissal_kind'].unique().
NON_BOWLER_DISMISSALS = ["run out", "retired hurt", "retired out", "obstructing the field"]

# Keep deliveries on which a wicket fell and the dismissal counts for the bowler.
is_dismissal = deliveries_data['is_wicket'] == 1
credited_to_bowler = ~deliveries_data['dismissal_kind'].isin(NON_BOWLER_DISMISSALS)
wickets_data = deliveries_data[is_dismissal & credited_to_bowler]

# Number of credited wickets per bowler. Bowlers with no wickets do not appear
# here, so statistics derived from this Series cover wicket-takers only.
wickets_per_player = wickets_data.groupby('bowler').size()

In [None]:
# Summarise the wickets-per-bowler distribution. mode() may return several
# values (or none for empty input): take the first, falling back to 0.
wicket_modes = wickets_per_player.mode()
wickets_stats = {
    'Mean': wickets_per_player.mean(),
    'Median': wickets_per_player.median(),
    'Mode': 0 if wicket_modes.empty else wicket_modes[0],
}
print("Wickets Statistics:", wickets_stats)

Wickets Statistics: {'Mean': 28.46153846153846, 'Median': 11.0, 'Mode': 1}


In [None]:
# Small hand-written sample of batting metrics, one record per player.
# Tuple order follows the `columns` list below.
batting_data = pd.DataFrame(
    [
        ("Player A", 55.0, 140.0, 4, 6),
        ("Player B", 45.0, 130.0, 5, 0),
        ("Player C", 40.0, 120.0, 4, 5),
    ],
    columns=["Player", "Average", "Strike Rate", "100s", "50s"],
)

In [None]:
# Create a copy for normalized data, so that columns added to `normalized_data`
# do not modify the original `batting_data` frame.
normalized_data = batting_data.copy()

In [None]:
# Express every metric as a percentage of the largest value in its column,
# so the best player on a metric scores 100 on it.
for metric in ("Average", "Strike Rate", "100s", "50s"):
    column_max = batting_data[metric].max()
    normalized_data[f"Normalized {metric}"] = batting_data[metric] / column_max * 100

In [None]:
# Print the player names next to the four normalized metric columns only.
display_columns = [
    "Player",
    "Normalized Average",
    "Normalized Strike Rate",
    "Normalized 100s",
    "Normalized 50s",
]
print(normalized_data[display_columns])


     Player  Normalized Average  Normalized Strike Rate  Normalized 100s  \
0  Player A          100.000000              100.000000             80.0   
1  Player B           81.818182               92.857143            100.0   
2  Player C           72.727273               85.714286             80.0   

   Normalized 50s  
0      100.000000  
1        0.000000  
2       83.333333  


In [None]:
# Preview the first rows of the matches table together with its column labels.
# The cell's value is a 2-tuple, so both parts appear in one plain-text output.
matches_data.head(), matches_data.columns

(       id   season        city        date match_type player_of_match  \
 0  335982  2007/08   Bangalore  2008-04-18     League     BB McCullum   
 1  335983  2007/08  Chandigarh  2008-04-19     League      MEK Hussey   
 2  335984  2007/08       Delhi  2008-04-19     League     MF Maharoof   
 3  335985  2007/08      Mumbai  2008-04-20     League      MV Boucher   
 4  335986  2007/08     Kolkata  2008-04-20     League       DJ Hussey   
 
                                         venue                        team1  \
 0                       M Chinnaswamy Stadium  Royal Challengers Bangalore   
 1  Punjab Cricket Association Stadium, Mohali              Kings XI Punjab   
 2                            Feroz Shah Kotla             Delhi Daredevils   
 3                            Wankhede Stadium               Mumbai Indians   
 4                                Eden Gardens        Kolkata Knight Riders   
 
                          team2                  toss_winner toss_decision  \


In [None]:
# Preview the first rows of the deliveries table together with its column labels.
# The cell's value is a 2-tuple, so both parts appear in one plain-text output.
deliveries_data.head(), deliveries_data.columns

(   match_id  inning           batting_team                 bowling_team  over  \
 0    335982       1  Kolkata Knight Riders  Royal Challengers Bangalore     0   
 1    335982       1  Kolkata Knight Riders  Royal Challengers Bangalore     0   
 2    335982       1  Kolkata Knight Riders  Royal Challengers Bangalore     0   
 3    335982       1  Kolkata Knight Riders  Royal Challengers Bangalore     0   
 4    335982       1  Kolkata Knight Riders  Royal Challengers Bangalore     0   
 
    ball       batter   bowler  non_striker  batsman_runs  extra_runs  \
 0     1   SC Ganguly  P Kumar  BB McCullum             0           1   
 1     2  BB McCullum  P Kumar   SC Ganguly             0           0   
 2     3  BB McCullum  P Kumar   SC Ganguly             0           1   
 3     4  BB McCullum  P Kumar   SC Ganguly             0           0   
 4     5  BB McCullum  P Kumar   SC Ganguly             0           0   
 
    total_runs extras_type  is_wicket player_dismissed dismissal_k

In [None]:
# Count how many rows of the matches table fall in each city (value_counts
# orders the result from most to fewest) and keep the first 20 entries.
city_match_counts = (
    matches_data['city']
    .value_counts()
    .iloc[:20]
)

In [None]:
# Turn the per-city counts into a two-column table: move the city labels out of
# the index into a column, then relabel both columns by position.
top_cities = city_match_counts.reset_index()
top_cities.columns = ['City', 'Matches Played']
top_cities

Unnamed: 0,City,Matches Played
0,Mumbai,173
1,Kolkata,93
2,Delhi,90
3,Chennai,85
4,Hyderabad,77
5,Bangalore,65
6,Chandigarh,61
7,Jaipur,57
8,Pune,51
9,Abu Dhabi,37
