In [2]:
import os

import pandas as pd

# Load the data for both players.
# The dataset folder defaults to the original local directory; set the
# CRICKET_DATA_DIR environment variable to point the notebook at another
# location instead of editing the paths below.
DATA_DIR = os.environ.get(
    "CRICKET_DATA_DIR", r"C:\Users\91877\Desktop\cricket_ML\Datasets"
)
bumrah_file_path = os.path.join(DATA_DIR, "Preprocessed_Jasprit_Bumrah_Data.xlsx")
shami_file_path = os.path.join(DATA_DIR, "Preprocessed_Mohammed_Shami_Data.xlsx")

# Load data from the first sheet of each file (sheet_name=0)
bumrah_data = pd.read_excel(bumrah_file_path, sheet_name=0)
shami_data = pd.read_excel(shami_file_path, sheet_name=0)

# Display first few rows of both frames
bumrah_data.head(), shami_data.head()


(   Player_name     Overs  Maidens  Runs  Wickets  Economy  Position  Innings  \
 0            0  3.300000        0    23        3     6.57         3        2   
 1            0  9.706840        0    84        3     8.61         4        2   
 2            0  4.673089        0    49        5    10.45         2        2   
 3            0  6.721472        1    59        3     8.75         4        2   
 4            0  2.255513        0    18        5     8.08         4        1   
 
     Opposition    Ground Start Date Format  
 0  v Australia  Adelaide 2016-01-26   T20I  
 1  v Australia  Adelaide 2016-11-04    ODI  
 2  v Australia  Adelaide 2022-02-11   T20I  
 3  v Australia  Adelaide 2017-06-05   T20I  
 4  v Australia  Adelaide 2017-01-01    ODI  ,
    Player_name  Overs  Maidens  Runs  Wickets  Economy  Position  Innings  \
 0            1    4.0        0    46        0    11.50         3        2   
 1            1    4.0        0    25        1     6.25         3        1   
 

In [3]:
# Venues the comparison is restricted to
target_grounds = ["Perth", "Adelaide", "Melbourne", "Hobart"]

# Boolean masks marking the rows played at one of the target venues
bumrah_at_targets = bumrah_data["Ground"].isin(target_grounds)
shami_at_targets = shami_data["Ground"].isin(target_grounds)

# Keep only the matching rows for each bowler
bumrah_aus = bumrah_data.loc[bumrah_at_targets]
shami_aus = shami_data.loc[shami_at_targets]

# Preview both filtered frames
bumrah_aus.head(), shami_aus.head()



(   Player_name     Overs  Maidens  Runs  Wickets  Economy  Position  Innings  \
 0            0  3.300000        0    23        3     6.57         3        2   
 1            0  9.706840        0    84        3     8.61         4        2   
 2            0  4.673089        0    49        5    10.45         2        2   
 3            0  6.721472        1    59        3     8.75         4        2   
 4            0  2.255513        0    18        5     8.08         4        1   
 
     Opposition    Ground Start Date Format  
 0  v Australia  Adelaide 2016-01-26   T20I  
 1  v Australia  Adelaide 2016-11-04    ODI  
 2  v Australia  Adelaide 2022-02-11   T20I  
 3  v Australia  Adelaide 2017-06-05   T20I  
 4  v Australia  Adelaide 2017-01-01    ODI  ,
    Player_name  Overs  Maidens  Runs  Wickets  Economy  Position  Innings  \
 1            1    4.0        0    25        1     6.25         3        1   
 3            1    4.0        0    13        1     3.25         3        2   
 

In [4]:
# Aggregate statistics for comparison
def summarize_bowling(matches: pd.DataFrame, player: str) -> pd.Series:
    """Collapse one bowler's match rows into a single row of summary figures.

    Parameters
    ----------
    matches : pd.DataFrame
        Match rows with "Overs", "Maidens", "Runs", "Wickets" and "Economy"
        columns.
    player : str
        Name given to the returned Series; it becomes the row label in the
        comparison table.

    Returns
    -------
    pd.Series
        Summed Overs, Maidens, Runs and Wickets, the mean of the per-match
        Economy values, plus "Bowling Average" (Runs / Wickets) and
        "Strike Rate" (Overs * 6 / Wickets). Both derived figures are NaN
        when no wickets were taken.
    """
    summary = matches.agg({
        "Overs": "sum",
        "Maidens": "sum",
        "Runs": "sum",
        "Wickets": "sum",
        # Unweighted mean of per-match economy rates, not total runs / total overs.
        "Economy": "mean",
    }).rename(player)

    wickets = summary["Wickets"]
    # Both ratios are undefined without a wicket; report NaN rather than
    # dividing by zero (which would yield inf and a runtime warning).
    has_wickets = wickets > 0
    undefined = float("nan")

    summary["Bowling Average"] = summary["Runs"] / wickets if has_wickets else undefined
    # NOTE(review): Overs * 6 assumes Overs holds a decimal number of overs.
    # Cricket notation (3.3 = 3 overs and 3 balls) would need converting
    # first - TODO confirm against the preprocessing step.
    summary["Strike Rate"] = (summary["Overs"] * 6) / wickets if has_wickets else undefined
    return summary


bumrah_stats = summarize_bowling(bumrah_aus, "Jasprit Bumrah")
shami_stats = summarize_bowling(shami_aus, "Mohammed Shami")

# Combine into a DataFrame: one row per bowler, labelled by the Series name
comparison_df = pd.DataFrame([bumrah_stats, shami_stats])

# Display comparison
comparison_df


Unnamed: 0,Overs,Maidens,Runs,Wickets,Economy,Bowling Average,Strike Rate
Jasprit Bumrah,147.044673,11.0,1276.0,82.0,8.248077,15.560976,10.759366
Mohammed Shami,218.805949,10.0,1660.0,54.0,7.740606,30.740741,24.311772


## **Overall Performance Comparison**

- **Jasprit Bumrah has a superior strike rate (10.76 vs 24.31 balls per wicket; lower is better), meaning he takes wickets more frequently.**
- **Bumrah has taken 82 wickets, while Shami has only taken 54.**
- **Bowling average favors Bumrah (15.56) over Shami (30.74), meaning Bumrah concedes fewer runs per wicket.**
- **Shami is slightly more economical (7.74 runs per over) than Bumrah (8.25); both figures are the unweighted mean of per-match economy rates.**
- **Overall, Bumrah emerges as the better choice based on wickets, strike rate, and bowling average.**


In [5]:
# Aggregate performance per ground
def summarize_by_ground(matches: pd.DataFrame) -> pd.DataFrame:
    """Summarise one bowler's match rows separately for each ground.

    Parameters
    ----------
    matches : pd.DataFrame
        Match rows with "Ground", "Overs", "Maidens", "Runs", "Wickets" and
        "Economy" columns.

    Returns
    -------
    pd.DataFrame
        Indexed by Ground, with summed Overs, Maidens, Runs and Wickets, the
        mean of the per-match Economy values, plus "Bowling Average"
        (Runs / Wickets) and "Strike Rate" (Overs * 6 / Wickets). The derived
        figures are NaN for a ground where no wickets were taken.
    """
    per_ground = matches.groupby("Ground").agg({
        "Overs": "sum",
        "Maidens": "sum",
        "Runs": "sum",
        "Wickets": "sum",
        # Unweighted mean of per-match economy rates at that ground.
        "Economy": "mean",
    })

    # Mask zero-wicket grounds so the ratios come out as NaN instead of inf.
    wickets = per_ground["Wickets"].where(per_ground["Wickets"] > 0)
    per_ground["Bowling Average"] = per_ground["Runs"] / wickets
    # NOTE(review): Overs * 6 assumes Overs holds a decimal number of overs -
    # TODO confirm against the preprocessing step.
    per_ground["Strike Rate"] = (per_ground["Overs"] * 6) / wickets
    return per_ground


ground_wise_bumrah = summarize_by_ground(bumrah_aus)
ground_wise_shami = summarize_by_ground(shami_aus)

# Merge data: the outer join on Ground keeps venues where only one bowler has
# rows; the other bowler's columns are NaN there.
ground_comparison = ground_wise_bumrah.add_suffix("_Bumrah").join(
    ground_wise_shami.add_suffix("_Shami"), how="outer"
)

# Display ground-wise performance
ground_comparison


Unnamed: 0_level_0,Overs_Bumrah,Maidens_Bumrah,Runs_Bumrah,Wickets_Bumrah,Economy_Bumrah,Bowling Average_Bumrah,Strike Rate_Bumrah,Overs_Shami,Maidens_Shami,Runs_Shami,Wickets_Shami,Economy_Shami,Bowling Average_Shami,Strike Rate_Shami
Ground,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Adelaide,60.746679,5.0,577.0,43.0,8.520833,13.418605,8.476281,49.43157,2,409,14,9.08,29.214286,21.184959
Melbourne,86.297994,6.0,699.0,39.0,8.014286,17.923077,13.276615,110.908223,4,814,26,7.451875,31.307692,25.594205
Perth,,,,,,,,58.466156,4,437,14,7.063333,31.214286,25.056924


## **Ground-Specific Insights**

### **Adelaide**
- **Bumrah dominates with 43 wickets, while Shami has only 14.**
- **Bowling average is significantly better for Bumrah (13.42 vs 29.21).**
- **Bumrah's strike rate (8.48) shows he takes wickets more frequently than Shami (21.18).**

### **Melbourne**
- **Bumrah again leads with 39 wickets, compared to Shami's 26.**
- **Bowling average of 17.92 for Bumrah vs. 31.31 for Shami.**
- **Bumrah’s strike rate (13.28) is much better than Shami’s (25.59).**

### **Perth**
- **Bumrah has no recorded data.**
- **Shami has taken 14 wickets but with a modest bowling average of 31.21 and strike rate of 25.06.**

### **Overall Verdict**
- **Bumrah dominates in Adelaide and Melbourne.**
- **Shami has experience in Perth but isn’t particularly impressive.**
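- **Hobart does not appear in the filtered data for either bowler, so the ground names in the dataset should be checked before drawing any conclusion about that venue.**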
- **Bumrah remains the better pick for Australian conditions.**
