In [2]:
import os
print("Current working directory:", os.getcwd())


Current working directory: C:\Users\ASUS


In [3]:
import pandas as pd


df = pd.read_excel("C:/Users/ASUS/cleaned_ipl_data_batsman.xlsx")

# for use case for cleaning columns 
df.columns = df.columns.str.strip().str.replace(' ', '_')

# Clean / format data
df['batsmanName'] = df['batsmanName'].str.strip()
df['SR'] = pd.to_numeric(df['SR'], errors='coerce')
df['runs'] = pd.to_numeric(df['runs'], errors='coerce')

# Drop rows that include some missing value 
df = df.dropna(subset=['batsmanName', 'runs', 'SR'])

# ✅ Creating is out from out/nout 
if 'out/not_out' in df.columns:
    df['is_out'] = df['out/not_out'].str.lower().map({'out': 1, 'not_out': 0, 'not out': 0})
else:
    raise KeyError("'out/not_out' column not found in the dataset")

# Fill missing values 
df['is_out'] = df['is_out'].fillna(1).astype(int)

# Group by batsman and aggregate 
player_stats = df.groupby('batsmanName').agg(
    total_runs=('runs', 'sum'),
    innings=('match_id', 'count'),
    average_SR=('SR', 'mean'),
    dismissals=('is_out', 'sum')
).reset_index()

# for diving it by zero avoiding it 
player_stats['dismissals'] = player_stats['dismissals'].replace(0, 1)

# averages
player_stats['batting_avg'] = player_stats['total_runs'] / player_stats['dismissals']
player_stats['avg_runs_per_innings'] = player_stats['total_runs'] / player_stats['innings']

# performance score
player_stats['performance_score'] = (
    player_stats['avg_runs_per_innings'] +
    (player_stats['average_SR'] / 5) +
    (player_stats['batting_avg'] / 2)
)

# Filter by minimum number of innings played
player_stats = player_stats[player_stats['innings'] >= 10]

# Sort and get top 10
predicted_top_5 = player_stats.sort_values(by='performance_score', ascending=False).head(10)

# Show output
print("🔮 Predicted Top Batsmen for IPL 2025:")
print(predicted_top_5[['batsmanName', 'avg_runs_per_innings', 'average_SR', 'batting_avg', 'performance_score']])


🔮 Predicted Top Batsmen for IPL 2025:
         batsmanName  avg_runs_per_innings  average_SR  batting_avg  \
402      Virat Kohli             49.400000  155.598667    61.750000   
120  HeinrichKlaasen             40.727273  182.172727    49.777778   
237  Nicholas Pooran             35.642857  160.368571    62.375000   
305      Riyan Parag             40.928571  141.675000    52.090909   
316    Sai Sudharsan             43.818182  133.755455    48.200000   
358      ShubmanGill             41.606061  121.210606    47.344828   
368  SuryakumarYadav             37.833333  149.895000    43.238095   
325     Sanju Samson             36.642857  131.791429    51.300000   
84       DevonConway             42.000000  111.330000    48.631579   
317     SaiSudharsan             39.000000  131.600000    46.090909   

     performance_score  
402         111.394733  
120         102.050707  
237          98.904071  
305          95.309026  
316          94.669273  
358          89.520596  
368  