In [5]:
import pandas as pd

# 1. Load data
df = pd.read_csv('ODI_Match_Data.csv')

# 2. Check available columns
print("Available columns:", df.columns.tolist())

# 3. Define standard mapping (expected names -> real names)
column_mapping = {
    'match_id': ['match_id', 'Match_Id', 'id'],
    'venue': ['venue', 'Venue'],
    'innings': ['innings', 'Innings'],
    'batting_team': ['batting_team', 'BattingTeam', 'bat_team'],
    'bowling_team': ['bowling_team', 'BowlingTeam', 'bowl_team'],
    'ball': ['ball', 'Ball'],
    'over': ['over', 'Over'],  # Sometimes need to extract from ball if missing
    'runs_off_bat': ['runs_off_bat', 'batsman_runs', 'runs'],
    'extras': ['extras', 'Extras', 'extra_runs'],
    'player_dismissed': ['player_dismissed', 'player_out', 'wicket_player_out'],
    'bowler': ['bowler', 'Bowler', 'bowler_name'],
    'batsman': ['batsman', 'Batsman', 'batsman_name']
}

# 4. Auto map columns
final_columns = {}
for standard_col, possible_names in column_mapping.items():
    for name in possible_names:
        if name in df.columns:
            final_columns[standard_col] = name
            break

print("\nMapped columns:", final_columns)

# 5. Keep only the matched columns
missing_cols = [col for col in column_mapping.keys() if col not in final_columns]
if missing_cols:
    print(f"\n⚠️ Warning: Missing columns in data: {missing_cols}")

# Select and Rename
df = df[[final_columns[col] for col in final_columns]]
df = df.rename(columns={v: k for k, v in final_columns.items()})

# 6. Handle missing 'over' (calculate from ball if needed)
if 'over' not in df.columns and 'ball' in df.columns:
    df['over'] = df['ball'].astype(str).str.split('.').str[0].astype(int)

# 7. Create total_runs column
if 'extras' in df.columns:
    df['total_runs'] = df['runs_off_bat'] + df['extras']
else:
    df['total_runs'] = df['runs_off_bat']

print("\n✅ Dataframe is ready for processing!")
print(df.head())


  df = pd.read_csv('ODI_Match_Data.csv')


Available columns: ['match_id', 'season', 'start_date', 'venue', 'innings', 'ball', 'batting_team', 'bowling_team', 'striker', 'non_striker', 'bowler', 'runs_off_bat', 'extras', 'wides', 'noballs', 'byes', 'legbyes', 'penalty', 'wicket_type', 'player_dismissed', 'other_wicket_type', 'other_player_dismissed', 'cricsheet_id']

Mapped columns: {'match_id': 'match_id', 'venue': 'venue', 'innings': 'innings', 'batting_team': 'batting_team', 'bowling_team': 'bowling_team', 'ball': 'ball', 'runs_off_bat': 'runs_off_bat', 'extras': 'extras', 'player_dismissed': 'player_dismissed', 'bowler': 'bowler'}


✅ Dataframe is ready for processing!
   match_id                           venue  innings batting_team  \
0   1389389  Holkar Cricket Stadium, Indore        1        India   
1   1389389  Holkar Cricket Stadium, Indore        1        India   
2   1389389  Holkar Cricket Stadium, Indore        1        India   
3   1389389  Holkar Cricket Stadium, Indore        1        India   
4   1389389  Hol

In [7]:
import matplotlib.pyplot as plt
import seaborn as sns

# Focus only on overs 45-50
death_overs_df = df[(df['over'] >= 45) & (df['over'] <= 50)]

# --- Top Death Over Batsmen ---
death_batsmen = (death_overs_df
                 .groupby('batsman')['total_runs']
                 .sum()
                 .sort_values(ascending=False)
                 .head(10))

# Plot
plt.figure(figsize=(12,6))
sns.barplot(x=death_batsmen.values, y=death_batsmen.index, palette='coolwarm')
plt.title('🏏 Top 10 Death Over Batsmen (45-50 overs)', fontsize=16)
plt.xlabel('Runs Scored')
plt.ylabel('Batsman')
plt.show()

# --- Top Death Over Bowlers (Wickets) ---
death_bowlers = (death_overs_df[~death_overs_df['player_dismissed'].isna()]
                 .groupby('bowler')
                 .size()
                 .sort_values(ascending=False)
                 .head(10))

# Plot
plt.figure(figsize=(12,6))
sns.barplot(x=death_bowlers.values, y=death_bowlers.index, palette='mako')
plt.title('🔥 Top 10 Death Over Bowlers (45-50 overs)', fontsize=16)
plt.xlabel('Wickets Taken')
plt.ylabel('Bowler')
plt.show()


KeyError: 'batsman'