# IPL Dataset Exploratory Analysis

This notebook explores IPL match and delivery data using pandas to answer key analytical questions about matches, players, and teams.


## Data Loading

In [None]:
import pandas as pd

# Read the two CSV files from the working directory, deliveries first and
# matches second, binding each to its own DataFrame.
deliveries_df, matches_df = (
    pd.read_csv(csv_name) for csv_name in ("deliveries.csv", "matches.csv")
)

# Preview the leading rows of both frames; the cell shows them as a tuple.
(deliveries_df.head(), matches_df.head())


## Questions 1–12: Basic Statistics

1. Number of Matches Played  
2. Number of Seasons Played  
3. Number of Players  
4. Number of Bowlers  
5. Number of Batsmen  
6. Number of Allrounders (players who appear as both a batter and a bowler)  
7. Number of Unique Venues  
8. Number of Unique Teams  
9. Total Number of Deliveries Bowled  
10. Total Number of Wickets Fallen  
11. Total Number of Runs Scored  
12. Total Number of Boundaries Hit  


In [None]:
# --- Counts taken from the matches table ---------------------------------
num_matches = matches_df['id'].nunique()
num_seasons = matches_df['season'].nunique()
num_venues = matches_df['venue'].nunique()

# Distinct team labels across both team columns, with missing values removed.
_team_labels = pd.unique(matches_df[['team1', 'team2']].values.ravel('K'))
unique_teams = [team for team in _team_labels if pd.notnull(team)]
num_unique_teams = len(unique_teams)

# --- Counts taken from the deliveries table ------------------------------
# A "player" is any non-null name found in one of these four columns.
_name_columns = ['batter', 'bowler', 'non_striker', 'player_dismissed']
_all_names = pd.unique(deliveries_df[_name_columns].values.ravel('K'))
num_players = int(pd.notna(_all_names).sum())

num_bowlers = deliveries_df['bowler'].nunique()
num_batsmen = deliveries_df['batter'].nunique()

# Allrounders are names present in both the batter and the bowler column.
batsmen_set = {*deliveries_df['batter'].unique()}
bowlers_set = {*deliveries_df['bowler'].unique()}
num_allrounders = len(batsmen_set & bowlers_set)

# One row per delivery, so the row count is the delivery count.
total_deliveries = deliveries_df.shape[0]
total_wickets = deliveries_df['is_wicket'].sum()
total_runs = deliveries_df['total_runs'].sum()

# A boundary is any delivery on which the batter is credited with 4 or 6 runs.
boundaries = int(deliveries_df['batsman_runs'].isin([4, 6]).sum())

# Pair every question with its answer so the two columns cannot drift apart.
_summary_rows = [
    ("Number of Matches Played", num_matches),
    ("Number of Seasons Played", num_seasons),
    ("Number of Players", num_players),
    ("Number of Bowlers", num_bowlers),
    ("Number of Batsmen", num_batsmen),
    ("Number of Allrounders", num_allrounders),
    ("Number of Unique Venues", num_venues),
    ("Number of Unique Teams", num_unique_teams),
    ("Total Deliveries Bowled", total_deliveries),
    ("Total Wickets Fallen", total_wickets),
    ("Total Runs Scored", total_runs),
    ("Total Boundaries Hit", boundaries),
]
_questions, _answers = zip(*_summary_rows)

# Final expression of the cell: the summary table that gets rendered.
pd.DataFrame({"Question": list(_questions), "Answer": list(_answers)})


## Questions 13–17: Top 5 Team Performances

13. Top 5 Teams with Most Match Wins  
14. Top 5 Teams Who Won Home Matches After Winning the Toss  
15. Top 5 Teams Who Won Home Matches After Losing the Toss  
16. Top 5 Teams Who Won Away Matches After Winning the Toss  
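17. Top 5 Teams Who Won Away Matches After Losing the Toss  

Note: in this analysis a team's "home" matches are those in which it is listed as `team1`, and its "away" matches are those in which it is listed as `team2`.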


In [None]:
def _top_side_wins(side, won_toss, limit=5):
    """Count wins for the team in column *side*, split by toss outcome.

    side     -- 'team1' or 'team2'; the column whose team must be the winner.
    won_toss -- True keeps matches where that team also won the toss,
                False keeps matches where the toss winner is anyone else.
    limit    -- number of leading rows of the win counts to return.
    """
    side_team = matches_df[side]
    if won_toss:
        toss_mask = matches_df['toss_winner'] == side_team
    else:
        toss_mask = matches_df['toss_winner'] != side_team
    victory_mask = matches_df['winner'] == side_team
    return matches_df.loc[toss_mask & victory_mask, side].value_counts().head(limit)


# Overall leaderboard: the five most frequent values in the winner column.
top5_match_wins = matches_df['winner'].value_counts().head(5)

# NOTE(review): "home" is taken to be team1 and "away" to be team2. This is
# an assumption about how the matches file is laid out; confirm with the data.
home_win_toss_win = _top_side_wins('team1', won_toss=True)
home_win_toss_loss = _top_side_wins('team1', won_toss=False)
away_win_toss_win = _top_side_wins('team2', won_toss=True)
away_win_toss_loss = _top_side_wins('team2', won_toss=False)

# The five series are aligned on team name; a team missing from one
# leaderboard gets NaN in that column.
_leaderboards = {
    'Top 5 Teams by Most Match Wins': top5_match_wins,
    'Home Wins after Winning Toss': home_win_toss_win,
    'Home Wins after Losing Toss': home_win_toss_loss,
    'Away Wins after Winning Toss': away_win_toss_win,
    'Away Wins after Losing Toss': away_win_toss_loss,
}
pd.DataFrame(_leaderboards).reset_index().rename(columns={'index': 'Team'})


## Questions 18–21: Season-Wise Performance

18. Team Which Has Scored the Most Runs Each Season  
19. Highest Team Total in a Match Each Season  
20. Most Runs Scored by a Player Each Season  
21. Highest Number of Wickets Taken by a Bowler (Across All Seasons)  


In [None]:
def _runs_by(group_keys, run_col):
    """Sum *run_col* of the season-tagged deliveries for each *group_keys* combination."""
    return deliveries_with_season.groupby(group_keys)[run_col].sum().reset_index()


def _season_leader(frame, value_col):
    """Return, for every season, the row of *frame* holding the largest *value_col*."""
    leader_idx = frame.groupby('season')[value_col].idxmax()
    return frame.loc[leader_idx]


# Attach each delivery's season (plus the two team columns) by matching the
# delivery's match_id to the match id; the left join keeps every delivery.
season_runs = matches_df[['id', 'season', 'team1', 'team2']]
deliveries_with_season = pd.merge(
    deliveries_df, season_runs, how='left', left_on='match_id', right_on='id'
)

# Q18: total runs per batting team per season, then the best team per season.
team_season_runs = _runs_by(['season', 'batting_team'], 'total_runs')
top_team_each_season = _season_leader(team_season_runs, 'total_runs')

# Q19: runs per batting team within a single match, then the best per season.
match_total_runs = _runs_by(['match_id', 'season', 'batting_team'], 'total_runs')
top_score_each_season = _season_leader(match_total_runs, 'total_runs')

# Q20: runs credited to each batter per season, then the best batter per season.
player_season_runs = _runs_by(['season', 'batter'], 'batsman_runs')
top_scorer_each_season = _season_leader(player_season_runs, 'batsman_runs')

# Q21: only these dismissal kinds are counted for the bowler; the count runs
# over all deliveries, not per season.
wicket_kinds = ['bowled', 'caught', 'lbw', 'stumped', 'caught and bowled', 'hit wicket']
bowler_wickets = deliveries_df.loc[deliveries_df['dismissal_kind'].isin(wicket_kinds)]
top_bowler = bowler_wickets['bowler'].value_counts().head(1)

# Print each heading, then render its result.
for heading, result in (
    ("Top Teams Each Season (Total Runs):", top_team_each_season),
    ("\nHighest Match Score Each Season:", top_score_each_season),
    ("\nTop Player Each Season (Runs):", top_scorer_each_season),
    ("\nTop Wicket Taker:", top_bowler),
):
    print(heading)
    display(result)
