In [4]:
import pandas as pd

matches_df = pd.DataFrame({
    "match_id": [1, 2, 3, 4, 5],
    "season": [2020, 2020, 2021, 2021, 2022],
    "team1": ["MI", "RCB", "CSK", "MI", "GT"],
    "team2": ["CSK", "MI", "RCB", "KKR", "RR"],
    "winner": ["MI", "RCB", "CSK", None, "GT"],
    "result": ["completed", "completed", "tied", "abandoned", "completed"]
})

balls_df = pd.DataFrame({
    "match_id": [1, 1, 1, 2, 2, 3, 3, 5, 5, 5],
    "inning": [1, 1, 1, 1, 1, 2, 2, 1, 1, 2],
    "ball_no": [1, 2, 3, 1, 2, 1, 2, 1, 2, 1],
    "batsman": ["Rohit", "Rohit", "Kishan", "Kohli", "Kohli", "Dhoni", "Dhoni", "Gill", "Saha", "Buttler"],
    "batting_team": ["MI", "MI", "MI", "RCB", "RCB", "CSK", "CSK", "GT", "GT", "RR"]
})

team_year = pd.concat([
    matches_df[['season', 'team1']].rename(columns={'team1': 'team'}),
    matches_df[['season', 'team2']].rename(columns={'team2': 'team'})
]).drop_duplicates()

teams_each_year = team_year.groupby("season")["team"].nunique()
print("1. Teams each year:\n", teams_each_year)

match_results = matches_df['result'].value_counts()
print("\n2. Match result counts:\n", match_results)

avg_balls = balls_df.groupby(["match_id", "inning"]).size().groupby("inning").mean()
print("\n3. Average balls per match per inning:\n", avg_balls)

match_wins = matches_df.dropna(subset=["winner"]).groupby(["season", "winner"]).size().reset_index(name="wins")
match_wins_sorted = match_wins.sort_values(["season", "wins"], ascending=[True, False])
print("\n4. Matches won per year:\n", match_wins_sorted)

batsmen_grouped = balls_df.groupby(["match_id", "batting_team"])["batsman"].agg(
    batsmen_list=lambda x: list(x),
    batsmen_set=lambda x: list(set(x))
).reset_index()

print("\n5. Batsmen list vs set per match/team:\n", batsmen_grouped)


1. Teams each year:
 season
2020    3
2021    4
2022    2
Name: team, dtype: int64

2. Match result counts:
 result
completed    3
tied         1
abandoned    1
Name: count, dtype: int64

3. Average balls per match per inning:
 inning
1    2.333333
2    1.500000
dtype: float64

4. Matches won per year:
    season winner  wins
0    2020     MI     1
1    2020    RCB     1
2    2021    CSK     1
3    2022     GT     1

5. Batsmen list vs set per match/team:
    match_id batting_team            batsmen_list      batsmen_set
0         1           MI  [Rohit, Rohit, Kishan]  [Kishan, Rohit]
1         2          RCB          [Kohli, Kohli]          [Kohli]
2         3          CSK          [Dhoni, Dhoni]          [Dhoni]
3         5           GT            [Gill, Saha]     [Saha, Gill]
4         5           RR               [Buttler]        [Buttler]
