# Import Required Libraries

In [35]:
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import ipywidgets as widgets
import os 

# Define Base Data Path and Team Squad Files

In [38]:
# Root folder that holds the squad CSVs and the match CSVs.
BASE_DATA_PATH = "../IPL_Analytics/"

# Team display name -> squad CSV path (relative to BASE_DATA_PATH).
# Every file is named after its team with spaces replaced by underscores,
# so the mapping is derived from the list of team names.
TEAM_FILES = {
    team: f"squad/{team.replace(' ', '_')}.csv"
    for team in (
        'Chennai Super Kings',
        'Delhi Capitals',
        'Gujarat Titans',
        'Kolkata Knight Riders',
        'Lucknow Super Giants',
        'Mumbai Indians',
        'Punjab Kings',
        'Rajasthan Royals',
        'Royal Challengers Bengaluru',
        'Sunrisers Hyderabad',
    )
}

# Load Squad Data

In [41]:
def load_squad_data(base_path, team_files_map):
    """Read every team's squad CSV and stack them into one DataFrame.

    Parameters
    ----------
    base_path : str
        Folder that the relative CSV paths are joined onto.
    team_files_map : dict
        Maps a team name to the relative path of that team's squad CSV.

    Returns
    -------
    pandas.DataFrame
        All successfully read squads concatenated with a fresh 0..n-1 index
        and an added ``Team`` column holding the team name. Empty when no
        file could be read.

    Notes
    -----
    Loading is best-effort: a missing or unreadable file is reported with
    ``print`` and skipped rather than aborting the whole load.
    """
    frames = []

    for team, relative_csv in team_files_map.items():
        csv_path = os.path.join(base_path, relative_csv)
        try:
            squad = pd.read_csv(csv_path)
            squad['Team'] = team
            frames.append(squad)
        except FileNotFoundError:
            print(f"File not found for {team}")
        except Exception as e:
            print(f"Error loading {team}: {e}")

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

# Load Match Data

In [44]:
def load_match_data(base_path,
                    info_file="match_info-10jun25.csv",
                    list_file="match_list-9jun25.csv"):
    """Load the match-info and match-list CSVs and merge them on ``MatchID``.

    Parameters
    ----------
    base_path : str
        Folder containing both CSV files.
    info_file : str, optional
        File name of the per-match info CSV; its ``id`` column is renamed to
        ``MatchID`` before merging. Defaults to the snapshot this notebook
        was written against.
    list_file : str, optional
        File name of the match-list CSV, which must already carry a
        ``MatchID`` column plus ``MatchDate``.

    Returns
    -------
    pandas.DataFrame
        Inner merge of the two files with ``MatchDate`` parsed to datetime.
        An empty DataFrame when anything goes wrong (the error is printed).
    """
    try:
        # Chain the rename instead of mutating in place so the frame read
        # from disk is never modified behind the reader's back.
        match_info = (
            pd.read_csv(os.path.join(base_path, info_file))
            .rename(columns={"id": "MatchID"})
        )
        match_list = pd.read_csv(os.path.join(base_path, list_file))

        merged = pd.merge(match_info, match_list, on="MatchID")
        merged['MatchDate'] = pd.to_datetime(merged['MatchDate'])
        return merged
    except Exception as e:
        # Best-effort by design: downstream cells check `.empty` and show a
        # message instead of crashing the dashboard.
        print("Error loading match data:", e)
        return pd.DataFrame()

# Load All Data

In [47]:
# Read every squad roster and the merged match table once, up front;
# the widget and analysis cells below read these two frames.
all_squads_df = load_squad_data(base_path=BASE_DATA_PATH, team_files_map=TEAM_FILES)
all_matches_df = load_match_data(base_path=BASE_DATA_PATH)

# Preview the first rows of each frame as a quick sanity check.
display(all_squads_df.head(), all_matches_df.head())

Unnamed: 0.1,Unnamed: 0,id,name,role,battingStyle,bowlingStyle,country,playerImg,Team
0,0,4f95ebbb-226b-4bef-bd2c-02daec890ba4,Shaik Rasheed,Batsman,Right Handed Bat,Right-arm legbreak,India,https://h.cricapi.com/img/icon512.png,Chennai Super Kings
1,1,81b446e1-bfea-45a7-a15e-062b8157a323,Ravindra Jadeja,Bowling Allrounder,Left Handed Bat,Left-arm orthodox,India,https://h.cricapi.com/img/players/81b446e1-bfe...,Chennai Super Kings
2,2,58139def-7c4a-4cc2-a09a-0caff43b43dd,Kamlesh Nagarkoti,Bowler,Right Handed Bat,Right-arm fast,India,https://h.cricapi.com/img/players/58139def-7c4...,Chennai Super Kings
3,3,de60a340-5eb8-4160-861e-182aafd279c5,Vijay Shankar,Batting Allrounder,Right Handed Bat,Right-arm medium,India,https://h.cricapi.com/img/players/de60a340-5eb...,Chennai Super Kings
4,4,9d8be8e7-4218-400f-919a-211415490575,Jamie Overton,Bowler,Right Handed Bat,Right-arm fast,England,https://h.cricapi.com/img/icon512.png,Chennai Super Kings


Unnamed: 0,Unnamed: 0_x,MatchID,Team1,Team2,tossWinner,tossChoice,matchWinner,Innings1,r1,w1,...,o2,Unnamed: 0_y,MatchName,MatchNumber,MatchType,MatchVenue,MatchDate,MatchDateTime,MatchStarted,MatchEnded
0,0,cacf2d34-41b8-41dd-91ed-5183d880084c,Kolkata Knight Riders,Royal Challengers Bengaluru,Royal Challengers Bengaluru,bowl,Royal Challengers Bengaluru,Kolkata Knight Riders Inning 1,174.0,8.0,...,16.2,0,Kolkata Knight Riders vs Royal Challengers Ben...,1,t20,"Eden Gardens, Kolkata",2025-03-22,2025-03-22 14:00:00,True,True
1,1,91b007f3-c0af-493f-808a-3f4ae2d66e33,Sunrisers Hyderabad,Rajasthan Royals,Rajasthan Royals,bowl,Sunrisers Hyderabad,Sunrisers Hyderabad Inning 1,286.0,6.0,...,20.0,1,Sunrisers Hyderabad vs Rajasthan Royals,2,t20,"Rajiv Gandhi International Stadium, Hyderabad",2025-03-23,2025-03-23 10:00:00,True,True
2,2,208d68e5-3fab-4f3b-88e9-29ec4a02d3e2,Chennai Super Kings,Mumbai Indians,Chennai Super Kings,bowl,Chennai Super Kings,Mumbai Indians Inning 1,155.0,9.0,...,19.1,2,Chennai Super Kings vs Mumbai Indians,3,t20,"MA Chidambaram Stadium, Chennai",2025-03-23,2025-03-23 14:00:00,True,True
3,3,c6e97609-d9c1-46eb-805a-e282b34f3bb1,Delhi Capitals,Lucknow Super Giants,Delhi Capitals,bowl,Delhi Capitals,Lucknow Super Giants Inning 1,209.0,8.0,...,19.3,3,Delhi Capitals vs Lucknow Super Giants,4,t20,Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket St...,2025-03-24,2025-03-24 14:00:00,True,True
4,4,83d70527-5fc4-4fad-8dd2-b88b385f379e,Gujarat Titans,Punjab Kings,Gujarat Titans,bowl,Punjab Kings,Punjab Kings Inning 1,243.0,5.0,...,20.0,4,Gujarat Titans vs Punjab Kings,5,t20,"Narendra Modi Stadium, Ahmedabad",2025-03-25,2025-03-25 14:00:00,True,True


# Team Dropdown Widget

In [50]:
# Team names come from whichever squad files were loaded successfully.
if all_squads_df.empty:
    all_teams = []
else:
    all_teams = sorted(all_squads_df['Team'].unique().tolist())

# With no teams the dropdown shows a single placeholder entry, has no
# selected value and is disabled.
team_selector = widgets.Dropdown(
    description='Select Team:',
    options=all_teams or ['No Teams Available'],
    value=next(iter(all_teams), None),
    disabled=not all_teams,
    layout=widgets.Layout(width='auto'),
)

# All analysis text and figures are rendered into this output widget.
output_area = widgets.Output()

# Bar Plot Distribution

In [53]:
def plot_distribution(data, title, ax, palette_name, x_label=''):
    """Draw a count bar chart of ``data`` on ``ax``.

    Parameters
    ----------
    data : pandas.Series
        Counts to plot; the index supplies the bar categories.
    title : str
        Axes title.
    ax : matplotlib.axes.Axes
        Target axes. Hidden (``set_visible(False)``) when ``data`` is empty.
    palette_name : str
        Seaborn palette name used to colour the bars.
    x_label : str, optional
        X-axis label; when empty the label is left as seaborn set it.
    """
    if not data.empty:
        categories = data.index
        # hue mirrors x so each bar gets its own palette colour; the legend
        # would only repeat the tick labels, so it is turned off.
        sb.barplot(x=categories, y=data.values, hue=categories,
                   palette=palette_name, legend=False, ax=ax)
        ax.set_title(title)
        ax.set_ylabel("Count")
        ax.tick_params(axis='x', rotation=45)
        if x_label:
            ax.set_xlabel(x_label)
    else:
        # Nothing to draw: hide the panel rather than leave an empty frame.
        ax.set_visible(False)

# Display Table & Plot

In [56]:
def display_dataframe_and_plot(df, title, plot_type='bar',
                               stacked=False, figsize=(10,6),
                               xlabel='', ylabel=''):
    """Print ``title``, display ``df`` and, unless suppressed, chart it.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Data to show. It is always displayed as a table first.
    title : str
        Printed above the table and reused as the chart title.
    plot_type : {'bar', 'pie', 'table'}
        ``'table'`` skips the chart. ``'pie'`` passes ``df.values`` and
        ``df.index`` to ``Axes.pie``. Any other unrecognised value yields a
        titled but otherwise empty figure.
    stacked : bool
        Forwarded to ``df.plot`` for bar charts.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    xlabel, ylabel : str
        Axis labels applied after plotting.
    """
    print(f"\n{title}")
    display(df)

    chart_requested = plot_type != 'table'
    if not df.empty and chart_requested:
        figure, axis = plt.subplots(figsize=figsize)

        if plot_type == 'pie':
            axis.pie(df.values, labels=df.index,
                     startangle=90, autopct='%1.1f%%')
        elif plot_type == 'bar':
            df.plot(kind='bar', stacked=stacked, ax=axis)
            axis.tick_params(axis='x', rotation=45)

        axis.set_title(title)
        axis.set_xlabel(xlabel)
        axis.set_ylabel(ylabel)
        plt.tight_layout()
        plt.show()

# Main Team Analysis Function

In [59]:
def _batted_first_mask(team_matches, team_name):
    """Return a boolean array: True where ``team_name`` batted in innings 1.

    ``r1``/``w1``/``o1`` describe the first innings, whose batting side is
    named in the ``Innings1`` column (values look like
    ``"Mumbai Indians Inning 1"``) and is not always ``Team1``. Rows with no
    ``Innings1`` value, or frames without that column, fall back to the
    "team is Team1" test.
    """
    is_team1 = (team_matches['Team1'] == team_name).to_numpy()
    if 'Innings1' not in team_matches.columns:
        return is_team1

    innings1 = team_matches['Innings1']
    named_first = innings1.fillna('').astype(str).str.startswith(team_name).to_numpy()
    return np.where(innings1.notna().to_numpy(), named_first, is_team1)


def _win_loss_table(team_matches, group_col):
    """Count results per ``group_col`` value and add a ``Win Percentage`` column.

    ``Win`` and ``Loss`` columns are always present (zero-filled when the
    team has no such result), so callers can index them safely. The
    percentage is wins over decided matches; it is NaN for a group with no
    decided match.
    """
    table = team_matches.groupby(group_col)['result'].value_counts().unstack().fillna(0)
    for outcome in ('Win', 'Loss'):
        if outcome not in table.columns:
            table[outcome] = 0.0
    decided = table['Win'] + table['Loss']
    table['Win Percentage'] = (table['Win'] / decided) * 100
    return table


def _illustrative_dismissal_split(total_wickets, shares):
    """Split ``total_wickets`` across dismissal types using fixed ``shares``.

    NOTE(review): the shares are illustrative constants, not measured data —
    the match files carry no dismissal-type detail. Each share is truncated
    to an int and the remainder is reported as ``'Other'``.
    """
    split = {kind: int(total_wickets * share) for kind, share in shares.items()}
    split['Other'] = total_wickets - sum(split.values())
    return pd.Series(split)


def _show_squad_section(team_name):
    """Print and plot the squad composition of ``team_name``."""
    selected_squad_df = all_squads_df[all_squads_df['Team'] == team_name].copy()

    if selected_squad_df.empty:
        print(f"No squad data found for {team_name}. Please check the squad CSVs.")
        return

    print("\n### Squad Composition ###")
    role_dist = selected_squad_df['role'].value_counts()
    nationality_dist = selected_squad_df['country'].value_counts()
    batting_dist = selected_squad_df['battingStyle'].value_counts()
    # value_counts() already ignores missing values, so players without a
    # bowling style are simply left out.
    bowling_dist = selected_squad_df['bowlingStyle'].value_counts()

    print("\nPlayer Roles Distribution:")
    print(role_dist)
    print("\nNationality Distribution:")
    print(nationality_dist)
    print("\nBatting Styles:")
    print(batting_dist)
    print("\nBowling Styles:")
    print(bowling_dist)

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    plot_distribution(role_dist, 'Player Roles Distribution', axes[0, 0], 'viridis')
    plot_distribution(nationality_dist, 'Nationality Distribution', axes[0, 1], 'magma')
    plot_distribution(batting_dist, 'Batting Styles', axes[1, 0], 'plasma')
    plot_distribution(bowling_dist, 'Bowling Styles', axes[1, 1], 'cividis')
    plt.tight_layout()
    plt.show()

    role_country = selected_squad_df.groupby(['role', 'country']).size().unstack().fillna(0)
    display_dataframe_and_plot(role_country, f'{team_name} - Players by Role and Country',
                               stacked=True, xlabel='Role', ylabel='Count')


def _prepare_team_matches(team_name):
    """Return ``team_name``'s matches with per-team derived columns added.

    The result is empty when the team appears in no match; otherwise it
    gains ``result``, ``team_runs``, ``team_wickets_lost``,
    ``team_overs_batting``, ``opposition_runs``, ``team_wickets_taken``,
    ``opposition`` and ``toss_win``.
    """
    involved = (all_matches_df['Team1'] == team_name) | (all_matches_df['Team2'] == team_name)
    team_matches = all_matches_df[involved].copy()
    if team_matches.empty:
        return team_matches

    is_team1 = team_matches['Team1'] == team_name
    # Innings figures follow batting order, not the Team1/Team2 listing.
    batted_first = _batted_first_mask(team_matches, team_name)

    team_matches['result'] = np.select(
        [team_matches['matchWinner'] == team_name, team_matches['matchWinner'] == 'No Winner'],
        ['Win', 'No Result'],
        default='Loss'
    )
    team_matches['team_runs'] = np.where(batted_first, team_matches['r1'], team_matches['r2'])
    team_matches['team_wickets_lost'] = np.where(batted_first, team_matches['w1'], team_matches['w2'])
    team_matches['team_overs_batting'] = np.where(batted_first, team_matches['o1'], team_matches['o2'])
    team_matches['opposition_runs'] = np.where(batted_first, team_matches['r2'], team_matches['r1'])
    team_matches['team_wickets_taken'] = np.where(batted_first, team_matches['w2'], team_matches['w1'])
    team_matches['opposition'] = np.where(is_team1, team_matches['Team2'], team_matches['Team1'])
    team_matches['toss_win'] = team_matches['tossWinner'] == team_name
    return team_matches


def _show_results_section(team_name, team_matches):
    """Print win/loss summaries and plot results by opposition and venue."""
    print("\n### Match Performance ###")
    result_dist = team_matches['result'].value_counts()
    print("\nMatch Results:")
    print(result_dist)

    wins = result_dist.get('Win', 0)
    losses = result_dist.get('Loss', 0)

    total_matches_played = len(team_matches)
    win_rate = (wins / total_matches_played) * 100 if total_matches_played > 0 else 0
    print(f"\n{team_name} Match Performance:")
    print(f"Total matches played: {total_matches_played}")
    print(f"Win rate: {win_rate:.1f}%")

    wins_losses_sum = wins + losses
    win_percentage_calc = (wins / wins_losses_sum) * 100 if wins_losses_sum > 0 else 0
    print(f"Win Percentage (excluding 'No Result'): {win_percentage_calc:.2f}%")

    opposition_perf = _win_loss_table(team_matches, 'opposition').sort_values(
        'Win Percentage', ascending=False)
    display_dataframe_and_plot(opposition_perf[['Win', 'Loss']], 'Performance by Opposition',
                               stacked=True, xlabel='Opposition Team', ylabel='Number of Matches')

    fig, (ax_results, ax_opposition) = plt.subplots(1, 2, figsize=(16, 6))

    # value_counts() orders by frequency, so colours are looked up per label
    # rather than assigned by position.
    result_colors = {'Win': 'green', 'Loss': 'red', 'No Result': 'grey'}
    ax_results.pie(result_dist, labels=result_dist.index, autopct='%1.1f%%',
                   colors=[result_colors.get(outcome, 'grey') for outcome in result_dist.index],
                   wedgeprops={'edgecolor': 'black', 'linewidth': 0.5})
    ax_results.set_title('Match Results Distribution')

    opposition_perf[['Win', 'Loss']].plot(kind='bar', stacked=True, ax=ax_opposition,
                                          color=['green', 'red'])
    ax_opposition.set_title('Performance by Opposition')
    ax_opposition.set_xlabel('Opposition Team')
    ax_opposition.set_ylabel('Number of Matches')
    ax_opposition.tick_params(axis='x', rotation=45)
    plt.tight_layout()
    plt.show()

    venue_perf = _win_loss_table(team_matches, 'MatchVenue').sort_values(
        'Win Percentage', ascending=False)
    display_dataframe_and_plot(venue_perf, 'Performance by Venue', plot_type='table')


def _show_toss_section(team_matches):
    """Print toss statistics and plot toss outcomes and decisions."""
    print("\n### Toss Analysis ###")
    toss_dist = team_matches['toss_win'].value_counts()
    print("\nToss Wins:")
    print(toss_dist)

    total_tosses_played = len(team_matches)
    toss_win_percentage = (toss_dist.get(True, 0) / total_tosses_played) * 100 if total_tosses_played > 0 else 0
    print(f"\nToss Win Percentage: {toss_win_percentage:.1f}%")

    toss_decisions = team_matches[team_matches['toss_win']]['tossChoice'].value_counts()
    print("\nToss Decisions When Won:")
    print(toss_decisions)

    toss_result = _win_loss_table(team_matches, 'toss_win')
    print("\nMatch Result Based on Toss:")
    print(toss_result)

    fig, (ax_toss, ax_choice) = plt.subplots(1, 2, figsize=(16, 6))

    # Labels and colours are derived from the index: value_counts() may list
    # False first, or contain only one of the two outcomes.
    toss_labels = ['Won' if won else 'Lost' for won in toss_dist.index]
    toss_colors = ['green' if won else 'red' for won in toss_dist.index]
    ax_toss.pie(toss_dist, labels=toss_labels, autopct='%1.1f%%', colors=toss_colors,
                wedgeprops={'edgecolor': 'black', 'linewidth': 0.5})
    ax_toss.set_title('Toss Wins Distribution')

    plot_distribution(toss_decisions, 'Toss Decisions When Won', ax_choice, 'rocket',
                      x_label='Toss Choice')
    plt.tight_layout()
    plt.show()


def _show_figures_section(team_name, team_matches):
    """Display best/worst scores and the illustrative wicket breakdowns."""
    print("\n### Batting & Bowling Figures ###")
    batting_cols = ['MatchDate', 'opposition', 'team_runs', 'result']
    bowling_cols = ['MatchDate', 'opposition', 'opposition_runs', 'team_wickets_taken', 'result']

    print("\nTop 5 Highest Scores:")
    display(team_matches.sort_values('team_runs', ascending=False).head(5)[batting_cols])

    print("\nTop 5 Lowest Scores:")
    display(team_matches.sort_values('team_runs').head(5)[batting_cols])

    print("\nTop 5 Bowling Performances (Lowest Opposition Scores):")
    display(team_matches.sort_values('opposition_runs').head(5)[bowling_cols])

    print("\nTop 5 Worst Bowling Performances (Highest Opposition Scores):")
    display(team_matches.sort_values('opposition_runs', ascending=False).head(5)[bowling_cols])

    # Wickets Analysis (Taken when Bowling)
    total_wickets_taken_bowling = team_matches['team_wickets_taken'].sum()
    if total_wickets_taken_bowling > 0:
        dismissal_series = _illustrative_dismissal_split(
            total_wickets_taken_bowling,
            {'Bowled': 0.25, 'Caught': 0.45, 'LBW': 0.10, 'Run Out': 0.12, 'Stumped': 0.03},
        )

        print("\n" + "="*50)
        print(f"{team_name} Wicket Analysis (When Bowling)")
        print("="*50)
        print(f"Total wickets taken: {total_wickets_taken_bowling}")
        display_dataframe_and_plot(dismissal_series, 'Wickets by dismissal type (Bowling)',
                                   plot_type='pie', figsize=(8, 8))
    else:
        print(f"\n{team_name} took 0 wickets in the analyzed matches (when bowling).")

    # Wickets Analysis (Lost when Batting)
    total_wickets_lost_batting = team_matches['team_wickets_lost'].sum()
    if total_wickets_lost_batting > 0:
        wickets_lost_series = _illustrative_dismissal_split(
            total_wickets_lost_batting,
            {'Bowled': 0.20, 'Caught': 0.50, 'LBW': 0.08, 'Run Out': 0.15, 'Stumped': 0.02},
        )

        print("\n" + "="*50)
        print(f"{team_name} Wicket Analysis (When Batting)")
        print("="*50)
        print(f"Total wickets lost: {total_wickets_lost_batting}")
        display_dataframe_and_plot(wickets_lost_series, 'Wickets by dismissal type (Batting)',
                                   plot_type='pie', figsize=(8, 8))
    else:
        print(f"\n{team_name} lost 0 wickets in the analyzed matches (when batting).")


def perform_team_analysis(team_name):
    """Performs and displays a comprehensive analysis for the selected team.

    Everything is rendered inside ``output_area``: the previous output and
    all open figures are cleared first, then the squad composition, match
    results, toss statistics and batting/bowling figures are printed and
    plotted from ``all_squads_df`` and ``all_matches_df``.

    Parameters
    ----------
    team_name : str or None
        Team to analyse. ``None`` or the dropdown placeholder
        ``'No Teams Available'`` only prints a prompt.
    """
    with output_area:
        clear_output(wait=True)  # drop the previous team's text output
        plt.close('all')         # release figures left over from the last run

        if team_name is None or team_name == 'No Teams Available':
            print("Please select a valid team to view analysis.")
            return

        print(f"--- Analyzing {team_name} Squad & Performance ---")
        print("="*60)

        # --- Squad Analysis ---
        _show_squad_section(team_name)

        # --- Match Performance Analysis ---
        if all_matches_df.empty:
            print("\nNo match data available for analysis. Please check match CSVs.")
        else:
            team_matches = _prepare_team_matches(team_name)
            if team_matches.empty:
                print(f"\nNo match data found for {team_name}. Please check match CSVs.")
            else:
                _show_results_section(team_name, team_matches)
                _show_toss_section(team_matches)
                _show_figures_section(team_name, team_matches)

        print("\n" + "="*60)
        print(f"--- End of {team_name} Analysis ---")

In [61]:
def on_team_change(change):
    """Re-render the analysis when the dropdown selection changes.

    Parameters
    ----------
    change : traitlets change bundle
        Only ``change.new`` (the newly selected team name) is used.
    """
    # perform_team_analysis already renders the match-performance section.
    # The former second call, analyze_match_performance(change.new), named a
    # function with no definition in the visible cells and would raise
    # NameError on every selection change, so it has been removed.
    perform_team_analysis(change.new)

# Fire the callback only for changes to the dropdown's `value` trait.
team_selector.observe(on_team_change, names='value')

# Display Dashboard

In [64]:
# Stack the selector above the output area and show the dashboard.
dashboard = widgets.VBox([team_selector, output_area])
display(dashboard)

# The observer only fires when the selection changes, so render the
# initially selected team once; otherwise the output area stays blank until
# the user switches to a different team.
perform_team_analysis(team_selector.value)

VBox(children=(Dropdown(description='Select Team:', layout=Layout(width='auto'), options=('Chennai Super Kings…