In [1]:
import pandas as pd
import numpy as np
from fuzzywuzzy import process
from IPython.display import display
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', None)

def fuzzy_search_with_suggestions(input_names, choices, player_type='players', threshold=80):
    print(f"\n🔍 {player_type.upper()} SEARCH RESULTS:")
    print("-" * 50)
    matched_names = {}
    for name in input_names:
        matches = process.extract(name, choices, limit=3)
        best_match = [m for m in matches if m[1] >= threshold]
        if best_match:
            print(f"✅ '{name}' → '{best_match[0][0]}' ({best_match[0][1]}%)")
            matched_names[name] = best_match[0][0]
        else:
            print(f"❌ '{name}' → Not found")
            suggestions = ", ".join([f"{m[0]} ({m[1]}%)" for m in matches])
            print(f"   💡 Try: {suggestions}")
            matched_names[name] = None
    return matched_names

def calculate_batsman_comparison(df, venue, batsmen):
    records = []
    for batsman in batsmen:
        # Overall
        overall = df[df['batsman'] == batsman]
        overall_inn = overall.groupby(['match_id', 'innings']).agg(
            runs=('runs_of_bat', 'sum'),
            balls=('runs_of_bat', 'count')
        ).reset_index()
        o_1st = overall_inn[overall_inn['innings'] == 1]['runs']
        o_2nd = overall_inn[overall_inn['innings'] == 2]['runs']
        o_bnd = (overall['runs_of_bat'] == 4).sum() + (overall['runs_of_bat'] == 6).sum()
        o_milestones = f"{(overall_inn['runs'] >= 30).sum()}|{(overall_inn['runs'] >= 50).sum()}|{(overall_inn['runs'] >= 100).sum()}"
        # Venue
        venue_df = overall[overall['venue'] == venue]
        venue_inn = venue_df.groupby(['match_id', 'innings']).agg(
            runs=('runs_of_bat', 'sum'),
            balls=('runs_of_bat', 'count')
        ).reset_index()
        v_1st = venue_inn[venue_inn['innings'] == 1]['runs']
        v_2nd = venue_inn[venue_inn['innings'] == 2]['runs']
        v_bnd = (venue_df['runs_of_bat'] == 4).sum() + (venue_df['runs_of_bat'] == 6).sum()
        v_milestones = f"{(venue_inn['runs'] >= 30).sum()}|{(venue_inn['runs'] >= 50).sum()}|{(venue_inn['runs'] >= 100).sum()}"
        records.append({
            'Player': batsman,
            'O_Mat': overall['match_id'].nunique(),
            'O_Runs': overall_inn['runs'].sum(),
            'O_Avg': round(overall_inn['runs'].mean(), 2) if not overall_inn.empty else 0,
            'O_1st': round(o_1st.mean(), 2) if not o_1st.empty else 0,
            'O_2nd': round(o_2nd.mean(), 2) if not o_2nd.empty else 0,
            'O_BPD': round(overall_inn['balls'].sum() / max(1, overall_inn['runs'].count()), 2),
            'O_Mile': o_milestones,
            'V_Mat': venue_df['match_id'].nunique(),
            'V_Runs': venue_inn['runs'].sum(),
            'V_Avg': round(venue_inn['runs'].mean(), 2) if not venue_inn.empty else 0,
            'V_SR': round(venue_inn['runs'].sum() / venue_inn['balls'].sum() * 100, 2) if venue_inn['balls'].sum() > 0 else 0,
            'V_HS': venue_inn['runs'].max() if not venue_inn.empty else 0,
            'V_Bnd': v_bnd,
            'V_1st': round(v_1st.mean(), 2) if not v_1st.empty else 0,
            'V_2nd': round(v_2nd.mean(), 2) if not v_2nd.empty else 0,
            'V_BPD': round(venue_inn['balls'].sum() / max(1, venue_inn['runs'].count()), 2) if not venue_inn.empty else 0,
            'V_BPB': round(venue_inn['balls'].sum() / max(1, v_bnd), 2) if v_bnd > 0 else 0,
            'V_Mile': v_milestones
        })
    return pd.DataFrame(records)

def calculate_bowler_comparison(df, venue, bowlers):
    records = []
    for bowler in bowlers:
        # Overall
        overall = df[df['bowler'] == bowler]
        overall_inn = overall.groupby(['match_id', 'innings']).agg(
            balls=('runs_of_bat', 'count'),
            runs_conceded=('total_runs', 'sum'),
            wickets=('player_dismissed', 'count')
        ).reset_index()
        o_1st = overall_inn[overall_inn['innings'] == 1]['wickets']
        o_2nd = overall_inn[overall_inn['innings'] == 2]['wickets']
        o_bnd = (overall['runs_of_bat'] == 4).sum() + (overall['runs_of_bat'] == 6).sum()
        o_milestones = f"{(overall_inn['wickets'] >= 3).sum()}|{(overall_inn['wickets'] >= 4).sum()}|{(overall_inn['wickets'] >= 5).sum()}"
        # Venue
        venue_df = overall[overall['venue'] == venue]
        venue_inn = venue_df.groupby(['match_id', 'innings']).agg(
            balls=('runs_of_bat', 'count'),
            runs_conceded=('total_runs', 'sum'),
            wickets=('player_dismissed', 'count')
        ).reset_index()
        v_1st = venue_inn[venue_inn['innings'] == 1]['wickets']
        v_2nd = venue_inn[venue_inn['innings'] == 2]['wickets']
        v_bnd = (venue_df['runs_of_bat'] == 4).sum() + (venue_df['runs_of_bat'] == 6).sum()
        v_milestones = f"{(venue_inn['wickets'] >= 3).sum()}|{(venue_inn['wickets'] >= 4).sum()}|{(venue_inn['wickets'] >= 5).sum()}"
        # Best figures
        if not venue_inn.empty and venue_inn['wickets'].max() > 0:
            idx = venue_inn['wickets'].idxmax()
            v_bf = f"{venue_inn.loc[idx, 'wickets']}/{venue_inn.loc[idx, 'runs_conceded']}"
        else:
            v_bf = '0/0'
        records.append({
            'Bowler': bowler,
            'O_Mat': overall['match_id'].nunique(),
            'O_Wkts': overall_inn['wickets'].sum(),
            'O_Avg': round(overall_inn['runs_conceded'].sum() / max(1, overall_inn['wickets'].sum()), 2),
            'O_1st': round(o_1st.mean(), 2) if not o_1st.empty else 0,
            'O_2nd': round(o_2nd.mean(), 2) if not o_2nd.empty else 0,
            'O_BPD': round(overall_inn['balls'].sum() / max(1, overall_inn['wickets'].sum()), 2),
            'O_Mile': o_milestones,
            'V_Mat': venue_df['match_id'].nunique(),
            'V_Wkts': venue_inn['wickets'].sum(),
            'V_Avg': round(venue_inn['runs_conceded'].sum() / max(1, venue_inn['wickets'].sum()), 2),
            'V_Eco': round(venue_inn['runs_conceded'].sum() / (venue_inn['balls'].sum() / 6), 2) if venue_inn['balls'].sum() > 0 else 0,
            'V_BF': v_bf,
            'V_Dot%': round((venue_df['runs_of_bat'] == 0).sum() / venue_inn['balls'].sum() * 100, 2) if venue_inn['balls'].sum() > 0 else 0,
            'V_1st': round(v_1st.mean(), 2) if not v_1st.empty else 0,
            'V_2nd': round(v_2nd.mean(), 2) if not v_2nd.empty else 0,
            'V_BPD': round(venue_inn['balls'].sum() / max(1, venue_inn['wickets'].sum()), 2) if not venue_inn.empty else 0,
            'V_BPB': round(venue_inn['balls'].sum() / max(1, v_bnd), 2) if v_bnd > 0 else 0,
            'V_Mile': v_milestones
        })
    return pd.DataFrame(records)

def get_venue_stats(df, venue):
    venue_df = df[df['venue'] == venue]
    match_results = venue_df.groupby(['match_id', 'innings'])['total_runs'].sum().unstack().fillna(0)
    match_count = match_results.shape[0]
    v_stats = {
        '1': match_results[1].sum(),
        '2': match_results[2].sum(),
        'total_match_run': match_results[1].sum() + match_results[2].sum(),
        'match_count': match_count,
        'avg_innings_1': round(match_results[1].sum() / match_count, 2) if match_count > 0 else 0,
        'avg_innings_2': round(match_results[2].sum() / match_count, 2) if match_count > 0 else 0,
        'inning_1_wins': (match_results[1] > match_results[2]).sum(),
        'inning_2_wins': (match_results[2] > match_results[1]).sum(),
        'HS': match_results.max().max(),
        'LS': match_results.min().min(),
        'HC': match_results[2].max(),
        'LD': match_results[1].min(),
        'avg_runs': round((match_results[1].sum() + match_results[2].sum()) / (2*match_count), 2) if match_count > 0 else 0
    }
    return pd.DataFrame([v_stats], index=[venue])

def get_head_to_head_table(df, venue, bowlers, batsmen):
    records = []
    for bowler in bowlers:
        for batsman in batsmen:
            matchup = df[(df['venue'] == venue) & (df['bowler'] == bowler) & (df['batsman'] == batsman)]
            if not matchup.empty:
                summary = matchup.groupby(['match_id', 'innings']).agg(
                    runs=('runs_of_bat', 'sum'),
                    balls=('runs_of_bat', 'count'),
                    dots=('runs_of_bat', lambda x: (x == 0).sum()),
                    dismissals=('player_dismissed', lambda x: x.eq(batsman).sum()),
                    fours=('runs_of_bat', lambda x: (x == 4).sum()),
                    sixes=('runs_of_bat', lambda x: (x == 6).sum())
                ).reset_index()
                summary['SR'] = summary.apply(lambda row: round(row['runs']/row['balls']*100, 2) if row['balls'] > 0 else 0, axis=1)
                summary['BPD'] = summary.apply(lambda row: round(row['balls']/row['dismissals'], 2) if row['dismissals'] > 0 else 0, axis=1)
                summary['BPB'] = summary.apply(lambda row: round(row['balls']/(row['fours']+row['sixes']), 2) if (row['fours']+row['sixes']) > 0 else 0, axis=1)
                summary['bowler'] = bowler
                summary['batsman'] = batsman
                records.append(summary)
    return pd.concat(records, ignore_index=True) if records else pd.DataFrame()

def venue_player_matchup_analysis(df, venue, batsman_names, bowler_names, threshold=80):
    print(f"\n🏟️ VENUE ANALYSIS: {venue}")
    print("=" * 80)
    batsmen_choices = df[df['venue'] == venue]['batsman'].unique()
    bowlers_choices = df[df['venue'] == venue]['bowler'].unique()
    batsmen_matched_dict = fuzzy_search_with_suggestions(batsman_names, batsmen_choices, 'batsmen', threshold)
    bowlers_matched_dict = fuzzy_search_with_suggestions(bowler_names, bowlers_choices, 'bowlers', threshold)
    valid_batsmen = [v for v in batsmen_matched_dict.values() if v is not None]
    valid_bowlers = [v for v in bowlers_matched_dict.values() if v is not None]

    # Batsman comparison table
    batsman_df = calculate_batsman_comparison(df, venue, valid_batsmen)
    if not batsman_df.empty:
        print(f"\n📋 COMPREHENSIVE PLAYER COMPARISON AT {venue.upper()}")
        print("=" * 120)
        print("Legend: O_ = Overall (All Venues), V_ = Venue Specific")
        print("Mat=Matches, Avg=Average, SR=Strike Rate, HS=Highest Score, Bnd=Boundaries")
        print("BPD=Balls Per Dismissal, BPB=Balls Per Boundary, Mile=Milestones (30s|50s|100s)")
        print("-" * 120)
        display(batsman_df)
    else:
        print("\nNo batsman stats found.")

    # Bowler comparison table
    bowler_df = calculate_bowler_comparison(df, venue, valid_bowlers)
    if not bowler_df.empty:
        print(f"\n📋 COMPREHENSIVE BOWLER COMPARISON AT {venue.upper()}")
        print("=" * 120)
        print("Legend: O_ = Overall (All Venues), V_ = Venue Specific")
        print("Mat=Matches, Wkts=Wickets, Avg=Bowling Average, Eco=Economy, BF=Best Figures, Dot%=Dot Ball %")
        print("BPD=Balls Per Dismissal, BPB=Balls Per Boundary, Mile=Milestones (3+|4+|5+ wickets)")
        print("-" * 120)
        display(bowler_df)
    else:
        print("\nNo bowler stats found.")

    # Venue stats
    venue_stats_df = get_venue_stats(df, venue)
    print(f"\n📊 VENUE STATS:")
    display(venue_stats_df)

    # H2H
    matchup_df = get_head_to_head_table(df, venue, valid_bowlers, valid_batsmen)
    if not matchup_df.empty:
        print("\n📊 HEAD-TO-HEAD MATCHUPS:")
        display(matchup_df)
    else:
        print("\n❌ No head-to-head data found for valid player combinations.")

# Example usage
df = pd.read_csv('all_matches_updated.csv').rename(columns={
    'striker': 'batsman',
    'runs_off_bat': 'runs_of_bat'
}).assign(
    innings=lambda x: x['innings'].astype(int),
    wides=lambda x: x['wides'].fillna(0),
    noballs=lambda x: x['noballs'].fillna(0),
    total_runs=lambda x: x['runs_of_bat'] + x['wides'] + x['noballs']
)

venue_player_matchup_analysis(
    df, 
    venue='Old Trafford, Manchester',
  
    batsman_names = [
        "R Vasconcelos", "MP Breetzke", "DJ Willey", "Ravi Bopara", "Saif Zaib", "GA Bartlett", "LD McManus", "Luke Procter", 
                "Ben Sanderson", "GLS Scrimshaw", "Lloyd Pope"
],

bowler_names = [
    "KK Jennings", "LWP Wells", "MF Hurst", "AJ Turner","LS Livingstone", 
    "JJ Bohannon", "CJ Green", "TW Hartley", "TH Aspinwall", "Charlie Barnard", "James Anderson"
],
    
    
    
    threshold=80
)



🏟️ VENUE ANALYSIS: Old Trafford, Manchester

🔍 BATSMEN SEARCH RESULTS:
--------------------------------------------------
✅ 'R Vasconcelos' → 'R Vasconcelos' (100%)
✅ 'MP Breetzke' → 'MP Breetzke' (100%)
✅ 'DJ Willey' → 'DJ Willey' (100%)
✅ 'Ravi Bopara' → 'RS Bopara' (80%)
✅ 'Saif Zaib' → 'SA Zaib' (88%)
❌ 'GA Bartlett' → Not found
   💡 Try: TW Hartley (57%), EN Gay (57%), GS Ballance (55%)
✅ 'LD McManus' → 'LD McManus' (100%)
❌ 'Luke Procter' → Not found
   💡 Try: JE Root (64%), M Carter (56%), LM Reece (50%)
❌ 'Ben Sanderson' → Not found
   💡 Try: EN Gay (60%), CG Harrison (50%), CN Ackermann (48%)
❌ 'GLS Scrimshaw' → Not found
   💡 Try: G Clark (51%), DR Sams (51%), SJ Croft (45%)
❌ 'Lloyd Pope' → Not found
   💡 Try: DL Lloyd (73%), JL du Plooy (48%), L Wood (45%)

🔍 BOWLERS SEARCH RESULTS:
--------------------------------------------------
❌ 'KK Jennings' → Not found
   💡 Try: DY Pennington (58%), F Singh (44%), MC Henriques (43%)
✅ 'LWP Wells' → 'LWP Wells' (100%)
❌ 'MF Hurst' →

Unnamed: 0,Player,O_Mat,O_Runs,O_Avg,O_1st,O_2nd,O_BPD,O_Mile,V_Mat,V_Runs,V_Avg,V_SR,V_HS,V_Bnd,V_1st,V_2nd,V_BPD,V_BPB,V_Mile
0,R Vasconcelos,48,1225,25.52,26.0,25.21,21.08,21|6|0,3,51,17.0,130.77,36,8,7.5,36.0,13.0,4.88,1|0|0
1,MP Breetzke,17,528,31.06,35.44,26.12,21.59,7|4|0,1,76,76.0,230.3,76,12,0.0,76.0,33.0,2.75,1|1|0
2,DJ Willey,108,2861,26.49,27.02,25.9,18.84,42|17|2,2,36,18.0,120.0,26,3,10.0,26.0,15.0,10.0,0|0|0
3,RS Bopara,139,3406,24.5,27.93,21.82,18.99,48|23|1,1,36,36.0,138.46,36,3,0.0,36.0,26.0,8.67,1|0|0
4,SA Zaib,61,1229,20.15,27.14,14.21,14.02,12|7|0,4,84,21.0,113.51,57,7,26.67,4.0,18.5,10.57,1|1|0
5,LD McManus,86,1057,12.29,12.8,11.76,9.22,7|2|0,3,29,9.67,93.55,22,3,14.0,1.0,10.33,10.33,0|0|0



📋 COMPREHENSIVE BOWLER COMPARISON AT OLD TRAFFORD, MANCHESTER
Legend: O_ = Overall (All Venues), V_ = Venue Specific
Mat=Matches, Wkts=Wickets, Avg=Bowling Average, Eco=Economy, BF=Best Figures, Dot%=Dot Ball %
BPD=Balls Per Dismissal, BPB=Balls Per Boundary, Mile=Milestones (3+|4+|5+ wickets)
------------------------------------------------------------------------------------------------------------------------


Unnamed: 0,Bowler,O_Mat,O_Wkts,O_Avg,O_1st,O_2nd,O_BPD,O_Mile,V_Mat,V_Wkts,V_Avg,V_Eco,V_BF,V_Dot%,V_1st,V_2nd,V_BPD,V_BPB,V_Mile
0,LWP Wells,42,37,27.14,0.73,1.05,22.0,0|0|0,21,20,25.15,7.2,2/35.0,28.88,0.6,1.27,20.95,10.22,0|0|0
1,AJ Turner,17,9,29.0,0.62,0.44,22.22,1|0|0,1,0,24.0,12.0,0/0,8.33,0.0,0.0,12.0,6.0,0|0|0
2,LS Livingstone,55,55,23.69,1.13,0.91,18.38,5|2|0,13,10,34.8,7.76,2/28.0,27.14,1.0,0.62,26.9,8.68,0|0|0
3,JJ Cobb,100,57,29.58,0.54,0.6,23.4,6|1|1,2,0,13.0,6.5,0/0,41.67,0.0,0.0,12.0,6.0,0|0|0
4,CJ Green,42,50,23.48,1.47,0.96,18.58,4|2|2,9,13,19.15,7.36,5/12.0,35.96,1.33,1.5,15.62,8.12,1|1|1
5,TW Hartley,58,48,28.83,0.68,0.97,22.83,3|1|0,22,18,30.44,8.06,4/16.0,26.23,0.64,1.0,22.67,7.29,1|1|0
6,TH Aspinwall,9,11,18.91,1.67,1.0,14.27,2|1|0,6,10,13.0,7.96,4/18.0,36.73,2.0,1.5,9.8,5.16,2|1|0
7,EG Barnard,103,66,37.36,0.75,0.52,25.59,2|0|0,2,2,18.5,7.4,2/26.0,33.33,1.0,0.0,15.0,7.5,0|0|0
8,JM Anderson,6,8,21.0,1.33,1.33,15.12,1|0|0,1,1,23.0,7.67,1/23.0,22.22,0.0,1.0,18.0,9.0,0|0|0



📊 VENUE STATS:


Unnamed: 0,1,2,total_match_run,match_count,avg_innings_1,avg_innings_2,inning_1_wins,inning_2_wins,HS,LS,HC,LD,avg_runs
"Old Trafford, Manchester",4633.0,4332.0,8965.0,28,165.46,154.71,16,10,219.0,74.0,198.0,74.0,160.09



📊 HEAD-TO-HEAD MATCHUPS:


Unnamed: 0,match_id,innings,runs,balls,dots,dismissals,fours,sixes,SR,BPD,BPB,bowler,batsman
0,1410492,2,1,3,2,1,0,0,33.33,3.0,0.0,LWP Wells,R Vasconcelos
1,1410492,2,2,3,1,1,0,0,66.67,3.0,0.0,LWP Wells,MP Breetzke
2,1410492,2,4,5,1,0,0,0,80.0,0.0,0.0,LWP Wells,DJ Willey
3,1410492,2,13,11,3,0,1,0,118.18,0.0,11.0,LWP Wells,RS Bopara
4,1250338,1,1,3,2,0,0,0,33.33,0.0,0.0,LWP Wells,SA Zaib
5,1297831,1,10,10,4,0,1,0,100.0,0.0,10.0,LWP Wells,SA Zaib
6,1347688,1,2,3,1,0,0,0,66.67,0.0,0.0,LWP Wells,SA Zaib
7,1410492,2,2,3,1,0,0,0,66.67,0.0,0.0,LWP Wells,SA Zaib
8,1347688,1,6,5,0,0,0,0,120.0,0.0,0.0,LWP Wells,LD McManus
9,1410492,2,2,2,0,0,0,0,100.0,0.0,0.0,LS Livingstone,R Vasconcelos


**#With Extra Parmater**

In [3]:
import pandas as pd
import numpy as np
from fuzzywuzzy import process
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', None)

# Load data
deliveries = pd.read_csv('all_matches_2021_onwards.csv')
df = deliveries.copy()

# Data preparation (following your exact pattern)
df = df.rename(columns={'striker': 'batsman'})
df = df.rename(columns={'runs_off_bat': 'runs_of_bat'})
df['innings'] = df['innings'].astype(int)

# Fill missing values
df['wides'] = df['wides'].fillna(0)
df['noballs'] = df['noballs'].fillna(0)

# Calculate total runs
df['total_runs'] = df['runs_of_bat'] + df['wides'] + df['noballs']

def multi_player_head_to_head_analysis(df, venue_input, bowler_inputs, batsman_inputs, threshold=80):
    """
    Comprehensive head-to-head analysis between multiple bowlers and batsmen at specific venue
    Returns two DataFrames: match-wise breakdown and overall summary for all combinations
    """
    
    print(f"🏏 MULTI-PLAYER HEAD-TO-HEAD MATCHUP ANALYSIS")
    print("=" * 100)
    print(f"📍 Venue: {venue_input}")
    print(f"🎳 Bowlers: {', '.join(bowler_inputs)}")
    print(f"🏏 Batsmen: {', '.join(batsman_inputs)}")
    print("=" * 100)
    
    # Filter for venue
    venue_df = df[df['venue'] == venue_input]
    
    if venue_df.empty:
        print(f"❌ No data found for venue: {venue_input}")
        return pd.DataFrame(), pd.DataFrame()
    
    # Get available players at venue for fuzzy search
    venue_bowlers = venue_df['bowler'].unique().tolist()
    venue_batsmen = venue_df['batsman'].unique().tolist()
    
    # Fuzzy search for bowlers
    matched_bowlers = {}
    exact_bowler_matches = []
    
    print(f"\n🔍 BOWLER SEARCH RESULTS:")
    print("-" * 50)
    for bowler in bowler_inputs:
        bowler_matches = process.extract(bowler, venue_bowlers, limit=3)
        bowler_good_matches = [(name, score) for name, score in bowler_matches if score >= threshold]
        
        if bowler_good_matches:
            matched_bowler = bowler_good_matches[0][0]
            confidence = bowler_good_matches[0][1]
            exact_bowler_matches.append(matched_bowler)
            matched_bowlers[bowler] = matched_bowler
            emoji = "🎯" if confidence == 100 else "✅"
            print(f"{emoji} '{bowler}' → '{matched_bowler}' ({confidence}%)")
        else:
            print(f"❌ '{bowler}' → Not found")
            suggestions = ", ".join([f"{name} ({score}%)" for name, score in bowler_matches[:2]])
            print(f"   💡 Try: {suggestions}")
    
    # Fuzzy search for batsmen
    matched_batsmen = {}
    exact_batsman_matches = []
    
    print(f"\n🔍 BATSMAN SEARCH RESULTS:")
    print("-" * 50)
    for batsman in batsman_inputs:
        batsman_matches = process.extract(batsman, venue_batsmen, limit=3)
        batsman_good_matches = [(name, score) for name, score in batsman_matches if score >= threshold]
        
        if batsman_good_matches:
            matched_batsman = batsman_good_matches[0][0]
            confidence = batsman_good_matches[0][1]
            exact_batsman_matches.append(matched_batsman)
            matched_batsmen[batsman] = matched_batsman
            emoji = "🎯" if confidence == 100 else "✅"
            print(f"{emoji} '{batsman}' → '{matched_batsman}' ({confidence}%)")
        else:
            print(f"❌ '{batsman}' → Not found")
            suggestions = ", ".join([f"{name} ({score}%)" for name, score in batsman_matches[:2]])
            print(f"   💡 Try: {suggestions}")
    
    if not exact_bowler_matches or not exact_batsman_matches:
        print(f"\n❌ Insufficient players found for analysis")
        return pd.DataFrame(), pd.DataFrame()
    
    print(f"\n📊 Found {len(exact_bowler_matches)} bowlers and {len(exact_batsman_matches)} batsmen")
    print(f"🎯 Analyzing {len(exact_bowler_matches) * len(exact_batsman_matches)} possible matchups")
    
    # Analyze all combinations
    all_match_wise_data = []
    all_overall_data = []
    
    for bowler in exact_bowler_matches:
        for batsman in exact_batsman_matches:
            
            # Find common matches where both players played
            match_ids_bowler = set(venue_df[venue_df['bowler'] == bowler]['match_id'].unique())
            match_ids_batsman = set(venue_df[venue_df['batsman'] == batsman]['match_id'].unique())
            common_match_ids = match_ids_bowler.intersection(match_ids_batsman)
            
            if not common_match_ids:
                continue
            
            # Filter data for these matches
            matchup_df = venue_df[venue_df['match_id'].isin(common_match_ids)].copy()
            
            # Filter deliveries where bowler bowled to batsman
            bowler_batsman_df = matchup_df[
                (matchup_df['bowler'] == bowler) & 
                (matchup_df['batsman'] == batsman)
            ]
            
            if bowler_batsman_df.empty:
                continue
            
            # MATCH-WISE BREAKDOWN
            grouped = bowler_batsman_df.groupby(['match_id', 'innings'])
            
            matchup_stats = grouped.agg(
                runs=('runs_of_bat', 'sum'),
                balls=('runs_of_bat', 'count'),
                dots=('runs_of_bat', lambda x: (x == 0).sum()),
                dismissals=('player_dismissed', lambda x: x.eq(batsman).sum()),
                fours=('runs_of_bat', lambda x: (x == 4).sum()),
                sixes=('runs_of_bat', lambda x: (x == 6).sum())
            ).reset_index()
            
            # Calculate match-wise metrics
            matchup_stats['30s'] = matchup_stats['runs'].apply(lambda x: 1 if x >= 30 else 0)
            matchup_stats['50s'] = matchup_stats['runs'].apply(lambda x: 1 if x >= 50 else 0)
            matchup_stats['100s'] = matchup_stats['runs'].apply(lambda x: 1 if x >= 100 else 0)
            matchup_stats['HS'] = matchup_stats['runs']
            matchup_stats['RPI'] = matchup_stats['runs']
            
            # Strike Rate
            matchup_stats['SR'] = matchup_stats.apply(
                lambda row: round(row['runs'] / row['balls'] * 100, 2) if row['balls'] > 0 else 0, axis=1
            )
            
            # Balls per dismissal
            matchup_stats['BPD'] = matchup_stats.apply(
                lambda row: round(row['balls'] / row['dismissals'], 2) if row['dismissals'] > 0 else 0, axis=1
            )
            
            # Balls per boundary
            matchup_stats['BPB'] = matchup_stats.apply(
                lambda row: round(row['balls'] / (row['fours'] + row['sixes']), 2) if (row['fours'] + row['sixes']) > 0 else 0, axis=1
            )
            
            # Add player names
            matchup_stats['batsman'] = batsman
            matchup_stats['bowler'] = bowler
            matchup_stats['venue'] = venue_input
            
            # Store match-wise data
            for _, row in matchup_stats.iterrows():
                all_match_wise_data.append(row.to_dict())
            
            # OVERALL SUMMARY FOR THIS COMBINATION
            total_runs = bowler_batsman_df['runs_of_bat'].sum()
            total_balls = len(bowler_batsman_df)
            total_dismissals = bowler_batsman_df['player_dismissed'].eq(batsman).sum()
            total_fours = (bowler_batsman_df['runs_of_bat'] == 4).sum()
            total_sixes = (bowler_batsman_df['runs_of_bat'] == 6).sum()
            total_dots = (bowler_batsman_df['runs_of_bat'] == 0).sum()
            
            # Overall metrics
            overall_sr = round(total_runs / total_balls * 100, 2) if total_balls > 0 else 0
            overall_rpi = round(total_runs / len(common_match_ids), 2) if len(common_match_ids) > 0 else 0
            overall_bpd = round(total_balls / total_dismissals, 2) if total_dismissals > 0 else 0
            overall_bpb = round(total_balls / (total_fours + total_sixes), 2) if (total_fours + total_sixes) > 0 else 0
            overall_dot_pct = round(total_dots / total_balls * 100, 2) if total_balls > 0 else 0
            overall_boundary_pct = round((total_fours + total_sixes) / total_balls * 100, 2) if total_balls > 0 else 0
            
            # Bowling perspective metrics
            bowling_avg = round(total_runs / total_dismissals, 2) if total_dismissals > 0 else 0
            bowling_sr = round(total_balls / total_dismissals, 2) if total_dismissals > 0 else 0
            economy_rate = round(total_runs / (total_balls / 6), 2) if total_balls > 0 else 0
            
            # Milestones
            total_30s = matchup_stats['30s'].sum()
            total_50s = matchup_stats['50s'].sum()
            total_100s = matchup_stats['100s'].sum()
            highest_score = matchup_stats['runs'].max() if not matchup_stats.empty else 0
            
            # Store overall data
            overall_summary = {
                'venue': venue_input,
                'bowler': bowler,
                'batsman': batsman,
                'total_matches': len(common_match_ids),
                'total_innings': len(matchup_stats),
                'total_runs': total_runs,
                'total_balls': total_balls,
                'total_dismissals': total_dismissals,
                'fours': total_fours,
                'sixes': total_sixes,
                'dots': total_dots,
                'boundaries': total_fours + total_sixes,
                'highest_score': highest_score,
                'milestones_30s': total_30s,
                'milestones_50s': total_50s,
                'milestones_100s': total_100s,
                'overall_avg': overall_rpi,
                'overall_sr': overall_sr,
                'overall_bpd': overall_bpd,
                'overall_bpb': overall_bpb,
                'dot_percentage': overall_dot_pct,
                'boundary_percentage': overall_boundary_pct,
                'bowling_avg': bowling_avg,
                'bowling_sr': bowling_sr,
                'economy_rate': economy_rate,
                'dominance': 'Batsman' if overall_sr > 120 else 'Bowler' if overall_sr < 80 else 'Balanced'
            }
            
            all_overall_data.append(overall_summary)
    
    # Create DataFrames
    if all_match_wise_data:
        match_wise_df = pd.DataFrame(all_match_wise_data)
        match_wise_df = match_wise_df[[
            'venue', 'bowler', 'batsman', 'match_id', 'innings', 'runs', 'balls', 'dots', 
            'dismissals', 'fours', 'sixes', '30s', '50s', '100s', 'HS', 'RPI', 'SR', 'BPD', 'BPB'
        ]]
    else:
        match_wise_df = pd.DataFrame()
    
    if all_overall_data:
        overall_df = pd.DataFrame(all_overall_data)
        # Sort by total runs descending
        overall_df = overall_df.sort_values('total_runs', ascending=False)
    else:
        overall_df = pd.DataFrame()
    
    # Display results
    print(f"\n📋 MATCH-WISE BREAKDOWN ({len(all_match_wise_data)} total encounters):")
    print("-" * 100)
    from IPython.display import display
    if not match_wise_df.empty:
        display(match_wise_df.head(20))  # Show first 20 encounters
    else:
        print("❌ No match-wise data found")
    
    print(f"\n📊 OVERALL SUMMARY ({len(all_overall_data)} matchups):")
    print("-" * 100)
    if not overall_df.empty:
        display(overall_df)
        
        # Enhanced insights for multiple matchups
        print(f"\n💡 MULTI-PLAYER INSIGHTS:")
        
        # Best batting performances
        best_sr = overall_df.loc[overall_df['overall_sr'].idxmax()]
        most_runs = overall_df.loc[overall_df['total_runs'].idxmax()]
        most_boundaries = overall_df.loc[overall_df['boundary_percentage'].idxmax()]
        
        # Best bowling performances
        best_bowling_avg = overall_df[overall_df['bowling_avg'] > 0].loc[overall_df[overall_df['bowling_avg'] > 0]['bowling_avg'].idxmin()] if (overall_df['bowling_avg'] > 0).any() else None
        best_economy = overall_df[overall_df['economy_rate'] > 0].loc[overall_df[overall_df['economy_rate'] > 0]['economy_rate'].idxmin()] if (overall_df['economy_rate'] > 0).any() else None
        most_dots = overall_df.loc[overall_df['dot_percentage'].idxmax()]
        
        print(f"🏆 Best Strike Rate: {best_sr['batsman']} vs {best_sr['bowler']} (SR: {best_sr['overall_sr']})")
        print(f"🔥 Most Runs: {most_runs['batsman']} vs {most_runs['bowler']} ({most_runs['total_runs']} runs)")
        print(f"💥 Most Boundaries: {most_boundaries['batsman']} vs {most_boundaries['bowler']} ({most_boundaries['boundary_percentage']}%)")
        
        if best_bowling_avg is not None:
            print(f"🎳 Best Bowling Avg: {best_bowling_avg['bowler']} vs {best_bowling_avg['batsman']} (Avg: {best_bowling_avg['bowling_avg']})")
        if best_economy is not None:
            print(f"💰 Best Economy: {best_economy['bowler']} vs {best_economy['batsman']} (Eco: {best_economy['economy_rate']})")
        print(f"🛡️ Most Dots: {most_dots['bowler']} vs {most_dots['batsman']} ({most_dots['dot_percentage']}%)")
        
        # Dominance analysis
        batsman_dominated = overall_df[overall_df['dominance'] == 'Batsman'].shape[0]
        bowler_dominated = overall_df[overall_df['dominance'] == 'Bowler'].shape[0]
        balanced = overall_df[overall_df['dominance'] == 'Balanced'].shape[0]
        
        print(f"\n📈 DOMINANCE BREAKDOWN:")
        print(f"🏏 Batsman Dominated: {batsman_dominated} matchups")
        print(f"🎳 Bowler Dominated: {bowler_dominated} matchups")
        print(f"⚖️ Balanced: {balanced} matchups")
        
    else:
        print("❌ No overall data found")
    
    return match_wise_df, overall_df

# Example usage with multiple players
print("🏏 MULTI-PLAYER CRICKET HEAD-TO-HEAD ANALYSIS")
print("=" * 80)

# Define multiple bowlers and batsmen
venue_input = 'Old Trafford, Manchester'

batsman_inputs = [
        "KK Jennings", "LWP Wells", "MF Hurst", "AJ Turner","LS Livingstone", 
    "JJ Bohannon", "CJ Green", "TW Hartley", "TH Aspinwall", "Charlie Barnard", "James Anderson"
]

bowler_inputs = [
    "R Vasconcelos", "MP Breetzke", "DJ Willey", "Ravi Bopara", "Saif Zaib", "GA Bartlett", "LD McManus", "Luke Procter", 
                "Ben Sanderson", "GLS Scrimshaw", "Lloyd Pope"
]

# Analyze all combinations
match_wise_df, overall_df = multi_player_head_to_head_analysis(df, venue_input, bowler_inputs, batsman_inputs)

# Access individual DataFrames
print(f"\n🔍 You can access:")
print(f"📋 Match-wise breakdown: match_wise_df ({len(match_wise_df)} encounters)")
print(f"📊 Overall summary: overall_df ({len(overall_df)} unique matchups)")


🏏 MULTI-PLAYER CRICKET HEAD-TO-HEAD ANALYSIS
🏏 MULTI-PLAYER HEAD-TO-HEAD MATCHUP ANALYSIS
📍 Venue: Old Trafford, Manchester
🎳 Bowlers: R Vasconcelos, MP Breetzke, DJ Willey, Ravi Bopara, Saif Zaib, GA Bartlett, LD McManus, Luke Procter, Ben Sanderson, GLS Scrimshaw, Lloyd Pope
🏏 Batsmen: KK Jennings, LWP Wells, MF Hurst, AJ Turner, LS Livingstone, JJ Bohannon, CJ Green, TW Hartley, TH Aspinwall, Charlie Barnard, James Anderson

🔍 BOWLER SEARCH RESULTS:
--------------------------------------------------
❌ 'R Vasconcelos' → Not found
   💡 Try: RP Jones (57%), S Conners (55%)
❌ 'MP Breetzke' → Not found
   💡 Try: LM Reece (63%), DM Bess (51%)
🎯 'DJ Willey' → 'DJ Willey' (100%)
✅ 'Ravi Bopara' → 'RS Bopara' (80%)
✅ 'Saif Zaib' → 'SA Zaib' (88%)
❌ 'GA Bartlett' → Not found
   💡 Try: TW Hartley (57%), M Carter (53%)
❌ 'LD McManus' → Not found
   💡 Try: DR Mousley (50%), DR Sams (47%)
❌ 'Luke Procter' → Not found
   💡 Try: JE Root (64%), M Carter (57%)
✅ 'Ben Sanderson' → 'BW Sanderson' (88%)

Unnamed: 0,venue,bowler,batsman,match_id,innings,runs,balls,dots,dismissals,fours,sixes,30s,50s,100s,HS,RPI,SR,BPD,BPB
0,"Old Trafford, Manchester",DJ Willey,LWP Wells,1410492,1,4,1,0,0,1,0,0,0,0,4,4,400.0,0.0,1.0
1,"Old Trafford, Manchester",DJ Willey,CJ Green,1410492,1,1,1,0,0,0,0,0,0,0,1,1,100.0,0.0,0.0
2,"Old Trafford, Manchester",DJ Willey,TW Hartley,1410492,1,0,1,1,1,0,0,0,0,0,0,0,0.0,1.0,0.0
3,"Old Trafford, Manchester",RS Bopara,MF Hurst,1410492,1,6,7,3,1,0,0,0,0,0,6,6,85.71,7.0,0.0
4,"Old Trafford, Manchester",RS Bopara,LS Livingstone,1410492,1,5,2,0,0,1,0,0,0,0,5,5,250.0,0.0,2.0
5,"Old Trafford, Manchester",SA Zaib,MF Hurst,1410492,1,1,1,0,0,0,0,0,0,0,1,1,100.0,0.0,0.0
6,"Old Trafford, Manchester",SA Zaib,LS Livingstone,1410492,1,8,5,2,0,0,1,0,0,0,8,8,160.0,0.0,5.0
7,"Old Trafford, Manchester",BW Sanderson,KK Jennings,1297831,2,1,3,2,0,0,0,0,0,0,1,1,33.33,0.0,0.0
8,"Old Trafford, Manchester",BW Sanderson,KK Jennings,1410492,1,9,5,2,0,2,0,0,0,0,9,9,180.0,0.0,2.5
9,"Old Trafford, Manchester",BW Sanderson,LWP Wells,1410492,1,6,4,1,0,0,0,0,0,0,6,6,150.0,0.0,0.0



📊 OVERALL SUMMARY (14 matchups):
----------------------------------------------------------------------------------------------------


Unnamed: 0,venue,bowler,batsman,total_matches,total_innings,total_runs,total_balls,total_dismissals,fours,sixes,dots,boundaries,highest_score,milestones_30s,milestones_50s,milestones_100s,overall_avg,overall_sr,overall_bpd,overall_bpb,dot_percentage,boundary_percentage,bowling_avg,bowling_sr,economy_rate,dominance
12,"Old Trafford, Manchester",GLS Scrimshaw,KK Jennings,1,1,17,6,0,2,1,1,3,17,0,0,0,17.0,283.33,0.0,2.0,16.67,50.0,0.0,0.0,17.0,Batsman
13,"Old Trafford, Manchester",GLS Scrimshaw,LS Livingstone,2,2,16,12,0,0,2,6,2,15,0,0,0,8.0,133.33,0.0,6.0,50.0,16.67,0.0,0.0,8.0,Batsman
7,"Old Trafford, Manchester",BW Sanderson,KK Jennings,2,2,10,8,0,2,0,4,2,9,0,0,0,5.0,125.0,0.0,4.0,50.0,25.0,0.0,0.0,7.5,Batsman
6,"Old Trafford, Manchester",SA Zaib,LS Livingstone,1,1,8,5,0,0,1,2,1,8,0,0,0,8.0,160.0,0.0,5.0,40.0,20.0,0.0,0.0,9.6,Batsman
3,"Old Trafford, Manchester",RS Bopara,MF Hurst,1,1,6,7,1,0,0,3,0,6,0,0,0,6.0,85.71,7.0,0.0,42.86,0.0,6.0,7.0,5.14,Balanced
8,"Old Trafford, Manchester",BW Sanderson,LWP Wells,2,1,6,4,0,0,0,1,0,6,0,0,0,3.0,150.0,0.0,0.0,25.0,0.0,0.0,0.0,9.0,Batsman
4,"Old Trafford, Manchester",RS Bopara,LS Livingstone,1,1,5,2,0,1,0,0,1,5,0,0,0,5.0,250.0,0.0,2.0,0.0,50.0,0.0,0.0,15.0,Batsman
0,"Old Trafford, Manchester",DJ Willey,LWP Wells,1,1,4,1,0,1,0,0,1,4,0,0,0,4.0,400.0,0.0,1.0,0.0,100.0,0.0,0.0,24.0,Batsman
10,"Old Trafford, Manchester",BW Sanderson,CJ Green,1,1,4,2,1,1,0,1,1,4,0,0,0,4.0,200.0,2.0,2.0,50.0,50.0,4.0,2.0,12.0,Batsman
9,"Old Trafford, Manchester",BW Sanderson,LS Livingstone,3,2,2,7,0,0,0,5,0,1,0,0,0,0.67,28.57,0.0,0.0,71.43,0.0,0.0,0.0,1.71,Bowler



💡 MULTI-PLAYER INSIGHTS:
🏆 Best Strike Rate: LWP Wells vs DJ Willey (SR: 400.0)
🔥 Most Runs: KK Jennings vs GLS Scrimshaw (17 runs)
💥 Most Boundaries: LWP Wells vs DJ Willey (100.0%)
🎳 Best Bowling Avg: BW Sanderson vs CJ Green (Avg: 4.0)
💰 Best Economy: BW Sanderson vs LS Livingstone (Eco: 1.71)
🛡️ Most Dots: DJ Willey vs TW Hartley (100.0%)

📈 DOMINANCE BREAKDOWN:
🏏 Batsman Dominated: 8 matchups
🎳 Bowler Dominated: 2 matchups
⚖️ Balanced: 4 matchups

🔍 You can access:
📋 Match-wise breakdown: match_wise_df (17 encounters)
📊 Overall summary: overall_df (14 unique matchups)


In [13]:
df.columns

Index(['match_id', 'season', 'start_date', 'venue', 'innings', 'ball',
       'batting_team', 'bowling_team', 'batsman', 'non_striker', 'bowler',
       'runs_of_bat', 'extras', 'wides', 'noballs', 'byes', 'legbyes',
       'penalty', 'wicket_type', 'player_dismissed', 'other_wicket_type',
       'other_player_dismissed', 'total_runs'],
      dtype='object')

In [17]:
df.venue.unique()

array(['St Lawrence Ground, Canterbury', 'Old Trafford, Manchester',
       'County Ground, New Road, Worcester',
       'The Cooper Associates County Ground, Taunton',
       'Sophia Gardens, Cardiff', 'Headingley, Leeds', "Lord's, London",
       'County Ground, Bristol', 'Riverside Ground, Chester-le-Street',
       'County Ground, Chelmsford', 'County Ground, Northampton',
       'Trent Bridge, Nottingham', 'Grace Road, Leicester',
       'County Ground, Hove', 'County Ground, Derby',
       'Kennington Oval, London', 'Radlett Cricket Club, Radlett',
       'Edgbaston, Birmingham', 'St Lawrence Ground',
       'The Rose Bowl, Southampton', 'County Ground',
       'College Ground, Cheltenham', "Queen's Park, Chesterfield",
       'Stanley Park, Blackpool',
       "Merchant Taylors' School Ground, Northwood"], dtype=object)