In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Load ball-by-ball data
bbb_df = pd.read_csv('bbb.csv')

# Load team player data (assuming multiple files for each team)
teams = ['csk', 'dc', 'gt', 'kkr', 'lsg', 'mi', 'pbks', 'rcb', 'rr', 'srh']
team_data = {}
for team in teams:
    # Read the Excel file for each team
    file_name = f'{team}.xlsx'
    team_data[team] = pd.read_excel(file_name)
    
    # Ensure the 'player_name' column is correctly loaded
    if 'player_name' not in team_data[team].columns:
        print(f"Warning: 'player_name' column not found in {file_name}")

# Preprocess ball-by-ball data
bbb_df['Date'] = pd.to_datetime(bbb_df['Date'])
bbb_df.fillna(0, inplace=True)

# Feature Engineering
player_stats = bbb_df.groupby(['Batter']).agg({
    'Batter Runs': ['sum', 'mean'],
    'Ball': 'count'
}).reset_index()
player_stats.columns = ['Batter', 'Total Runs', 'Avg Runs', 'Balls Faced']
player_stats['Strike Rate'] = (player_stats['Total Runs'] / player_stats['Balls Faced']) * 100

bowler_stats = bbb_df.groupby(['Bowler']).agg({
    'Bowler Runs Conceded': ['sum', 'mean'],
    'Valid Ball': 'count'
}).reset_index()
bowler_stats.columns = ['Bowler', 'Total Runs Conceded', 'Avg Runs Conceded', 'Balls Bowled']
bowler_stats['Economy Rate'] = (bowler_stats['Total Runs Conceded'] / bowler_stats['Balls Bowled']) * 6

# Merge batting and bowling stats
player_performance = pd.merge(player_stats, bowler_stats, left_on='Batter', right_on='Bowler', how='outer').fillna(0)
player_performance.drop(columns=['Bowler'], inplace=True)

# Encode categorical features
encoder = LabelEncoder()
bbb_df['Venue'] = encoder.fit_transform(bbb_df['Venue'])

# Model Training
data = player_performance.drop(columns=['Batter'])
target = (player_performance['Avg Runs'] > player_performance['Avg Runs'].median()).astype(int)  # Use actual performance as target
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Model Accuracy:", accuracy_score(y_test, y_pred))

# Function to predict best 11 players
def predict_best_11(team1, team2, venue):
    venue_encoded = encoder.transform([venue])[0]
    team1_players = team_data[team1]['player_name'].tolist()
    team2_players = team_data[team2]['player_name'].tolist()
    
    team1_stats = player_performance[player_performance['Batter'].isin(team1_players)]
    team2_stats = player_performance[player_performance['Batter'].isin(team2_players)]
    
    best_11_team1 = team1_stats.sort_values(by=['Avg Runs', 'Strike Rate'], ascending=False).head(6)
    best_11_team2 = team2_stats.sort_values(by=['Avg Runs', 'Strike Rate'], ascending=False).head(6)
    
    return best_11_team1['Batter'].tolist(), best_11_team2['Batter'].tolist()

# User Input
team1 = input("Enter Team 1: ")
team2 = input("Enter Team 2: ")
venue = input("Enter Venue: ")

print("Predicted Best 11:", predict_best_11(team1, team2, venue))



Model Accuracy: 0.9928057553956835
Predicted Best 11: (['TH David', 'C Green', 'JC Archer', 'Arjun Tendulkar', 'N Wadhera', 'SA Yadav'], ['AD Russell', 'SP Narine', 'D Wiese', 'LH Ferguson', 'SN Thakur', 'RK Singh'])


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Load ball-by-ball data
bbb_df = pd.read_csv('bbb.csv')

# Load team player data (assuming multiple files for each team)
teams = ['csk', 'dc', 'gt', 'kkr', 'lsg', 'mi', 'pbks', 'rcb', 'rr', 'srh']
team_data = {}
team_data = {}
for team in teams:
    # Read the Excel file for each team
    file_name = f'{team}.xlsx'  # Updated file name format
    team_data[team] = pd.read_excel(file_name)
    
    # Ensure the 'player_name' column is correctly loaded
    if 'player_name' not in team_data[team].columns:
        print(f"Warning: 'player_name' column not found in {file_name}")

# Preprocess ball-by-ball data
bbb_df['Date'] = pd.to_datetime(bbb_df['Date'])
bbb_df.fillna(0, inplace=True)

# Feature Engineering
player_stats = bbb_df.groupby(['Batter']).agg({
    'Batter Runs': ['sum', 'mean'],
    'Ball': 'count'
}).reset_index()
player_stats.columns = ['Batter', 'Total Runs', 'Avg Runs', 'Balls Faced']
player_stats['Strike Rate'] = (player_stats['Total Runs'] / player_stats['Balls Faced']) * 100

bowler_stats = bbb_df.groupby(['Bowler']).agg({
    'Bowler Runs Conceded': ['sum', 'mean'],
    'Valid Ball': 'count'
}).reset_index()
bowler_stats.columns = ['Bowler', 'Total Runs Conceded', 'Avg Runs Conceded', 'Balls Bowled']
bowler_stats['Economy Rate'] = (bowler_stats['Total Runs Conceded'] / bowler_stats['Balls Bowled']) * 6

# Merge batting and bowling stats
player_performance = pd.merge(player_stats, bowler_stats, left_on='Batter', right_on='Bowler', how='outer').fillna(0)
player_performance.drop(columns=['Bowler'], inplace=True)

# Rank players based on multiple factors
player_performance['Performance Score'] = (player_performance['Avg Runs'] * 0.5 + 
                                           player_performance['Strike Rate'] * 0.3 - 
                                           player_performance['Economy Rate'] * 0.2)
player_performance.sort_values(by='Performance Score', ascending=False, inplace=True)
player_performance['Rank'] = range(1, len(player_performance) + 1)

# Encode categorical features
encoder = LabelEncoder()
bbb_df['Venue'] = encoder.fit_transform(bbb_df['Venue'])

# Model Training
data = player_performance.drop(columns=['Batter'])
target = (player_performance['Avg Runs'] > player_performance['Avg Runs'].median()).astype(int)  # Use actual performance as target
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Model Accuracy:", accuracy_score(y_test, y_pred))

# Function to predict best 11 players
def predict_best_11(team1, team2, venue):
    venue_encoded = encoder.transform([venue])[0]
    team1_players = team_data[team1]['player_name'].tolist()
    team2_players = team_data[team2]['player_name'].tolist()
    
    team1_stats = player_performance[player_performance['Batter'].isin(team1_players)]
    team2_stats = player_performance[player_performance['Batter'].isin(team2_players)]
    
    best_11_team1 = team1_stats.sort_values(by='Performance Score', ascending=False).head(11)
    best_11_team2 = team2_stats.sort_values(by='Performance Score', ascending=False).head(11)
    
    return best_11_team1[['Batter', 'Rank']].values.tolist(), best_11_team2[['Batter', 'Rank']].values.tolist()

# User Input
team1 = input("Enter Team 1: ")
team2 = input("Enter Team 2: ")
venue = input("Enter Venue: ")

team1_best, team2_best = predict_best_11(team1, team2, venue)

print("Predicted Best 11 for", team1, ":", team1_best)
print("Predicted Best 11 for", team2, ":", team2_best)


Model Accuracy: 1.0
Predicted Best 11 for mi : [['TH David', 5], ['C Green', 27], ['JC Archer', 35], ['N Wadhera', 46], ['Arjun Tendulkar', 52], ['SA Yadav', 71], ['Tilak Varma', 86], ['Ishan Kishan', 106], ['D Brevis', 119], ['RG Sharma', 169], ['Arshad Khan', 259]]
Predicted Best 11 for kkr : [['AD Russell', 18], ['SP Narine', 32], ['D Wiese', 64], ['LH Ferguson', 65], ['RK Singh', 74], ['SN Thakur', 105], ['Rahmanullah Gurbaz', 127], ['N Rana', 148], ['VR Iyer', 195], ['Shakib Al Hasan', 245], ['Mandeep Singh', 269]]
