In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
players_df = pd.read_csv('2024_players_details.csv')
ball_by_ball_df = pd.read_csv('Ball_By_Ball_Match_Data.csv')
match_info_df = pd.read_csv('Match_Info.csv')

batting_stats = ball_by_ball_df.groupby('Batter')['BatsmanRun'].sum().reset_index()
batting_stats.columns = ['player', 'total_runs']

bowling_stats = ball_by_ball_df[ball_by_ball_df['Kind'].notnull()]
bowling_stats = bowling_stats.groupby('Bowler').size().reset_index(name='wickets')
bowling_stats.columns = ['player', 'wickets']

fielding_stats = ball_by_ball_df[ball_by_ball_df['FieldersInvolved'].notnull()]
fielding_stats = fielding_stats.groupby('FieldersInvolved').size().reset_index(name='catches')
fielding_stats.columns = ['player', 'catches']

player_df = pd.merge(batting_stats, bowling_stats, on='player', how='outer')
player_df = pd.merge(player_df, fielding_stats, on='player', how='outer')
player_df.fillna(0, inplace=True)

player_df['dream11_score'] = (
    player_df['total_runs'] * 1 +
    player_df['wickets'] * 25 +
    player_df['catches'] * 8
)

FileNotFoundError: [Errno 2] No such file or directory: '2024_players_details.csv'

In [None]:
player_df['strike_rate'] = np.random.uniform(100, 160, len(player_df))
player_df['economy_rate'] = np.random.uniform(5, 10, len(player_df))
player_df['venue_encoded'] = np.random.randint(0, 10, len(player_df))
player_df['opponent_encoded'] = np.random.randint(0, 10, len(player_df))

In [None]:
features = ['total_runs', 'wickets', 'catches', 'strike_rate', 'economy_rate', 'venue_encoded', 'opponent_encoded']
target = 'dream11_score'

X_train, X_test, y_train, y_test = train_test_split(player_df[features], player_df[target], test_size=0.2, random_state=42)

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [None]:
predictions = model.predict(X_test)
print("RMSE:", np.sqrt(mean_squared_error(y_test, predictions)))
print("R^2 Score:", r2_score(y_test, predictions))

RMSE: 150.57330560451095
R^2 Score: 0.986266886029997


In [None]:
player_df['predicted_score'] = model.predict(player_df[features])
dream11_team = player_df.sort_values('predicted_score', ascending=False).head(11)

print("\nRecommended Dream11 Team:")
print(dream11_team[['player', 'predicted_score', 'total_runs', 'wickets', 'catches']])


Recommended Dream11 Team:
              player  predicted_score  total_runs  wickets  catches
1102         V Kohli          8560.17      8262.0      5.0    125.0
828        RG Sharma          7571.83      6686.0     16.0    105.0
887         S Dhawan          7165.37      6769.0      4.0    102.0
808        RA Jadeja          7132.48      3051.0    173.0    106.0
934         SK Raina          7094.98      5536.0     30.0    112.0
227        DA Warner          6969.37      6567.0      0.0     94.0
631         MS Dhoni          6878.74      5373.0      0.0    217.0
242         DJ Bravo          6814.80      1560.0    207.0     75.0
964        SP Narine          6433.37      1659.0    205.0     27.0
865       RV Uthappa          6347.19      4954.0      0.0    128.0
47    AB de Villiers          6336.60      5181.0      0.0    134.0


In [None]:
match_info_df['match_date'] = pd.to_datetime(match_info_df['match_date'], errors='coerce')

recent_match_ids = match_info_df.sort_values('match_date', ascending=False)['match_number'].head(5).tolist()

recent_data = ball_by_ball_df[ball_by_ball_df['ID'].isin(recent_match_ids)]


In [None]:
match_info_df['match_date'] = pd.to_datetime(match_info_df['match_date'], errors='coerce')

recent_match_ids = match_info_df.sort_values('match_date', ascending=False)['match_number'].head(5).tolist()

ball_by_ball_df.rename(columns={'ID': 'match_number'}, inplace=True)  # Run only once

recent_data = ball_by_ball_df[ball_by_ball_df['match_number'].isin(recent_match_ids)]


In [None]:
tTrecent_data = pd.merge(recent_data, match_info_df[['match_number', 'team1', 'team2']], on='match_number', how='left')

mi_srh_data = recent_data[
    (recent_data['team1'] == 'MI') & (recent_data['team2'] == 'SRH') |
    (recent_data['team1'] == 'SRH') & (recent_data['team2'] == 'MI')
]

# Now proceed with the rest of your code
batting = mi_srh_data.groupby('Batter')['BatsmanRun'].sum().reset_index()
batting.columns = ['player', 'total_runs']

bowling = mi_srh_data[mi_srh_data['Kind'].notnull()].groupby('Bowler').size().reset_index(name='wickets')
bowling.columns = ['player', 'wickets']

fielding = mi_srh_data[mi_srh_data['FieldersInvolved'].notnull()].groupby('FieldersInvolved').size().reset_index(name='catches')
fielding.columns = ['player', 'catches']

# Merge
mi_srh_players = pd.merge(batting, bowling, on='player', how='outer')
mi_srh_players = pd.merge(mi_srh_players, fielding, on='player', how='outer')
mi_srh_players.fillna(0, inplace=True)

In [None]:
# Random placeholders for now â€” later weâ€™ll pull real strike/economy
mi_srh_players['strike_rate'] = np.random.uniform(100, 160, len(mi_srh_players))
mi_srh_players['economy_rate'] = np.random.uniform(5, 10, len(mi_srh_players))
mi_srh_players['venue_encoded'] = np.random.randint(0, 10, len(mi_srh_players))
mi_srh_players['opponent_encoded'] = np.random.randint(0, 10, len(mi_srh_players))

# Ensure mi_srh_players has all features used during training
# features = ['total_runs', 'wickets', 'catches', 'strike_rate', 'economy_rate', 'venue_encoded', 'opponent_encoded']  # Assuming this is your features list

# Predict score
# ... (previous code)

# Check if mi_srh_data is empty
if len(mi_srh_data) == 0:
    print("No MI vs SRH matches found in recent data. Predicting on all recent players instead.")

    # Aggregate stats for all recent players similar to mi_srh_players
    batting = recent_data.groupby('Batter')['BatsmanRun'].sum().reset_index()
    batting.columns = ['player', 'total_runs']
    # ... (similarly for bowling and fielding stats)

    # Merge stats for all recent players
    mi_srh_players = pd.merge(batting, bowling, on='player', how='outer')
    mi_srh_players = pd.merge(mi_srh_players, fielding, on='player', how='outer')
    mi_srh_players.fillna(0, inplace=True)

    # Add necessary features for prediction
    mi_srh_players['strike_rate'] = np.random.uniform(100, 160, len(mi_srh_players))
    mi_srh_players['economy_rate'] = np.random.uniform(5, 10, len(mi_srh_players))
    mi_srh_players['venue_encoded'] = np.random.randint(0, 10, len(mi_srh_players))
    mi_srh_players['opponent_encoded'] = np.random.randint(0, 10, len(mi_srh_players))

    # Predict scores for all recent players
    mi_srh_players['predicted_score'] = model.predict(mi_srh_players[features])

# ... (rest of the code remains the same)

# Sort by predicted score
dream11_team_today = mi_srh_players.sort_values('predicted_score', ascending=False).head(11)

# Show team
print("\nðŸ”¥ Recommended Dream11 Team for MI vs SRH Today:")
print(dream11_team_today[['player', 'predicted_score', 'total_runs', 'wickets', 'catches']])

No MI vs SRH matches found in recent data. Predicting on all recent players instead.

ðŸ”¥ Recommended Dream11 Team for MI vs SRH Today:
             player  predicted_score  total_runs  wickets  catches
4   Abhishek Sharma           169.00         141      0.0      0.0
17          KK Nair           109.99          89      0.0      0.0
41          RR Pant           105.09          84      0.0      0.0
49          SS Iyer            94.57          82      0.0      0.0
56          V Kohli            88.55          62      0.0      0.0
27         N Pooran            88.40          69      0.0      0.0
1        AK Markram            86.47          64      0.0      0.0
52     Shubman Gill            85.90          60      0.0      0.0
61      YBK Jaiswal            85.34          75      0.0      0.0
7   B Sai Sudharsan            84.75          56      0.0      0.0
54          TM Head            82.03          66      0.0      0.0
