In [16]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model

# Paths
ipl_folder = 'C:/Users/itsme/OneDrive/Documents/GitHub/dream11_backend/Data/Datasets/ipl_json_processed'
people_file = 'C:/Users/itsme/OneDrive/Documents/GitHub/dream11_backend/Data/Datasets/people (1).csv'

# Load people data
people_data = pd.read_csv(people_file)

# Combine all player CSV files
def load_ipl_data(folder):
    data = []
    for file in os.listdir(folder):
        if file.endswith('.csv'):
            player_id = os.path.splitext(file)[0]
            player_data = pd.read_csv(os.path.join(folder, file))
            player_data['player_id'] = player_id
            # print(player_id)
            data.append(player_data)
    return pd.concat(data, ignore_index=True)

ipl_data = load_ipl_data(ipl_folder)
print("loaded_data")
 


loaded_data


In [24]:
print((ipl_data.loc[1836])   )

date                                                           2011-05-21
venue                                                    Feroz Shah Kotla
event                                               Indian Premier League
match_type                                                            T20
total_overs                                                            20
player_id                                                        0af3426f
team                                                        Pune Warriors
runs_scored                                                             0
balls_faced                                                             0
boundaries                                                              0
sixes                                                                   0
balls_bowled                                                            6
wickets                                                                 1
runs_given                            

In [22]:

# Feature extraction
def extract_features(row, people_data):
    player_id = row['player_id']
    opponent_ids = row[['player1', 'player2', 'player3', 'player4', 'player5', 
                    'player6', 'player7', 'player8', 'player9', 'player10', 
                    'player11']].dropna().values
    # print(f"opponent ids {opponent_ids}")
# Ensure that we always have 11 opponents
    # if len(opponent_ids) < 11:
    #     opponent_ids = np.concatenate([opponent_ids, np.zeros(11 - len(opponent_ids))])  # Padding with zeros if fewer than 11

 
    # Player features: All batting and bowling averages
    player_stats = people_data[people_data['player_id'] == player_id]
    batting_averages = player_stats[[
        'last_3_batting_avg', 'last_5_batting_avg', 'last_10_batting_avg', 
        'last_20_batting_avg', 'overall_batting_avg'
    ]].fillna(0).values.flatten()  # Fill NaN with 0
    print(f"batting averages are {batting_averages}" )
    bowling_averages = player_stats[[
        'last_3_bowling_avg', 'last_5_bowling_avg', 'last_10_bowling_avg', 
        'last_20_bowling_avg', 'overall_bowling_avg'
    ]].fillna(0).values.flatten()  # Fill NaN with 0
    print(f"bowling averages are {bowling_averages}" )
    # Opponent features: All batting and bowling averages
    opponent_data = people_data[people_data['player_id'].isin(opponent_ids)]
    opponent_batting_averages = opponent_data[[
        'last_3_batting_avg', 'last_5_batting_avg', 'last_10_batting_avg', 
        'last_20_batting_avg', 'overall_batting_avg'
    ]].fillna(0).values.flatten()  # Fill NaN with 0
    print(f"opp batting averages are {len(opponent_batting_averages)}" )
    opponent_bowling_averages = opponent_data[[
        'last_3_bowling_avg', 'last_5_bowling_avg', 'last_10_bowling_avg', 
        'last_20_bowling_avg', 'overall_bowling_avg'
    ]].fillna(0).values.flatten()  # Fill NaN with 0
    print(f"opp bowling averages are {len(opponent_bowling_averages)}" )
    # Combine features
    features = np.concatenate([batting_averages, bowling_averages, opponent_batting_averages, opponent_bowling_averages])
    print(f"Feature vector shape: {features.shape}")  # Debugging line
    return features
    # return np.concatenate([batting_averages, bowling_averages, opponent_batting_averages, opponent_bowling_averages])



In [23]:
features=extract_features(ipl_data.loc[1836],people_data)
print((features))

batting averages are [0. 0. 0. 0. 0.]
bowling averages are [25. 25. 25. 25. 25.]
opp batting averages are 50
opp bowling averages are 50
Feature vector shape: (110,)
[ 0.          0.          0.          0.          0.         25.
 25.         25.         25.         25.          5.33333333  3.2
  1.6         0.8         0.20512821  5.66666667  6.4        11.
 16.4        16.46666667  1.          2.6         9.2        39.7
 46.57297297  2.          1.6         2.6         3.9        13.53398058
  1.33333333  3.6        15.8        15.7        14.26666667  6.66666667
  7.6         6.          3.         17.          0.          0.4
  0.4         2.1         1.56756757  0.         -0.4        -0.2
  0.2         1.         34.         22.6        21.         16.
 12.33333333 22.33333333 14.4         9.8        10.2        18.67692308
  4.33333333 11.4        34.         22.6        29.02564103  0.
  0.          0.          0.          0.          0.          0.
  0.          0.          

In [20]:


# Prepare dataset
X, y = [], []
inconsistent_rows = []

for idx, row in ipl_data.iterrows():
    features = extract_features(row, people_data)
    if len(features) != 120:
        inconsistent_rows.append(idx)

        print(idx)
    X.append(features)
    y.append([row['batting_points'], row['bowling_points'], row['fielding_points']])
print(f"Inconsistent rows: {inconsistent_rows}")
print(X[:5])
print(y[:5])
X = np.array(X)
print(X.shape)
y = np.array(y)


1836
1837
5282
5283
8056
8057
8216
8217
9586
9587
14522
14523
15928
15929
17468
17469
24310
24311
27422
27423
41454
41455
Inconsistent rows: [1836, 1837, 5282, 5283, 8056, 8057, 8216, 8217, 9586, 9587, 14522, 14523, 15928, 15929, 17468, 17469, 24310, 24311, 27422, 27423, 41454, 41455]
[array([10.        , 12.        , 38.2       , 34.1       , 26.8976378 ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
       22.66666667, 17.6       , 14.8       ,  8.6       , 27.00490196,
       13.33333333,  8.4       ,  4.        , 13.8       , 23.94927536,
       23.        , 18.        , 28.2       , 45.6       , 44.55140187,
        0.        ,  0.2       ,  0.2       ,  0.3       ,  0.44360902,
       10.66666667,  7.6       , 12.6       , 16.3       , 23.95767196,
       16.66666667, 19.6       , 13.8       ,  9.7       , 16.16535433,
       -1.33333333, -1.2       ,  1.8       ,  2.3       ,  1.60714286,
       33.33333333, 23.8       , 32.2       , 39.4       , 32.149

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (49074,) + inhomogeneous part.

In [None]:
len(inconsistent_rows)

22

In [None]:

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Neural network model
model = Sequential([
    Dense(64, activation='relu', input_dim=X_train.shape[1]),
    Dense(32, activation='relu'),
    Dense(3, activation='linear')  # Outputs: Batting, Bowling, Fielding points
])

model.compile(optimizer='adam', loss='mae')
# model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test))
# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test))

# Save the trained model
model.save('fantasy_team_model.h5')
print("Model saved as fantasy_team_model.h5")



In [None]:


# Predict fantasy team
def select_fantasy_team(match_data, model, people_data):
    predictions = {}
    
    # Loop through all players in the match
    for player_id in match_data['player_id']:
        # Extract features for the player
        player_row = match_data[match_data['player_id'] == player_id].iloc[0]
        features = extract_features(player_row, people_data)
        
        # Predict total fantasy points
        predictions[player_id] = model.predict(features[np.newaxis, :]).sum()
    
    # Select the top 11 players with the highest predicted points
    return sorted(predictions.keys(), key=lambda x: predictions[x], reverse=True)[:11]



# Load the saved model
model = load_model('fantasy_team_model.h5')

# Define your teams
team1 = ["db584dad","c3a96caf",	"12b610c2",	"99d63244",	"4329fbb5",	"dc9dd038"	,"bd17b45f",	"957532de",	"245c97cb",	"57ee1fde",	"18e6906e"]  # Replace with actual player IDs
team2 = ["dcce6f09",	"0a476045",	"32198ae0"	,"1c914163"	,"73ad96ed",	"2e11c706",	"890946a0","c18496e1"	,"2e81a32d"	,"96fd40ae",	"5f547c8b"]
match_data = pd.DataFrame({
    'player_id': team1 + team2,
    'player_ids': team1 + team2,
})

# Predict the best fantasy team
best_team = select_fantasy_team(match_data, model, people_data)
print("Best Fantasy Team:", best_team)

