In [26]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
import shap

def compute_rolling_averages(df, player_name, date, column, windows):
    player_data = df[(df['player_name'] == player_name) & (df['date'] < date)].sort_values(by='date')
    averages = []
    for window in windows:
        avg = player_data[column].tail(window).mean()
        averages.append(avg if not np.isnan(avg) else 0)
    overall_avg = player_data[column].mean()
    averages.append(overall_avg if not np.isnan(overall_avg) else 0)
    return averages

def get_player_features(player_name, date, data_folder, windows):
    """
    Fetches the rolling averages and features for a single player.
    """
    player_file = os.path.join(data_folder, f"{player_name}.csv")
    if not os.path.exists(player_file):
        raise FileNotFoundError(f"Data file for player {player_name} not found.")
    
    # Load player data
    player_data = pd.read_csv(player_file)
    
    # Compute rolling averages for batting, bowling, and fielding points
    batting_avgs = compute_rolling_averages(player_data, player_name, date, 'batting_points', windows)
    bowling_avgs = compute_rolling_averages(player_data, player_name, date, 'bowling_points', windows)
    fielding_avgs = compute_rolling_averages(player_data, player_name, date, 'fielding_points', windows)
    
    return batting_avgs + bowling_avgs + fielding_avgs


def get_team_features(team_players, opponent_players, date, data_folder, windows):
    """
    Computes features for all players in a team against the given opponent players.
    """
    team_features = []
    
    # Compute opponent metrics once for reuse
    opponent_features = []
    for opponent_name in opponent_players:
        opponent_file = os.path.join(data_folder, f"{opponent_name}.csv")
        if not os.path.exists(opponent_file):
            raise FileNotFoundError(f"Data file for opponent player {opponent_name} not found.")
        
        # Load opponent data
        opponent_data = pd.read_csv(opponent_file)
        
        # Compute rolling averages for opponent's batting, bowling, and fielding points
        opponent_batting_avgs = compute_rolling_averages(opponent_data, opponent_name, date, 'batting_points', windows)
        opponent_bowling_avgs = compute_rolling_averages(opponent_data, opponent_name, date, 'bowling_points', windows)
        opponent_fielding_avgs = compute_rolling_averages(opponent_data, opponent_name, date, 'fielding_points', windows)
        
        opponent_features += opponent_batting_avgs + opponent_bowling_avgs + opponent_fielding_avgs
    
    # Compute features for each player in the team
    for player_name in team_players:
        player_features = get_player_features(player_name, date, data_folder, windows)
        full_features = player_features + opponent_features
        team_features.append((player_name, full_features))
    
    return team_features
def select_top_11_players_with_shap(team1, team2, date, model, data_folder, windows):
    """
    Selects the top 11 players based on predicted fantasy points and computes SHAP values for each player's features.
    """
    # Compute features for all 22 players
    team1_features = get_team_features(team1, team2, date, data_folder, windows)
    team2_features = get_team_features(team2, team1, date, data_folder, windows)
    
    # Combine all players into a single list
    all_players = team1_features + team2_features
    
    # Extract player names and features
    player_names = [player[0] for player in all_players]
    feature_matrix = np.array([player[1] for player in all_players])
    
    # Predict fantasy points using the trained model
    predictions = model.predict(feature_matrix)
    total_fantasy_points = predictions.sum(axis=1)  # Sum batting, bowling, and fielding points
    
    # Combine player names with their predicted points
    player_scores = [
        (player_names[i], predictions[i][0], predictions[i][1], predictions[i][2], total_fantasy_points[i])
        for i in range(len(player_names))
    ]
    
    # Sort players by total fantasy points in descending order
    sorted_players = sorted(player_scores, key=lambda x: x[4], reverse=True)
    
    # Select the top 11 players
    top_11_players = sorted_players[:11]
    
    # Calculate SHAP values for the features
    shap_values = calculate_shap_values(model, feature_matrix)
    
    # Return the top 11 players, SHAP values, player names, and feature matrix
    return top_11_players, shap_values, player_names, feature_matrix


# Example Usage
top_11, shap_values, player_names, feature_matrix = select_top_11_players_with_shap(
    team1, team2, date, model, data_folder='player_csv', windows=[3, 5, 10, 20]
)

    
def calculate_shap_values(model, feature_matrix):
    
    # Use KernelExplainer or TreeExplainer depending on the model type
    # Here we assume you're using a neural network, so we use KernelExplainer.
    explainer = shap.KernelExplainer(model.predict, feature_matrix)
    
    # Calculate SHAP values for all players (i.e., for all rows in feature_matrix)
    shap_values = explainer.shap_values(feature_matrix)
    
    # The shap_values will be a list with one set of SHAP values per output class
    # Each class will have SHAP values for all 22 players across all 180 features
    return shap_values

# Calculate SHAP values
shap_values = calculate_shap_values(model, feature_matrix)

# SHAP values will be a list of arrays, one for each output (e.g., batting, bowling, fielding)
# We can visualize them as needed, using summary plots or dependence plots.


  

def fetch_actual_points(players, date, data_folder):
    
    actual_points = []
    
    for player_name in players:
        player_file = os.path.join(data_folder, f"{player_name}.csv")
        if not os.path.exists(player_file):
            raise FileNotFoundError(f"Data file for player {player_name} not found.")
        
        # Load player data
        player_data = pd.read_csv(player_file)
        
        # Find the row matching the given date
        player_row = player_data[player_data['date'] == date]
        if player_row.empty:
            print(f"No data found for player {player_name} on date {date}. Assigning 0 points.")
            batting_points = bowling_points = fielding_points = total_points = 0
        else:
            batting_points = player_row.iloc[0]['batting_points']
            bowling_points = player_row.iloc[0]['bowling_points']
            fielding_points = player_row.iloc[0]['fielding_points']
            total_points = batting_points + bowling_points + fielding_points
        
        # Append to the list
        actual_points.append((player_name, batting_points, bowling_points, fielding_points, total_points))
    
    return actual_points


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


  0%|          | 0/22 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 437us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 414us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 423us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 404us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 420us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 405us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1613/1613[0m [32

  0%|          | 0/22 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 417us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 426us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 414us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 407us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 422us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 419us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1613/1613[0m [32

In [24]:
print(shap_values.shape)

(22, 180, 3)


In [27]:
# Select top 11 players and get SHAP values
top_11, shap_values, player_names, feature_matrix = select_top_11_players_with_shap(
    team1, team2, date, model, data_folder='player_csv', windows=[3, 5, 10, 20]
)

# Display the results for top 11 players
print("Top 11 Players Based on Predicted Fantasy Points:")
print(f"{'Player':<15} {'Batting':<10} {'Bowling':<10} {'Fielding':<10} {'Total Points':<15}")
print("-" * 60)
for player, batting, bowling, fielding, total in top_11:
    print(f"{player:<15} {batting:<10.2f} {bowling:<10.2f} {fielding:<10.2f} {total:<15.2f}")

# Optionally, visualize SHAP values for the top 11 players
shap.summary_plot(shap_values[0], feature_matrix, feature_names=[f"Feature {i+1}" for i in range(feature_matrix.shape[1])])
shap.summary_plot(shap_values[1], feature_matrix, feature_names=[f"Feature {i+1}" for i in range(feature_matrix.shape[1])])
shap.summary_plot(shap_values[2], feature_matrix, feature_names=[f"Feature {i+1}" for i in range(feature_matrix.shape[1])])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step


  0%|          | 0/22 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 417us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 403us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 407us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 415us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 411us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1613/1613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 422us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1613/1613[0m [32

AssertionError: The shape of the shap_values matrix does not match the shape of the provided data matrix.

In [18]:
# Step 1: Check the shapes of each SHAP values and the feature matrix
print(f"Shape of shap_values_batting: {shap_values_batting.shape}")
print(f"shap values for batting:", shap_values_batting)
print(f"Shape of shap_values_bowling: {shap_values_bowling.shape}")
print(f"Shape of shap_values_fielding: {shap_values_fielding.shape}")
print(f"Shape of feature_matrix: {feature_matrix.shape}")

# Step 2: If they are consistent, concatenate SHAP values for all outputs
# Check that the number of features in each output matches the feature matrix
if shap_values_batting.shape[1] == feature_matrix.shape[1]:
    shap_values_combined = np.concatenate([shap_values_batting, shap_values_bowling, shap_values_fielding], axis=1)
    shap.summary_plot(shap_values_combined, feature_matrix, feature_names=[f"Feature {i+1}" for i in range(shap_values_combined.shape[1])])
else:
    print("Feature matrix and SHAP values do not align. Please check the feature extraction process.")



Shape of shap_values_batting: (180, 3)
shap values for batting: [[ 6.42684646e-01  1.35279355e-01  2.91848873e-02]
 [ 2.93398916e+00 -8.09883349e-01  7.63249172e-03]
 [ 4.22628574e+00 -8.92135662e-01  8.60311845e-03]
 [ 5.84065223e+00 -1.42586462e+00  1.01829441e-01]
 [ 3.92907715e+00 -1.18968175e+00  1.58936558e-03]
 [ 1.16289836e-01 -1.09083178e-01  0.00000000e+00]
 [ 2.99309324e-01 -4.44042408e-01  0.00000000e+00]
 [ 3.08748376e-01 -6.69437819e-01 -6.37487090e-03]
 [ 6.31333799e-01 -9.47531113e-01  0.00000000e+00]
 [ 9.02379538e-01 -8.99947512e-01 -1.44337115e-02]
 [-3.60034814e-02  0.00000000e+00 -6.63089479e-03]
 [-2.15973562e-02  0.00000000e+00 -6.80111930e-03]
 [ 0.00000000e+00  0.00000000e+00 -4.03706329e-03]
 [ 6.45538424e-02 -7.15792169e-02  5.41579492e-03]
 [ 1.45652110e-02  0.00000000e+00  5.56555553e-03]
 [-3.01921384e-01 -6.94178408e-02 -1.24127526e-02]
 [ 4.64503284e-02  1.96780125e-01  1.61441387e-02]
 [ 1.94479457e-01 -4.59252418e-02  1.54185785e-02]
 [-3.14780534e-01 

In [None]:


# Load the trained model
from tensorflow.keras.models import load_model
model = load_model('final_model.keras')

# Select top 11 players
# Select top 11 players
top_11 = select_top_11_players(team1, team2, date, model, data_folder='player_csv', windows=[3, 5, 10, 20])

# Display the results
print("Top 11 Players Based on Predicted Fantasy Points:")
print(f"{'Player':<15} {'Batting':<10} {'Bowling':<10} {'Fielding':<10} {'Total Points':<15}")
print("-" * 60)
for player, batting, bowling, fielding, total in top_11:
    print(f"{player:<15} {batting:<10.2f} {bowling:<10.2f} {fielding:<10.2f} {total:<15.2f}")


In [8]:
actual_points = fetch_actual_points(team1 + team2, date, data_folder='player_csv')

# Display the actual points
print("Actual Points for All 22 Players:")
print(f"{'Player':<15} {'Batting':<10} {'Bowling':<10} {'Fielding':<10} {'Total Points':<15}")
print("-" * 60)
for player, batting, bowling, fielding, total in actual_points:
    print(f"{player:<15} {batting:<10.2f} {bowling:<10.2f} {fielding:<10.2f} {total:<15.2f}")

Actual Points for All 22 Players:
Player          Batting    Bowling    Fielding   Total Points   
------------------------------------------------------------
RD Gaikwad      -2.00      0.00       0.00       -2.00          
DP Conway       3.00       0.00       0.00       3.00           
RV Uthappa      34.00      0.00       0.00       34.00          
AT Rayudu       18.00      0.00       8.00       26.00          
RA Jadeja       28.00      2.00       8.00       38.00          
S Dube          3.00       0.00       0.00       3.00           
MS Dhoni        69.00      0.00       8.00       77.00          
DJ Bravo        0.00       83.00      0.00       83.00          
MJ Santner      0.00       25.00      0.00       25.00          
AF Milne        0.00       0.00       0.00       0.00           
TU Deshpande    0.00       0.00       8.00       8.00           
AM Rahane       56.00      0.00       0.00       56.00          
VR Iyer         18.00      0.00       0.00       18.00      

In [None]:
import shap
import pandas as pd



explainer = combined_data.iloc#(player_indices)
