In [None]:
import json
import csv
import os
from collections import defaultdict
from fantasy_calculator import fantasy_calculator  # Import the fantasy_calculator function

def process_match_data(json_file, output_folder):
    # Extract match ID from the filename
    match_id = os.path.splitext(os.path.basename(json_file))[0]
    
    # Load JSON data
    with open(json_file, 'r') as f:
        data = json.load(f)

    # Extract match info
    match_date = data["info"]["dates"][0]
    venue = data["info"]["venue"]
    event = data["info"].get("event", {}).get("name", "Unknown Event")
    match_type = data["info"]["match_type"]
    total_overs = data["info"]["overs"]
    toss_winner = data["info"]["toss"]["winner"]
    toss_decision = data["info"]["toss"]["decision"]
    teams = data["info"]["teams"]
    innings_data = data["innings"]

    # Map player names to player IDs using the 'registry' section
    player_id_map = {name: str(player_id) for name, player_id in data["info"]["registry"]["people"].items()}
    
    # Map players to their respective teams
    player_team_map = {}
    for team in teams:
        for player in data["info"]["players"][team]:
            player_team_map[player] = team

    # Prepare player stats dictionary
    player_stats = defaultdict(lambda: {
        "date": match_date,
        "venue": venue,
        "event": event,
        "match_type": match_type,
        "total_overs": total_overs,
        "team": None,
        "runs_scored": 0,
        "balls_faced": 0,
        "boundaries": 0,
        "sixes": 0,
        "balls_bowled": 0,
        "wickets": 0,
        "runs_given": 0,
        "bowled_lbw": 0,
        "maidens": 0,
        "catches": 0,
        "stumpings": 0,
        "run_outs": 0
    })

    # Initialize player stats with team information from the rosters
    for player, team in player_team_map.items():
        player_stats[player]["team"] = team

    # Process innings
    for inning in innings_data:
        team = inning["team"]
        overs = inning.get("overs", [])
        for over in overs:
            maidens_flag = True  # To track maiden overs
            for delivery in over["deliveries"]:
                # Batting stats
                batter = delivery["batter"]
                bowler = delivery["bowler"]
                runs = delivery["runs"]["batter"]
                player_stats[batter]["runs_scored"] += runs
                player_stats[batter]["balls_faced"] += 1
                if runs == 4:
                    player_stats[batter]["boundaries"] += 1
                if runs == 6:
                    player_stats[batter]["sixes"] += 1

                # Wicket stats
                if "wickets" in delivery:
                    for wicket in delivery["wickets"]:
                        kind = wicket["kind"]
                        player_out = wicket["player_out"]

                        # Increment the bowler's wickets
                        player_stats[bowler]["wickets"] += 1

                        if kind in {"bowled", "lbw"}:
                            player_stats[bowler]["bowled_lbw"] += 1
                        if kind == "caught":
                            for fielder in wicket.get("fielders", []):
                                player_stats[fielder["name"]]["catches"] += 1
                        if kind == "stumped":
                            player_stats[bowler]["stumpings"] += 1
                        if kind == "run out":
                            for fielder in wicket.get("fielders", []):
                                player_stats[fielder["name"]]["run_outs"] += 1

                # Bowling stats
                if "extras" not in delivery or "wides" not in delivery["extras"] and "no_balls" not in delivery["extras"]:
                    player_stats[bowler]["balls_bowled"] += 1  # Count only legitimate balls

                player_stats[bowler]["runs_given"] += delivery["runs"]["total"]
                if delivery["runs"]["total"] > 0 or "extras" in delivery:
                    maidens_flag = False

            # Update maidens if all deliveries in the over were zero runs
            if maidens_flag:
                player_stats[bowler]["maidens"] += 1

    # Create 'player_data' folder if it does not exist
    os.makedirs(output_folder, exist_ok=True)

    # Write to individual player CSV files
    # for player, stats in player_stats.items():
    #     # Get player ID from the map
    #     player_id = str(player_id_map.get(player, f"Unknown_{player}"))  # Fallback to player name if ID is not found
    #     player_file = os.path.join(output_folder, f"{player_id}.csv")
        
    #     # Own team players
    #     fellow = stats["team"]
    #     fellow_players = [
    #         str(player_id_map.get(p, "Unknown_Player")) 
    #         for p in data["info"]["players"].get(fellow, []) 
    #         if p != player
    #     ]
        
    #     # Opponent team players
    #     opponent_team = teams[1] if teams[0] == fellow else teams[0]
    #     opponent_players = [
    #         str(player_id_map.get(p, "Unknown_Player")) 
    #         for p in data["info"]["players"].get(opponent_team, [])
    #     ]
    # for team in teams:
    #     for player in data["info"]["players"][team]:
    #         player_id = str(player_id_map[player])
    #         player_file = os.path.join(output_folder, f"{player_id}.csv")

    #         opponent_team = teams[1] if teams[0] == team else teams[0]
    #         opp_player_ids = [player_id_map[p] for p in data["info"]["players"][opponent_team][:11]]
    #         fellow_player_ids = [player_id_map[p] for p in data["info"]["players"][team] if p != player][:10]

    #         player_data = {
    #             "match_id": match_id,
    #             "player_id": player_id,
    #             "fellow_player1": fellow_player_ids[0],
    #             "fellow_player2": fellow_player_ids[1],
    #             "fellow_player3": fellow_player_ids[2],
    #             "fellow_player4": fellow_player_ids[3],
    #             "fellow_player5": fellow_player_ids[4],
    #             "fellow_player6": fellow_player_ids[5],
    #             "fellow_player7": fellow_player_ids[6],
    #             "fellow_player8": fellow_player_ids[7],
    #             "fellow_player9": fellow_player_ids[8],
    #             "fellow_player10": fellow_player_ids[9],
    #             # ... (up to fellow_player10)
    #             "opp_player1": opp_player_ids[0],
    #             "opp_player2": opp_player_ids[1],
    #             "opp_player3": opp_player_ids[2],
    #             "opp_player4": opp_player_ids[3],
    #             "opp_player5": opp_player_ids[4],
    #             "opp_player6": opp_player_ids[5],
    #             "opp_player7": opp_player_ids[6],
    #             "opp_player8": opp_player_ids[7],
    #             "opp_player9": opp_player_ids[8],
    #             "opp_player10": opp_player_ids[9],
    #             "opp_player11": opp_player_ids[10]
    #             # ... (up to opponent_player11)
    #         }
    
        # Check if file exists, if not create it with header
        file_exists = os.path.isfile(player_file)
        
        header = [
            "match_id", "date", "venue", "event", "match_type", "total_overs", "player_id", "team",
            "runs_scored", "balls_faced", "boundaries", 
            "sixes", "balls_bowled", "wickets", "runs_given", "bowled_lbw", 
            "maidens", "catches", "stumpings", "run_outs", 
            "batting_points", "bowling_points", "fielding_points", "total_fantasy_points",
            "opp_player1", "opp_player2", "opp_player3", "opp_player4", "opp_player5", 
            "opp_player6", "opp_player7", "opp_player8", "opp_player9", "opp_player10", "opp_player11",
            "fellow_player1", "fellow_player2", "fellow_player3", "fellow_player4",
            "fellow_player5", "fellow_player6", "fellow_player7", "fellow_player8",
            "fellow_player9", "fellow_player10"
        ]
        
        # Open the player CSV file
        with open(player_file, 'a', newline='') as f:
            writer = csv.writer(f)
            # Write the header if the file does not exist
            if not file_exists:
                writer.writerow(header)
            
            # Calculate fantasy points
            batting_points, bowling_points, fielding_points, total_fantasy_points = fantasy_calculator(stats)
            
            # Prepare data for this row
            row = [
                match_id, stats["date"], stats["venue"], stats["event"], stats["match_type"],
                stats["total_overs"], player_id, stats["team"], stats["runs_scored"], stats["balls_faced"],
                stats["boundaries"], stats["sixes"], stats["balls_bowled"], stats["wickets"],
                stats["runs_given"], stats["bowled_lbw"], stats["maidens"], stats["catches"],
                stats["stumpings"], stats["run_outs"], batting_points, bowling_points,
                fielding_points, total_fantasy_points
            ]
            row.extend(opponent_players[:11])  # Add opponent columns
            row.extend(fellow_players[:10])  # Add own team player IDs
            writer.writerow(row)


# Usage example
folder_path = "ipl_json"

# Iterate through all files in the folder
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    # Check if it is a file (not a folder)
    if os.path.isfile(file_path) & filename.endswith(".json"):
        process_match_data(file_path, "player_data")


NameError: name 'stats' is not defined

In [46]:
import json
import csv
import os
from collections import defaultdict
from fantasy_calculator import fantasy_calculator  # Import the fantasy_calculator function

def process_match_data(json_file, output_folder):
    # Load JSON data
    with open(json_file, 'r') as f:
        data = json.load(f)

    # Extract match info
    match_date = data["info"]["dates"][0]
    venue = data["info"]["venue"]
    event = data["info"].get("event", {}).get("name", "Unknown Event")
    match_type = data["info"]["match_type"]
    total_overs = data["info"]["overs"]
    toss_winner = data["info"]["toss"]["winner"]
    toss_decision = data["info"]["toss"]["decision"]
    teams = data["info"]["teams"]
    innings_data = data["innings"]

    # Map player names to player IDs using the 'registry' section
    player_id_map = {name: str(player_id) for name, player_id in data["info"]["registry"]["people"].items()}
    
    # Map players to their respective teams
    player_team_map = {}
    for team in teams:
        for player in data["info"]["players"][team]:
            player_team_map[player] = team

    # Prepare player stats dictionary
    player_stats = defaultdict(lambda: {
        "date": match_date,
        "venue": venue,
        "event": event,
        "match_type": match_type,
        "total_overs": total_overs,
        "team": None,
        "runs_scored": 0,
        "balls_faced": 0,
        "boundaries": 0,
        "sixes": 0,
        "balls_bowled": 0,
        "wickets": 0,
        "runs_given": 0,
        "bowled_lbw": 0,
        "maidens": 0,
        "catches": 0,
        "stumpings": 0,
        "run_outs": 0
    })

    # Initialize player stats with team information from the rosters
    for player, team in player_team_map.items():
        player_stats[player]["team"] = team

    # Process innings
    for inning in innings_data:
        team = inning["team"]
        overs = inning.get("overs", [])
        for over in overs:
            maidens_flag = True  # To track maiden overs
            for delivery in over["deliveries"]:
                # Batting stats
                batter = delivery["batter"]
                bowler = delivery["bowler"]
                runs = delivery["runs"]["batter"]
                player_stats[batter]["runs_scored"] += runs
                player_stats[batter]["balls_faced"] += 1
                if runs == 4:
                    player_stats[batter]["boundaries"] += 1
                if runs == 6:
                    player_stats[batter]["sixes"] += 1

                # Wicket stats
                if "wickets" in delivery:
                    for wicket in delivery["wickets"]:
                        kind = wicket["kind"]
                        player_out = wicket["player_out"]

                        # Increment the bowler's wickets
                        player_stats[bowler]["wickets"] += 1

                        if kind in {"bowled", "lbw"}:
                            player_stats[bowler]["bowled_lbw"] += 1
                        if kind == "caught":
                            for fielder in wicket.get("fielders", []):
                                player_stats[fielder["name"]]["catches"] += 1
                        if kind == "stumped":
                            player_stats[bowler]["stumpings"] += 1
                        if kind == "run out":
                            for fielder in wicket.get("fielders", []):
                                player_stats[fielder["name"]]["run_outs"] += 1

                # Bowling stats
                if "extras" not in delivery or "wides" not in delivery["extras"] and "no_balls" not in delivery["extras"]:
                    player_stats[bowler]["balls_bowled"] += 1  # Count only legitimate balls

                player_stats[bowler]["runs_given"] += delivery["runs"]["total"]
                if delivery["runs"]["total"] > 0 or "extras" in delivery:
                    maidens_flag = False

            # Update maidens if all deliveries in the over were zero runs
            if maidens_flag:
                player_stats[bowler]["maidens"] += 1

    # Add opponent playing XI
    for player in player_stats:
        team = player_stats[player]["team"]
        opponent_team = teams[1] if teams[0] == team else teams[0]
        opponent_playing_xi = data["info"]["players"].get(opponent_team, [])
        
        for i, opponent_player in enumerate(opponent_playing_xi[:11]):
            # Validate opponent player and assign default if missing
            player_stats[player][f"player{i+1}"] = opponent_player

        # Validate opponent player columns before writing
        for i in range(11):
            col_name = f"player{i+1}"
            if not isinstance(player_stats[player][col_name], str):
                player_stats[player][col_name] = "Unknown_Player"

    # Create 'player_data' folder if it does not exist
    os.makedirs(output_folder, exist_ok=True)

    # Write to individual player CSV files
    for player, stats in player_stats.items():
        # Use player name as the filename
        player_name = player
        player_file = os.path.join(output_folder, f"{player_name}.csv")
        
        print(f"Processing player: {player}, Player Name: {player_name}, File: {player_file}")  # Debugging statement
        
        # Check if file exists, if not create it with header
        file_exists = os.path.isfile(player_file)
        
        header = [
            "date", "venue", "event", "match_type", "total_overs", "player_name", "team",
            "runs_scored", "balls_faced", "boundaries", 
            "sixes", "balls_bowled", "wickets", "runs_given", "bowled_lbw", 
            "maidens", "catches", "stumpings", "run_outs", 
            "batting_points", "bowling_points", "fielding_points", "total_fantasy_points",
            "player1", "player2", "player3", "player4", "player5", "player6", 
            "player7", "player8", "player9", "player10", "player11"
        ]
        
        # Open the player CSV file
        with open(player_file, 'a', newline='') as f:
            writer = csv.writer(f)
            # Write the header if the file does not exist
            if not file_exists:
                writer.writerow(header)
            
            # Calculate fantasy points
            batting_points, bowling_points, fielding_points, total_fantasy_points = fantasy_calculator(stats)
            
            # Prepare data for this row
            # Prepare data for this row (all stats except the fantasy points and opponent players)
# Prepare the main stats row (excluding fantasy points and opponent players)
            row = [str(stats[col]) if col in stats else "0" for col in header[:19]]  # All columns except fantasy points and opponent players

            # Replace player_name instead of player_id for 'player_name' column
            row[5] = player_name  # Player name column (instead of player_id)

            # Append the calculated fantasy points (batting, bowling, fielding, and total)
            row.append(batting_points)  # Batting points
            row.append(bowling_points)  # Bowling points
            row.append(fielding_points)  # Fielding points
            row.append(total_fantasy_points)  # Total fantasy points

            # Add opponent player names to the row (after the fantasy points)
            row.extend([str(stats.get(f"player{i+1}", "Unknown_Player")) for i in range(11)])

            # Write the row to the CSV file
            writer.writerow(row)




In [47]:
folder_path = "ipl_json"

# Iterate through all files in the folder
for filename in os.listdir(folder_path):
    file_path = os.path.join(folder_path, filename)
    # Check if it is a file (not a folder)
    if os.path.isfile(file_path) & filename.endswith(".json"):
        process_match_data(file_path, "player_csv")

Processing player: DA Warner, Player Name: DA Warner, File: player_csv\DA Warner.csv
Processing player: S Dhawan, Player Name: S Dhawan, File: player_csv\S Dhawan.csv
Processing player: MC Henriques, Player Name: MC Henriques, File: player_csv\MC Henriques.csv
Processing player: Yuvraj Singh, Player Name: Yuvraj Singh, File: player_csv\Yuvraj Singh.csv
Processing player: DJ Hooda, Player Name: DJ Hooda, File: player_csv\DJ Hooda.csv
Processing player: BCJ Cutting, Player Name: BCJ Cutting, File: player_csv\BCJ Cutting.csv
Processing player: NV Ojha, Player Name: NV Ojha, File: player_csv\NV Ojha.csv
Processing player: Bipul Sharma, Player Name: Bipul Sharma, File: player_csv\Bipul Sharma.csv
Processing player: B Kumar, Player Name: B Kumar, File: player_csv\B Kumar.csv
Processing player: A Nehra, Player Name: A Nehra, File: player_csv\A Nehra.csv
Processing player: Rashid Khan, Player Name: Rashid Khan, File: player_csv\Rashid Khan.csv
Processing player: CH Gayle, Player Name: CH Gayle

In [39]:
import os
import pandas as pd

def find_outliers_in_folder(folder_path, column_name):
    for file_name in os.listdir(folder_path):
        if file_name.endswith(".csv"):
            file_path = os.path.join(folder_path, file_name)

            try:
                df = pd.read_csv(file_path) 

                non_string_rows = df[~df[column_name].apply(lambda x: isinstance(x, str))]

                if not non_string_rows.empty:
                    print(f"Outliers found in file '{file_name}':")
                    for _, row in non_string_rows.iterrows():
                        print(f" - Player ID: {row[column_name]}")
                    print("-" * 50)
            except Exception as e:
                print(f"Error processing file '{file_name}': {e}")

# Example usage
folder_path = "player_csv"
# column_name = "player_id"

# find_outliers_in_folder(folder_path, column_name)


def check_for_empty_cells(file_path):
    """Checks if a given CSV file has any empty cells.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        bool: True if the file has empty cells, False otherwise.
    """

    df = pd.read_csv(file_path)
    if df.isnull().values.any():
        return True
    else:
        return False

# Example usage:
for file_name in os.listdir(folder_path):
        if file_name.endswith(".csv"):
            file_path = os.path.join(folder_path, file_name)
            # file_path = "your_data.csv"
            has_empty_cells = check_for_empty_cells(file_path)

            if has_empty_cells:
                print(f"The {file_path}CSV file has empty cells.")
            # else:
            #     print("The CSV file has no empty cells.")


The player_csv\A Ashish Reddy.csvCSV file has empty cells.
The player_csv\A Badoni.csvCSV file has empty cells.
The player_csv\A Chandila.csvCSV file has empty cells.
The player_csv\A Chopra.csvCSV file has empty cells.
The player_csv\A Choudhary.csvCSV file has empty cells.
The player_csv\A Dananjaya.csvCSV file has empty cells.
The player_csv\A Flintoff.csvCSV file has empty cells.
The player_csv\A Kamboj.csvCSV file has empty cells.
The player_csv\A Kumble.csvCSV file has empty cells.
The player_csv\A Manohar.csvCSV file has empty cells.
The player_csv\A Mishra.csvCSV file has empty cells.
The player_csv\A Mithun.csvCSV file has empty cells.
The player_csv\A Mukund.csvCSV file has empty cells.
The player_csv\A Nehra.csvCSV file has empty cells.
The player_csv\A Nel.csvCSV file has empty cells.
The player_csv\A Nortje.csvCSV file has empty cells.
The player_csv\A Raghuvanshi.csvCSV file has empty cells.
The player_csv\A Singh.csvCSV file has empty cells.
The player_csv\A Symonds.csvC

In [18]:
df = pd.read_csv(file_path)
df = df.rename(columns={"identifier": "player_id"})
df.to_csv(file_path, index=False)

In [59]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping


# Directory containing individual player data files
data_folder = 'player_csv'  # Replace with the actual folder path

# Function to compute rolling averages
def compute_rolling_averages(df, player_name, date, column, windows):
    player_data = df[(df['player_name'] == player_name) & (df['date'] < date)].sort_values(by='date')
    averages = []
    for window in windows:
        avg = player_data[column].tail(window).mean()
        averages.append(avg if not np.isnan(avg) else 0)
    overall_avg = player_data[column].mean()
    averages.append(overall_avg if not np.isnan(overall_avg) else 0)
    return averages

# Load all player data from CSV files
player_files = [f for f in os.listdir(data_folder) if f.endswith('.csv')]
features = []
targets = []

windows = [3, 5, 10, 20]

# Loop through each player's data file
for player_file in player_files:
    # print(player_file)
    player_data = pd.read_csv(os.path.join(data_folder, player_file))
    # print(player_data)
    # Loop through each match in the player's data
    for i, row in player_data.iterrows():
        # Compute rolling averages for the current player
        batting_avgs = compute_rolling_averages(player_data, row['player_name'], row['date'], 'batting_points', windows)
        bowling_avgs = compute_rolling_averages(player_data, row['player_name'], row['date'], 'bowling_points', windows)
        fielding_avgs = compute_rolling_averages(player_data, row['player_name'], row['date'], 'fielding_points', windows)
        
        # Now compute rolling averages for the 11 opponents
        opponent_features = []
        for j in range(1, 12):  # opponent columns: player1 to player11
            opponent_name = row[f'player{j}']
            opponent_data = pd.read_csv(os.path.join(data_folder, f'{opponent_name}.csv'))  # Opponent player's data
            
            # Compute rolling averages for the opponent's batting, bowling, and fielding
            opponent_batting_avgs = compute_rolling_averages(opponent_data, opponent_name, row['date'], 'batting_points', windows)
            opponent_bowling_avgs = compute_rolling_averages(opponent_data, opponent_name, row['date'], 'bowling_points', windows)
            opponent_fielding_avgs = compute_rolling_averages(opponent_data, opponent_name, row['date'], 'fielding_points', windows)
            
            # Append the opponent's metrics to the opponent_features list
            opponent_features += opponent_batting_avgs + opponent_bowling_avgs + opponent_fielding_avgs
        
        # Combine the player's features and opponent features
        full_features = batting_avgs + bowling_avgs + fielding_avgs + opponent_features
        
        # Append the features and target (batting, bowling, fielding points) to the dataset
        features.append(full_features)
        targets.append([row['batting_points'], row['bowling_points'], row['fielding_points']])

# Convert features and targets to numpy arrays
features = np.array(features)
targets = np.array(targets)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.2, random_state=42)

# Define the Neural Network model
model = Sequential([
    Dense(128, input_dim=X_train.shape[1], activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(3, activation='linear')  # Output: Batting, Bowling, Fielding Points
])

# Compile the model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mae', metrics=['mae'])

# Set up callbacks
log_dir = 'logs/fit/'
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Change the checkpoint file to save in the .keras format
checkpoint_callback = ModelCheckpoint('model_checkpoint.keras', save_best_only=True, monitor='val_loss', mode='min', verbose=1)

early_stopping_callback = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model with callbacks
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[tensorboard_callback, checkpoint_callback, early_stopping_callback]
)

# Save the final model in .keras format
model.save('final_model.keras')

# Evaluate the model
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f"Test Loss (MAE): {test_loss}")

# Predictions
predictions = model.predict(X_test)

# Display some predictions vs actual
for i in range(5):
    print(f"Actual: {y_test[i]}, Predicted: {predictions[i]}")


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m454/491[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 1ms/step - loss: 11.2280 - mae: 11.2280
Epoch 1: val_loss improved from inf to 10.23877, saving model to model_checkpoint.keras
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 11.1877 - mae: 11.1877 - val_loss: 10.2388 - val_mae: 10.2388
Epoch 2/100
[1m478/491[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - loss: 10.2790 - mae: 10.2790
Epoch 2: val_loss improved from 10.23877 to 10.05162, saving model to model_checkpoint.keras
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 10.2792 - mae: 10.2792 - val_loss: 10.0516 - val_mae: 10.0516
Epoch 3/100
[1m455/491[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 1ms/step - loss: 10.2387 - mae: 10.2387
Epoch 3: val_loss did not improve from 10.05162
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 10.2341 - mae: 10.2341 - val_loss: 10.1552 - val_mae: 10

In [None]:
model.load_weights('model_checkpoint.h5')
# Continue training...
history = model.fit(...)


In [None]:
combined_data=np.concatenate((features,targets),axis=1)
df = pd.DataFrame(combined_data)
df.to_csv('combined_data.csv', index=False)



In [73]:
print(features.shape)
print(targets[1].shape)

(24537, 180)
(3,)


In [72]:
import shap
from sklearn.datasets import load_iris
explainer=shap.DeepExplainer(model,X_train)

shap_values=explainer.shap_values(X_test)
shap.summary_plot(shap_values[0], X_test, feature_names=iris.feature_names)
# shap.dependence_plot()



NameError: name 'iris' is not defined

In [78]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping

def compute_rolling_averages(df, player_name, date, column, windows):
    player_data = df[(df['player_name'] == player_name) & (df['date'] < date)].sort_values(by='date')
    averages = []
    for window in windows:
        avg = player_data[column].tail(window).mean()
        averages.append(avg if not np.isnan(avg) else 0)
    overall_avg = player_data[column].mean()
    averages.append(overall_avg if not np.isnan(overall_avg) else 0)
    return averages

def get_player_features(player_name, date, data_folder, windows):
    """
    Fetches the rolling averages and features for a single player.
    """
    player_file = os.path.join(data_folder, f"{player_name}.csv")
    if not os.path.exists(player_file):
        raise FileNotFoundError(f"Data file for player {player_name} not found.")
    
    # Load player data
    player_data = pd.read_csv(player_file)
    
    # Compute rolling averages for batting, bowling, and fielding points
    batting_avgs = compute_rolling_averages(player_data, player_name, date, 'batting_points', windows)
    bowling_avgs = compute_rolling_averages(player_data, player_name, date, 'bowling_points', windows)
    fielding_avgs = compute_rolling_averages(player_data, player_name, date, 'fielding_points', windows)
    
    return batting_avgs + bowling_avgs + fielding_avgs


def get_team_features(team_players, opponent_players, date, data_folder, windows):
    """
    Computes features for all players in a team against the given opponent players.
    """
    team_features = []
    
    # Compute opponent metrics once for reuse
    opponent_features = []
    for opponent_name in opponent_players:
        opponent_file = os.path.join(data_folder, f"{opponent_name}.csv")
        if not os.path.exists(opponent_file):
            raise FileNotFoundError(f"Data file for opponent player {opponent_name} not found.")
        
        # Load opponent data
        opponent_data = pd.read_csv(opponent_file)
        
        # Compute rolling averages for opponent's batting, bowling, and fielding points
        opponent_batting_avgs = compute_rolling_averages(opponent_data, opponent_name, date, 'batting_points', windows)
        opponent_bowling_avgs = compute_rolling_averages(opponent_data, opponent_name, date, 'bowling_points', windows)
        opponent_fielding_avgs = compute_rolling_averages(opponent_data, opponent_name, date, 'fielding_points', windows)
        
        opponent_features += opponent_batting_avgs + opponent_bowling_avgs + opponent_fielding_avgs
    
    # Compute features for each player in the team
    for player_name in team_players:
        player_features = get_player_features(player_name, date, data_folder, windows)
        full_features = player_features + opponent_features
        team_features.append((player_name, full_features))
    
    return team_features


def select_top_11_players(team1, team2, date, model, data_folder, windows):
    """
    Selects the top 11 players based on predicted fantasy points from two teams,
    including batting, bowling, and fielding points.
    """
    # Compute features for all 22 players
    team1_features = get_team_features(team1, team2, date, data_folder, windows)
    team2_features = get_team_features(team2, team1, date, data_folder, windows)
    
    # Combine all players into a single list
    all_players = team1_features + team2_features
    
    # Extract features for prediction
    player_names = [player[0] for player in all_players]
    feature_matrix = np.array([player[1] for player in all_players])
    
    # Predict fantasy points using the trained model
    predictions = model.predict(feature_matrix)
    total_fantasy_points = predictions.sum(axis=1)  # Sum batting, bowling, and fielding points
    
    # Combine player names with their predicted points
    player_scores = [
        (player_names[i], predictions[i][0], predictions[i][1], predictions[i][2], total_fantasy_points[i])
        for i in range(len(player_names))
    ]
    
    # Sort players by total fantasy points in descending order
    sorted_players = sorted(player_scores, key=lambda x: x[4], reverse=True)
    
    # Select the top 11 players
    top_11_players = sorted_players[:11]
    
    return top_11_players



# Example Usage
team1 = ["RD Gaikwad",
        "DP Conway",
        "RV Uthappa",
        "AT Rayudu",
        "RA Jadeja",
        "S Dube",
        "MS Dhoni",
        "DJ Bravo",
        "MJ Santner",
        "AF Milne",
        "TU Deshpande"]
team2 = ["AM Rahane",
        "VR Iyer",
        "N Rana",
        "SS Iyer",
        "SW Billings",
        "SP Jackson",
        "AD Russell",
        "SP Narine",
        "UT Yadav",
        "Shivam Mavi",
        "CV Varun"]
date = '2022-03-26'  # Replace with the match date



def fetch_actual_points(players, date, data_folder):
    """
    Fetches the actual batting, bowling, fielding, and total points for the given players.
    
    Args:
        players (list): List of player names.
        date (str): The match date for fetching actual points.
        data_folder (str): Path to the folder containing player CSV files.
    
    Returns:
        List of tuples: [(player_name, batting_points, bowling_points, fielding_points, total_points), ...]
    """
    actual_points = []
    
    for player_name in players:
        player_file = os.path.join(data_folder, f"{player_name}.csv")
        if not os.path.exists(player_file):
            raise FileNotFoundError(f"Data file for player {player_name} not found.")
        
        # Load player data
        player_data = pd.read_csv(player_file)
        
        # Find the row matching the given date
        player_row = player_data[player_data['date'] == date]
        if player_row.empty:
            print(f"No data found for player {player_name} on date {date}. Assigning 0 points.")
            batting_points = bowling_points = fielding_points = total_points = 0
        else:
            batting_points = player_row.iloc[0]['batting_points']
            bowling_points = player_row.iloc[0]['bowling_points']
            fielding_points = player_row.iloc[0]['fielding_points']
            total_points = batting_points + bowling_points + fielding_points
        
        # Append to the list
        actual_points.append((player_name, batting_points, bowling_points, fielding_points, total_points))
    
    return actual_points


In [79]:


# Load the trained model
from tensorflow.keras.models import load_model
model = load_model('final_model.keras')

# Select top 11 players
# Select top 11 players
top_11 = select_top_11_players(team1, team2, date, model, data_folder='player_csv', windows=[3, 5, 10, 20])

# Display the results
print("Top 11 Players Based on Predicted Fantasy Points:")
print(f"{'Player':<15} {'Batting':<10} {'Bowling':<10} {'Fielding':<10} {'Total Points':<15}")
print("-" * 60)
for player, batting, bowling, fielding, total in top_11:
    print(f"{player:<15} {batting:<10.2f} {bowling:<10.2f} {fielding:<10.2f} {total:<15.2f}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Top 11 Players Based on Predicted Fantasy Points:
Player          Batting    Bowling    Fielding   Total Points   
------------------------------------------------------------
RD Gaikwad      28.87      0.01       0.29       29.16          
VR Iyer         27.40      0.02       0.39       27.81          
SP Narine       1.37       25.29      -0.04      26.63          
CV Varun        0.19       26.24      -0.06      26.38          
Shivam Mavi     0.47       25.18      -0.06      25.58          
DJ Bravo        0.99       24.52      -0.04      25.47          
AF Milne        0.92       23.22      0.03       24.18          
MJ Santner      1.70       20.47      0.10       22.27          
UT Yadav        0.09       21.16      -0.09      21.16          
SS Iyer         19.79      -0.03      0.32       20.08          
RV Uthappa      18.44      0.03       0.21       18.68          


In [80]:
actual_points = fetch_actual_points(team1 + team2, date, data_folder='player_csv')

# Display the actual points
print("Actual Points for All 22 Players:")
print(f"{'Player':<15} {'Batting':<10} {'Bowling':<10} {'Fielding':<10} {'Total Points':<15}")
print("-" * 60)
for player, batting, bowling, fielding, total in actual_points:
    print(f"{player:<15} {batting:<10.2f} {bowling:<10.2f} {fielding:<10.2f} {total:<15.2f}")

Actual Points for All 22 Players:
Player          Batting    Bowling    Fielding   Total Points   
------------------------------------------------------------
RD Gaikwad      -2.00      0.00       0.00       -2.00          
DP Conway       3.00       0.00       0.00       3.00           
RV Uthappa      34.00      0.00       0.00       34.00          
AT Rayudu       18.00      0.00       8.00       26.00          
RA Jadeja       28.00      2.00       8.00       38.00          
S Dube          3.00       0.00       0.00       3.00           
MS Dhoni        69.00      0.00       8.00       77.00          
DJ Bravo        0.00       83.00      0.00       83.00          
MJ Santner      0.00       25.00      0.00       25.00          
AF Milne        0.00       0.00       0.00       0.00           
TU Deshpande    0.00       0.00       8.00       8.00           
AM Rahane       56.00      0.00       0.00       56.00          
VR Iyer         18.00      0.00       0.00       18.00      