In [2]:
import numpy as np
import pandas as pd
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from collections import deque
import matplotlib.pyplot as plt

# ---------------------------
# 1. Load and merge datasets
# ---------------------------
batting_df = pd.read_csv("batsman_avg_pivot_1.csv")
bowling_df = pd.read_csv("batsman_sr_pivot_1.csv")
venue_df = pd.read_csv("batsman_matches_pivot_1.csv")
history_df = pd.read_csv("final_del_1.csv")

# Merge all datasets on player name
df = batting_df.merge(bowling_df, on="batsman", how="outer")
df = df.merge(venue_df, on="batsman", how="outer")
df = df.merge(history_df, on="batsman", how="outer", suffixes=('', '_hist'))

# Replace NaNs with 0
df.fillna(0, inplace=True)

# Remove duplicate columns if any
df = df.loc[:, ~df.columns.duplicated()]

# Rename for simplicity
df.rename(columns={"batsman": "player_name"}, inplace=True)

# ----------------------------
# 2. Take user input
# ----------------------------
print("\nAvailable Players:")
print(df["player_name"].unique().tolist())
user_squad = input("\nEnter your squad (comma-separated player names, at least 15): ").split(",")

venues = [col for col in df.columns if col not in ['player_name'] and not col.endswith('_hist')]
print("\nAvailable Venues:", venues)
selected_venue = input("\nEnter a venue name from above: ")

venue_col = selected_venue.strip()
if venue_col not in df.columns:
    print(f"Error: Venue column '{venue_col}' not found.")
    exit()

# Use selected venue and generate placeholder stats
df = df[["player_name", venue_col]].copy()
df["batting_avg"] = np.random.uniform(20, 50, len(df))
df["matches_played"] = np.random.randint(5, 20, len(df))
df["strike_rate"] = np.random.uniform(110, 150, len(df))

df.columns = ["player_name", "runs_scored", "batting_avg", "matches_played", "strike_rate"]

# Filter squad
df = df[df["player_name"].isin([p.strip() for p in user_squad])].reset_index(drop=True)

# Add dummy bowling stats for now
df["wickets"] = np.random.randint(0, 10, len(df))
df["econ_rate"] = np.random.uniform(5.0, 9.0, len(df))
df["venue_factor"] = 1.0  # Default (can later customize based on pitch)

# Normalize features
features = ["batting_avg", "strike_rate", "wickets", "econ_rate", "venue_factor"]
df[features] = (df[features] - df[features].mean()) / df[features].std()

# ----------------------------
# 3. DQN Agent Setup
# ----------------------------
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self.build_model()

    def build_model(self):
        model = Sequential([
            Dense(32, input_dim=self.state_size, activation='relu'),
            Dense(32, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate), loss='mse')
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state.reshape(1, -1), verbose=0)
        return np.argmax(act_values[0])

    def replay(self, batch_size=16):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model.predict(next_state.reshape(1, -1), verbose=0))
            target_f = self.model.predict(state.reshape(1, -1), verbose=0)
            target_f[0][action] = target
            self.model.fit(state.reshape(1, -1), target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# ----------------------------
# 4. Training the DQN Agent
# ----------------------------
state_size = len(features)
action_size = len(df)
agent = DQNAgent(state_size, action_size)

rewards_dict = {}

# Reward based on role
for idx, row in df.iterrows():
    batting_reward = 0.4 * row["batting_avg"] + 0.6 * row["strike_rate"]
    bowling_reward = 0.7 * row["wickets"] - 0.3 * row["econ_rate"]
    allrounder_reward = 0.5 * batting_reward + 0.5 * bowling_reward
    rewards_dict[row["player_name"]] = allrounder_reward * row["venue_factor"]

episodes = 300
for e in range(episodes):
    state = df[features].iloc[random.randint(0, len(df) - 1)].values
    for t in range(5):
        action = agent.act(state)
        reward = list(rewards_dict.values())[action]
        next_state = df[features].iloc[action].values
        done = t == 4
        agent.remember(state, action, reward, next_state, done)
        state = next_state
    agent.replay(16)

# ----------------------------
# 5. Display Top 11 Players
# ----------------------------
ranked_players = []
for idx, row in df.iterrows():
    state = row[features].values
    q_val = agent.model.predict(state.reshape(1, -1), verbose=0)
    ranked_players.append((row["player_name"], np.max(q_val)))

ranked_players = sorted(ranked_players, key=lambda x: x[1], reverse=True)

print("\n🏏 Selected Playing XI Based on DQN Ranking 🏏")
for i, (player, score) in enumerate(ranked_players[:11]):
    print(f"{i+1}. {player} (Score: {score:.2f})")


Available Players:
['A Ashish Reddy', 'A Chandila', 'A Chopra', 'A Choudhary', 'A Flintoff', 'A Kumble', 'A Mishra', 'A Mithun', 'A Mukund', 'A Nehra', 'A Singh', 'A Symonds', 'A Uniyal', 'A Zampa', 'AA Bilakhia', 'AA Chavan', 'AA Jhunjhunwala', 'AA Noffke', 'AB Agarkar', 'AB Barath', 'AB Dinda', 'AB McDonald', 'AB de Villiers', 'AC Blizzard', 'AC Gilchrist', 'AC Thomas', 'AC Voges', 'AD Mascarenhas', 'AD Mathews', 'AD Nath', 'AD Russell', 'AF Milne', 'AG Murtaza', 'AG Paunikar', 'AJ Finch', 'AJ Tye', 'AL Menaria', 'AM Nayar', 'AM Rahane', 'AN Ahmed', 'AN Ghosh', 'AP Dole', 'AP Majumdar', 'AP Tare', 'AR Bawne', 'AR Patel', 'AS Rajpoot', 'AS Raut', 'AS Yadav', 'AT Rayudu', 'AUK Pathan', 'Abdur Razzak', 'Anirudh Singh', 'Ankit Sharma', 'Ankit Soni', 'Anureet Singh', 'Azhar Mahmood', 'B Akhil', 'B Chipli', 'B Kumar', 'B Laughlin', 'B Lee', 'B Sumanth', 'BA Bhatt', 'BA Stokes', 'BAW Mendis', 'BB McCullum', 'BB Samantray', 'BB Sran', 'BCJ Cutting', 'BE Hendricks', 'BJ Haddin', 'BJ Hodge', 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: Invalid dtype: object

In [3]:
print("Available Columns:\n", df.columns.tolist())


Available Columns:
 ['player_name', 'Bangalore_x', 'Chennai_x', 'Delhi_x', 'Hyderabad_x', 'Jaipur_x', 'Kochi_x', 'Kolkata_x', 'Mohali_x', 'Mumbai_x', 'Pune_x', 'Rajkot_x', 'Bangalore_y', 'Chennai_y', 'Delhi_y', 'Hyderabad_y', 'Jaipur_y', 'Kochi_y', 'Kolkata_y', 'Mohali_y', 'Mumbai_y', 'Pune_y', 'Rajkot_y', 'Bangalore', 'Chennai', 'Delhi', 'Hyderabad', 'Jaipur', 'Kochi', 'Kolkata', 'Mohali', 'Mumbai', 'Pune', 'Rajkot', 'Bangalore_hist', 'Chennai_hist', 'Delhi_hist', 'Hyderabad_hist', 'Jaipur_hist', 'Kochi_hist', 'Kolkata_hist', 'Mohali_hist', 'Mumbai_hist', 'Pune_hist', 'Rajkot_hist']
