In [None]:
import numpy as np
import pandas as pd
import random
from collections import deque
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import StandardScaler, OneHotEncoder

class IPLTeamOptimizerRL:
    def __init__(self):
        """Set up replay memory, DQN hyperparameters and role rules, then load data.

        The Q-network is left as ``None`` here. Construction finishes by
        calling ``load_data``, which reads the CSV inputs from disk.
        """
        # --- Learning / exploration hyperparameters ---
        self.gamma = 0.95            # discount applied to future Q-values
        self.batch_size = 32         # experiences sampled per replay step
        self.epsilon = 1.0           # current exploration probability
        self.epsilon_min = 0.01      # floor below which epsilon stops decaying
        self.epsilon_decay = 0.995   # multiplicative decay factor for epsilon

        # --- Learning machinery ---
        self.memory = deque(maxlen=2000)   # bounded experience replay buffer
        self.model = None                  # Q-network placeholder
        self.scaler = StandardScaler()     # normalizes numeric player features

        # --- Team composition rules ---
        # Minimum number of players of each role in a valid XI
        self.role_requirements = {'Batsman': 4, 'Bowler': 3, 'AllRounder': 3, 'WK': 1}

        # One-hot encoder over the same roles (not used directly in current flow)
        role_categories = [['Batsman', 'Bowler', 'AllRounder', 'WK']]
        self.role_encoder = OneHotEncoder(categories=role_categories, sparse_output=False)

        # Read and merge every dataset into self.df
        self.load_data()

    def load_data(self):
        """Read the four CSV inputs and join them into ``self.df``.

        Each raw table is kept on the instance. The tables are merged one
        after another on the ``batsman`` column, and that column is finally
        exposed as ``player_name``.
        """
        # Raw tables, one attribute per file
        self.batting_avg = pd.read_csv("batsman_avg_pivot_1.csv")
        self.strike_rate = pd.read_csv("batsman_sr_pivot_1.csv")
        self.matches_played = pd.read_csv("batsman_matches_pivot_1.csv")
        self.historical = pd.read_csv("final_del_1.csv")

        # Step-by-step joins; the suffixes disambiguate columns that share a name
        combined = self.batting_avg.merge(
            self.strike_rate, on='batsman', suffixes=('_avg', '_sr'))
        combined = combined.merge(
            self.matches_played, on='batsman', suffixes=('', '_matches'))
        combined = combined.merge(
            self.historical, on='batsman', suffixes=('', '_hist'))

        # Use 'player_name' as the key column from here on
        self.df = combined.rename(columns={'batsman': 'player_name'})

    def prepare_squad(self, squad_players, venue):
        """Build the per-venue feature table for a squad and create the Q-network.

        Args:
            squad_players: Player names to keep from ``self.df``.
            venue: Venue name; used to look up the ``<venue>_avg``,
                ``<venue>_sr``, ``<venue>`` and ``<venue>_hist`` columns.

        Returns:
            ``self.players_df``: one row per squad player with normalized
            numeric features and a heuristic role label.
        """
        in_squad = self.df['player_name'].isin(squad_players)
        self.squad_df = self.df.loc[in_squad].copy()
        self.venue = venue

        # Column names derived from the venue
        avg_col = f'{venue}_avg'
        sr_col = f'{venue}_sr'
        hist_col = f'{venue}_hist'

        # One record per squad member: venue stats plus a classified role
        self.players_df = pd.DataFrame([
            {
                'player_name': record['player_name'],
                'batting_avg': record[avg_col],
                'strike_rate': record[sr_col],
                'matches_played': record[venue],
                'historical_perf': record[hist_col],
                'role': self.classify_role(record, venue),
            }
            for _, record in self.squad_df.iterrows()
        ])

        # Placeholder bowling stats drawn at random (can be replaced with real data)
        squad_size = len(self.players_df)
        self.players_df['wickets'] = np.random.randint(0, 50, squad_size)
        self.players_df['economy'] = np.random.uniform(6.0, 9.0, squad_size)

        # Fill gaps with 0, cast to float32, then standardize the numeric features
        numeric_features = ['batting_avg', 'strike_rate', 'matches_played', 'wickets', 'economy']
        filled = self.players_df[numeric_features].fillna(0).astype('float32')
        self.players_df[numeric_features] = filled
        self.players_df[numeric_features] = self.scaler.fit_transform(
            self.players_df[numeric_features])

        # State = 4 role counts + 5 player features; one action per squad player
        self.state_size = 9
        self.action_size = squad_size

        # Fresh Q-network sized for this squad
        self.build_model()

        return self.players_df

    def classify_role(self, player_data, venue):
        # Basic heuristic to classify a player's role
        avg = player_data.get(f'{venue}_avg', 0) or 0
        sr = player_data.get(f'{venue}_sr', 0) or 0
        matches = player_data.get(venue, 0) or 0

        if avg > 35 and sr > 130:
            if matches > 10 and random.random() > 0.7:
                return 'WK'  # Simulated WK detection
            return 'Batsman'
        elif avg < 20 and sr < 110:
            return 'Bowler'
        else:
            return 'AllRounder'

    def build_model(self):
        """Create and compile the Q-network, storing it in ``self.model``.

        The network maps a state vector of length ``self.state_size`` to one
        Q-value per action (``self.action_size`` linear outputs) through two
        64-unit ReLU layers with dropout. It is compiled with Adam
        (learning rate 0.001) and mean-squared-error loss.
        """
        self.model = Sequential([
            # Declare the input with an explicit Input layer: passing
            # `input_shape` to the first Dense layer is the deprecated pattern
            # that makes Keras emit a UserWarning.
            tf.keras.Input(shape=(self.state_size,)),
            Dense(64, activation='relu'),
            Dropout(0.2),
            Dense(64, activation='relu'),
            Dropout(0.2),
            Dense(self.action_size, activation='linear'),
        ])
        self.model.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss='mse')

    def get_state(self, selected_indices):
        # Form the current state as: [role_counts] + [next player's features]
        role_counts = np.zeros(4, dtype='float32')  # Batsman, Bowler, AllRounder, WK
        role_order = ['Batsman', 'Bowler', 'AllRounder', 'WK']

        for idx in selected_indices:
            role = self.players_df.iloc[idx]['role']
            role_counts[role_order.index(role)] += 1

        # Select next available player
        available = [i for i in range(self.action_size) if i not in selected_indices]
        if not available:
            return None

        next_player = self.players_df.iloc[available[0]]
        player_features = next_player[['batting_avg', 'strike_rate', 'matches_played', 'wickets', 'economy']].values

        return np.concatenate([role_counts, player_features]).astype('float32')

    def calculate_reward(self, selected_indices):
        # Return a negative reward if team is incomplete or unbalanced
        if len(selected_indices) != 11:
            return -5

        team = self.players_df.iloc[selected_indices]
        role_counts = team['role'].value_counts().to_dict()

        # Check if role requirements are fulfilled
        for role, req in self.role_requirements.items():
            if role_counts.get(role, 0) < req:
                return -10

        # Compute reward as a weighted combination of features
        batting_str = 0.6 * team['batting_avg'].mean() + 0.4 * team['strike_rate'].mean()
        bowling_str = 0.7 * team['wickets'].mean() - 0.3 * team['economy'].mean()
        venue_exp = np.log(team['matches_played'].sum() + 1) * 0.2

        return float(batting_str + bowling_str + venue_exp)

    def remember(self, state, action, reward, next_state, done):
        # Store experience in replay memory
        if state is not None and next_state is not None:
            self.memory.append((state, action, reward, next_state, done))

    def act(self, state, valid_actions):
        # Epsilon-greedy action selection
        if np.random.rand() <= self.epsilon:
            return random.choice(valid_actions)
        act_values = self.model.predict(state.reshape(1, -1), verbose=0)
        return valid_actions[np.argmax(act_values[0][valid_actions])]

    def replay(self):
        # Train model using a batch of past experiences
        if len(self.memory) < self.batch_size:
            return

        minibatch = random.sample(self.memory, self.batch_size)
        states = np.array([t[0] for t in minibatch], dtype='float32')
        actions = np.array([t[1] for t in minibatch])
        rewards = np.array([t[2] for t in minibatch], dtype='float32')
        next_states = np.array([t[3] for t in minibatch], dtype='float32')
        dones = np.array([t[4] for t in minibatch])

        targets = self.model.predict(states, verbose=0)
        q_next = self.model.predict(next_states, verbose=0)

        targets[range(self.batch_size), actions] = rewards + self.gamma * np.amax(q_next, axis=1) * (1 - dones)
        self.model.fit(states, targets, epochs=1, verbose=0, batch_size=self.batch_size)

        # Decay exploration rate
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def train(self, episodes=500):
        # Train the agent over multiple episodes
        for e in range(episodes):
            selected = []
            state = self.get_state(selected)
            total_reward = 0

            for _ in range(11):
                valid_actions = [i for i in range(self.action_size) if i not in selected]
                if not valid_actions or state is None:
                    break

                action = self.act(state, valid_actions)
                selected.append(action)

                next_state = self.get_state(selected)
                done = len(selected) == 11
                reward = self.calculate_reward(selected) if done else 0

                self.remember(state, action, reward, next_state, done)
                total_reward += reward
                state = next_state

            self.replay()

            if e % 50 == 0:
                print(f"Episode {e}, Reward: {total_reward:.2f}, Epsilon: {self.epsilon:.2f}")

    def select_final_team(self):
        # Use trained model to select best Playing XI
        selected = []
        state = self.get_state(selected)

        for _ in range(11):
            valid_actions = [i for i in range(self.action_size) if i not in selected]
            if not valid_actions or state is None:
                break
            action = self.act(state, valid_actions)
            selected.append(action)
            state = self.get_state(selected)

        final_team = self.players_df.iloc[selected]
        return final_team.sort_values('batting_avg', ascending=False)

# ------------------ Execution ------------------
if __name__ == "__main__":
    # Interactive run: load data, ask for a squad and a venue, train, print the XI.
    optimizer = IPLTeamOptimizerRL()
    print("Available players:", optimizer.df['player_name'].unique())

    # Gather user input (squad first, then venue)
    raw_squad = input("Enter squad players (comma separated, min 15): ")
    venue = input("Enter venue: ").strip()

    # Names are comma separated; trim whitespace around each one
    squad = [name.strip() for name in raw_squad.strip().split(',')]
    players_df = optimizer.prepare_squad(squad, venue)
    print(f"\nInitialized with {len(players_df)} players at {venue}")

    # Train the agent, then ask it for the final team
    print("\nTraining the DQN model...")
    optimizer.train(episodes=300)
    final_team = optimizer.select_final_team()

    print("\n🏏 Optimized Playing XI:")
    name_column = final_team[['player_name']]
    print(name_column.to_string(index=False))


Available players: ['A Ashish Reddy' 'A Chandila' 'A Chopra' 'A Choudhary' 'A Flintoff'
 'A Kumble' 'A Mishra' 'A Mithun' 'A Mukund' 'A Nehra' 'A Singh'
 'A Symonds' 'A Uniyal' 'A Zampa' 'AA Bilakhia' 'AA Chavan'
 'AA Jhunjhunwala' 'AA Noffke' 'AB Agarkar' 'AB Barath' 'AB Dinda'
 'AB McDonald' 'AB de Villiers' 'AC Blizzard' 'AC Gilchrist' 'AC Thomas'
 'AC Voges' 'AD Mascarenhas' 'AD Mathews' 'AD Nath' 'AD Russell'
 'AF Milne' 'AG Murtaza' 'AG Paunikar' 'AJ Finch' 'AJ Tye' 'AL Menaria'
 'AM Nayar' 'AM Rahane' 'AN Ahmed' 'AN Ghosh' 'AP Dole' 'AP Majumdar'
 'AP Tare' 'AR Bawne' 'AR Patel' 'AS Rajpoot' 'AS Raut' 'AS Yadav'
 'AT Rayudu' 'AUK Pathan' 'Abdur Razzak' 'Anirudh Singh' 'Ankit Sharma'
 'Ankit Soni' 'Anureet Singh' 'Azhar Mahmood' 'B Akhil' 'B Chipli'
 'B Kumar' 'B Laughlin' 'B Lee' 'B Sumanth' 'BA Bhatt' 'BA Stokes'
 'BAW Mendis' 'BB McCullum' 'BB Samantray' 'BB Sran' 'BCJ Cutting'
 'BE Hendricks' 'BJ Haddin' 'BJ Hodge' 'BJ Rohrer' 'BMAJ Mendis' 'BR Dunk'
 'Basil Thampi' 'Bipul Sh

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Initialized with 15 players at Chennai

Training the DQN model...
Episode 0, Reward: -10.00, Epsilon: 1.00
Episode 50, Reward: -10.00, Epsilon: 0.78
Episode 100, Reward: -10.00, Epsilon: 0.61
Episode 150, Reward: -10.00, Epsilon: 0.47
Episode 200, Reward: -10.00, Epsilon: 0.37
Episode 250, Reward: -10.00, Epsilon: 0.29

Optimized Playing XI:
    player_name
      SR Watson
       MS Dhoni
       SK Raina
       DJ Bravo
      KM Jadhav
   F du Plessis
      AT Rayudu
      KV Sharma
Harbhajan Singh
      SN Thakur
      DL Chahar
