# Problem Statement
Create a cricket match simulator where an AI agent decides the best batting strategy to maximize runs and chase a target within a given number of balls.

# 1: Import Required Libraries

In [1]:
import pickle
import random

import matplotlib.pyplot as plt
import numpy as np


# 2: Define the Cricket Environment

In [2]:
class MatchEnv:
    """Toy run-chase environment: score ``max_runs`` within ``max_balls`` balls.

    The state is the tuple ``(balls_left, runs_needed)``. Actions are the
    integers 0-5: actions 0-4 score a fixed number of runs (0, 1, 2, 4, 6)
    and action 5 is a coin flip between scoring six and being dismissed.
    """

    # Runs credited for each action. Action 5 is listed for completeness but
    # is resolved randomly inside step() and never looked up here.
    ACTION_RUNS = {0: 0, 1: 1, 2: 2, 3: 4, 4: 6, 5: 0}

    def __init__(self):
        self.max_balls = 6
        self.max_runs = 20
        self.action_space = 6  # 0 to 5 (e.g., defend, single, double, four, six, risky)
        self.reset()

    def reset(self):
        """Start a fresh chase and return the initial ``(balls_left, runs_needed)``."""
        self.balls_left = self.max_balls
        self.runs_needed = self.max_runs
        self.done = False
        return (self.balls_left, self.runs_needed)

    def step(self, action):
        """Play one ball with ``action`` and advance the match.

        Args:
            action: Integer in ``0..5``. Any other value raises ``KeyError``.

        Returns:
            ``((balls_left, runs_needed), reward, done)``. The reward is the
            runs scored on the ball, or ``-10`` when the risky shot (action 5)
            gets the batter out.

        If no balls remain when this is called, nothing is bowled: the state is
        returned unchanged with reward 0 and ``done=True``. Without this guard
        the ball counter would go negative and the episode would never end on
        the "balls exhausted" condition.
        """
        if self.balls_left <= 0:
            self.balls_left = 0
            self.done = True
            return (self.balls_left, self.runs_needed), 0, self.done

        reward = 0
        out = False

        if action == 5:
            # Risky shot: 50% dismissal (penalised), 50% six.
            if np.random.rand() < 0.5:
                reward = -10
                out = True
            else:
                reward = 6
                self.runs_needed -= 6
        else:
            runs = self.ACTION_RUNS[action]
            reward = runs
            self.runs_needed -= runs

        self.balls_left -= 1
        # Overshooting the target still counts as reaching it.
        self.runs_needed = max(0, self.runs_needed)

        # The episode ends when balls run out, the target is reached, or a wicket falls.
        if self.balls_left <= 0 or self.runs_needed == 0 or out:
            self.done = True

        return (self.balls_left, self.runs_needed), reward, self.done


# 3: Initialize Environment and Q-table

In [3]:
env = MatchEnv()
# Q-table indexed by [balls_left, runs_needed, action]. Sizes are derived from
# the environment so the table stays in sync if max_balls or max_runs change;
# the +1 accounts for the zero state on each axis.
q_table = np.zeros((env.max_balls + 1, env.max_runs + 1, env.action_space))


# 4: Define Training Parameters

In [4]:
# Seed both RNGs so that training (and the risky-shot coin flips) are
# reproducible across "Restart Kernel -> Run All".
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

episodes = 10_000        # number of training episodes
learning_rate = 0.1      # step size of each Q-value update
discount_factor = 0.95   # weight given to future reward
epsilon = 1.0            # initial exploration probability (fully random)
epsilon_decay = 0.995    # multiplicative decay applied after every episode
min_epsilon = 0.01       # floor for the exploration probability


# 5: Training Loop

In [None]:
# Tabular Q-learning with an epsilon-greedy behaviour policy.
for episode in range(episodes):
    state = env.reset()
    done = False

    while not done:
        balls, runs = state

        # Exploration vs exploitation
        if np.random.rand() < epsilon:
            action = np.random.randint(env.action_space)
        else:
            action = np.argmax(q_table[balls, runs])

        next_state, reward, done = env.step(action)
        b, r = next_state

        # A terminal transition has no future value. This matters when the
        # batter is out: the returned (b, r) has the coordinates of a live
        # state, so bootstrapping from it would cancel part of the wicket
        # penalty with reward that can never be collected.
        future_value = 0.0 if done else np.max(q_table[b, r])
        td_target = reward + discount_factor * future_value
        q_table[balls, runs, action] += learning_rate * (
            td_target - q_table[balls, runs, action]
        )

        state = next_state

    # Decay epsilon
    epsilon = max(min_epsilon, epsilon * epsilon_decay)


# 6: Test the Trained Agent (with User Input)

In [6]:
# Ask for a starting situation and replay the greedy (learned) policy from it.
try:
    user_balls_left = int(input(f"Enter balls left (0 to {env.max_balls}): "))
    user_runs_needed = int(input(f"Enter runs needed (0 to {env.max_runs}): "))
except ValueError:
    # Non-integer text is reported as invalid input instead of a traceback.
    user_balls_left = user_runs_needed = None

if (
    user_balls_left is None
    or not 0 <= user_balls_left <= env.max_balls
    or not 0 <= user_runs_needed <= env.max_runs
):
    print("Invalid input.")
else:
    state = (user_balls_left, user_runs_needed)
    env.balls_left = user_balls_left
    env.runs_needed = user_runs_needed
    # With no balls left or nothing left to chase the match is already over.
    # Stepping anyway would drive balls_left negative and index the Q-table
    # with wrapped-around (unrelated) rows.
    env.done = user_balls_left == 0 or user_runs_needed == 0

    action_map = {
        0: "Defend",
        1: "Single",
        2: "Double",
        3: "Boundary (4)",
        4: "Six",
        5: "Risky Shot"
    }

    print("\n🧠 Agent's Decisions:\n")
    while not env.done:
        balls, runs = state
        # Pure exploitation: always take the highest-valued action.
        action = int(np.argmax(q_table[balls, runs]))
        next_state, reward, _ = env.step(action)

        print(f"Balls Left: {balls}, Runs Needed: {runs}, "
              f"Action: {action_map[action]}, Reward: {reward}")

        state = next_state

    if env.runs_needed == 0:
        print("\n✅ Target Achieved!")
    else:
        print("\n❌ Target Not Achieved.")



🧠 Agent's Decisions:

Balls Left: 4, Runs Needed: 16, Action: Six, Reward: 6
Balls Left: 3, Runs Needed: 10, Action: Boundary (4), Reward: 4
Balls Left: 2, Runs Needed: 6, Action: Six, Reward: 6

✅ Target Achieved!


In [7]:
# Persist the learned Q-table so the policy can be reused without retraining.
# NOTE: only load this file back from a trusted location; unpickling
# untrusted data can execute arbitrary code.
with open("q_table.pkl", "wb") as f:
    pickle.dump(q_table, f)
