<a href="https://colab.research.google.com/github/Stephenthomas10/Projects-and-implementations/blob/main/WPL_MATCH_SIMULATION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import yaml
import random
import os
import zipfile

# Step 1: Unpack the uploaded WPL archive and index the YAML files inside it
ZIP_FILE_PATH = "/content/wpl.zip"  # Point this at wherever the archive was uploaded
EXTRACTED_FOLDER = "/content/wpl_yaml_files"

# Unpack every archive member into the working folder
with zipfile.ZipFile(ZIP_FILE_PATH, 'r') as archive:
    archive.extractall(EXTRACTED_FOLDER)

# Collect the full path of each ".yaml" entry directly inside the folder
# (os.listdir does not descend into sub-folders)
yaml_files = []
for entry_name in os.listdir(EXTRACTED_FOLDER):
    if entry_name.endswith(".yaml"):
        yaml_files.append(os.path.join(EXTRACTED_FOLDER, entry_name))

print(f"Total YAML files found: {len(yaml_files)}")

# Step 2: Function to load YAML match data
def load_yaml(file_path):
    """Parse one YAML file and return its contents.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load``. An empty dict is returned
        when the file contains no document, so callers can test for keys
        without first checking for ``None``.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's
    # locale-dependent default encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        data = yaml.safe_load(file)
    # safe_load yields None for an empty stream
    return {} if data is None else data

# Step 3: Process all YAML files to extract ball-by-ball data
# Flatten every delivery of every match into one record per ball
balls = []
for file in yaml_files:
    match_data = load_yaml(file)
    # An empty or non-mapping document (safe_load can return None) has no
    # innings to read; skip it rather than fail on the membership test.
    if not isinstance(match_data, dict) or "innings" not in match_data:
        continue

    innings_data = match_data["innings"]

    for i, inning in enumerate(innings_data):
        inning_name = f"Inning {i+1}"
        # Take the first value of the innings mapping without building a
        # throwaway list; that value carries the "deliveries" sequence.
        deliveries = next(iter(inning.values()))["deliveries"]

        for delivery in deliveries:
            # Each delivery maps a ball identifier to that ball's details
            for ball_number, details in delivery.items():
                balls.append({
                    "inning": inning_name,
                    # int() keeps only the whole-number part of the identifier
                    "over": int(ball_number),
                    "batter": details["batsman"],
                    "bowler": details["bowler"],
                    "runs": details["runs"]["batsman"],
                    "extras": details["runs"].get("extras", 0),
                    "wicket": 1 if "wicket" in details else 0,
                    "dismissal": details.get("wicket", {}).get("kind", None)
                })

# Convert to DataFrame
df = pd.DataFrame(balls)
print(f"Total deliveries processed: {len(df)}")

# Step 4: Create probability distributions
# Per-batter distribution of runs off a ball: one row per batter, one column
# per observed run value, each row holding relative frequencies.
runs_by_batter = df.groupby("batter")["runs"]
batsman_prob = runs_by_batter.value_counts(normalize=True).unstack(fill_value=0)

# The same table keyed by bowler instead of batter
runs_by_bowler = df.groupby("bowler")["runs"]
bowler_prob = runs_by_bowler.value_counts(normalize=True).unstack(fill_value=0)

# Mean of the 0/1 wicket flag over each bowler's deliveries
wicket_prob = df["wicket"].groupby(df["bowler"]).mean()

# Step 5: Function to simulate a ball based on probabilities
def simulate_ball(batter, bowler):
    """Draw the outcome of a single delivery from the historical tables.

    Args:
        batter: Name of the batter on strike.
        bowler: Name of the bowler delivering the ball.

    Returns:
        dict with the keys "batter", "bowler", "runs" and "wicket".
        "runs" is sampled from the batter's row of ``batsman_prob``, or from
        the column-wise mean of all rows when the batter is not in the table.
        "wicket" is 1 or 0, drawn independently using the bowler's entry in
        ``wicket_prob``, or the mean of all entries when the bowler is absent.
    """
    if batter in batsman_prob.index:
        run_distribution = batsman_prob.loc[batter]
    else:
        run_distribution = batsman_prob.mean()

    dismissal_rate = wicket_prob.get(bowler, wicket_prob.mean())

    # Runs are sampled before the wicket so the RNG draw order is fixed
    scored = np.random.choice(run_distribution.index, p=run_distribution.values)
    dismissed = np.random.rand() < dismissal_rate

    outcome = {"batter": batter, "bowler": bowler}
    outcome["runs"] = scored
    outcome["wicket"] = int(dismissed)
    return outcome

# Step 6: Set up teams
# Placeholder line-ups: six numbered batters and five numbered bowlers per side
team_MI = [f"Batter{slot}_MI" for slot in range(1, 7)]
team_DC = [f"Batter{slot}_DC" for slot in range(1, 7)]
bowlers_MI = [f"Bowler{slot}_MI" for slot in range(1, 6)]
bowlers_DC = [f"Bowler{slot}_DC" for slot in range(1, 6)]

# Step 7: Simulate the full MI vs DC match
# Match format used by the simulation
MAX_OVERS = 20
BALLS_PER_OVER = 6
MAX_OVERS_PER_BOWLER = 4

# One dict per simulated delivery, across both innings
match_simulation = []
# Runs the side batting second needs to win; unknown until innings 1 is complete
target = None
for inning, (batting_team, bowling_team, bowlers) in enumerate([(team_MI, team_DC, bowlers_DC), (team_DC, team_MI, bowlers_MI)]):
    # The innings ends once only one batter is left without a partner
    wickets_available = len(batting_team) - 1
    out_count = 0
    total_runs = 0
    striker, non_striker = batting_team[0], batting_team[1]
    overs_bowled = {name: 0 for name in bowlers}
    previous_bowler = None
    innings_over = False

    for over in range(1, MAX_OVERS + 1):
        # Prefer a bowler who did not bowl the previous over and is still
        # under the per-bowler quota. This is best effort: if earlier random
        # picks leave nobody eligible, relax the quota first, then the
        # no-consecutive-overs rule.
        rested = [name for name in bowlers if name != previous_bowler]
        within_quota = [name for name in rested if overs_bowled[name] < MAX_OVERS_PER_BOWLER]
        bowler = random.choice(within_quota or rested or bowlers)
        overs_bowled[bowler] += 1
        previous_bowler = bowler

        for ball in range(1, BALLS_PER_OVER + 1):
            ball_result = simulate_ball(striker, bowler)
            total_runs += ball_result["runs"]
            ball_result.update({"inning": inning + 1, "over": over, "ball": ball})
            match_simulation.append(ball_result)

            if ball_result["wicket"]:
                out_count += 1
                if out_count >= wickets_available:
                    innings_over = True  # All out
                    break
                # Batters 0 and 1 opened, so wicket number k brings in batter k + 1
                striker = batting_team[out_count + 1]

            # Rotate strike on 1,3,5 runs
            if ball_result["runs"] in (1, 3, 5):
                striker, non_striker = non_striker, striker

            # The chase is finished as soon as the target is reached
            if target is not None and total_runs >= target:
                innings_over = True
                break

        if innings_over:
            break

        # The batters change ends when an over is completed
        striker, non_striker = non_striker, striker

    if target is None:
        target = total_runs + 1

# Step 8: Convert simulation data into DataFrame and save
# Tabulate the ball-by-ball results: one row per simulated delivery
simulated_df = pd.DataFrame(data=match_simulation)

# Write the table to disk without the row index
simulated_df.to_csv(path_or_buf="simulated_match.csv", index=False)
print("Simulation complete! Data saved as simulated_match.csv")

# Preview the opening ten deliveries (rendered as the cell's output)
simulated_df.head(n=10)


Total YAML files found: 44
Total deliveries processed: 10176
Simulation complete! Data saved as simulated_match.csv


Unnamed: 0,batter,bowler,runs,wicket,inning,over,ball
0,Batter1_MI,Bowler1_DC,4,0,1,1,1
1,Batter1_MI,Bowler1_DC,0,0,1,1,2
2,Batter1_MI,Bowler1_DC,0,0,1,1,3
3,Batter1_MI,Bowler1_DC,1,0,1,1,4
4,Batter2_MI,Bowler1_DC,1,0,1,1,5
5,Batter1_MI,Bowler1_DC,4,0,1,1,6
6,Batter1_MI,Bowler2_DC,1,0,1,2,1
7,Batter2_MI,Bowler2_DC,0,0,1,2,2
8,Batter2_MI,Bowler2_DC,1,0,1,2,3
9,Batter1_MI,Bowler2_DC,2,0,1,2,4


In [None]:
import pandas as pd

# Read back the ball-by-ball CSV written by the simulation cell
df = pd.read_csv(filepath_or_buffer="simulated_match.csv")

# Step 1: Process Batting Scorecard
def batting_scorecard(inning):
    """Build the per-batter scorecard for one innings of the simulated match.

    Args:
        inning: Innings number, matched against the "inning" column of the
            module-level ``df``.

    Returns:
        DataFrame with one row per batter and the columns "batter", "Runs",
        "Balls", "Fours", "Sixes", "Wickets" (sum of the wicket flag over the
        balls that batter faced) and "Strike Rate" (runs per 100 balls,
        rounded to two decimals), ordered by "Runs" from highest to lowest.
    """
    def count_of(run_value):
        # Aggregator counting the balls on which exactly `run_value` was scored
        return lambda scores: (scores == run_value).sum()

    innings_rows = df.loc[df["inning"] == inning]
    card = innings_rows.groupby("batter").agg(
        Runs=("runs", "sum"),
        Balls=("batter", "count"),
        Fours=("runs", count_of(4)),
        Sixes=("runs", count_of(6)),
        Wickets=("wicket", "sum"),
    )
    card = card.reset_index()

    card["Strike Rate"] = ((card["Runs"] / card["Balls"]) * 100).round(2)
    return card.sort_values(by="Runs", ascending=False)

# Step 2: Process Bowling Scorecard
def bowling_scorecard(inning):
    """Build the per-bowler scorecard for one innings of the simulated match.

    Args:
        inning: Innings number, matched against the "inning" column of the
            module-level ``df``.

    Returns:
        DataFrame with one row per bowler and the columns "bowler", "Overs"
        (completed overs, i.e. balls bowled // 6), "Runs", "Wickets" and
        "Economy" (runs conceded per six balls bowled, rounded to two
        decimals), ordered by "Wickets" from highest to lowest.
    """
    innings_rows = df[df["inning"] == inning]
    bowlers = innings_rows.groupby("bowler").agg(
        Balls=("ball", "size"),
        Runs=("runs", "sum"),
        Wickets=("wicket", "sum"),
    ).reset_index()

    # Keep the original column layout: Overs sits right after the bowler name
    bowlers.insert(1, "Overs", bowlers["Balls"] // 6)

    # Economy is derived from the exact ball count, so partial overs are
    # included. Every group holds at least one ball, so there is no division
    # by zero and no need for an offset in the denominator.
    bowlers["Economy"] = (bowlers["Runs"] * 6 / bowlers["Balls"]).round(2)

    # Balls was only needed for the calculation above
    bowlers = bowlers.drop(columns="Balls")
    return bowlers.sort_values(by="Wickets", ascending=False)

# Step 3: Calculate Total Scores & Winner
# Total runs off the bat in each innings
score1 = df[df["inning"] == 1]["runs"].sum()
score2 = df[df["inning"] == 2]["runs"].sum()

if score1 > score2:
    winner = "Team 1 (Batting 1st)"
elif score2 > score1:
    winner = "Team 2 (Batting 2nd)"
else:
    # Level scores are a tie, not a win for the side batting second
    winner = "Match Tied"

# Print Final Scorecard
print("🏆 **Final Score:**")
print(f"📢 Team 1: {score1} Runs")
print(f"📢 Team 2: {score2} Runs")
print(f"🎉 Winner: {winner}\n")

print("📊 **Batting Scorecard - Inning 1**")
print(batting_scorecard(1))

print("\n📊 **Bowling Scorecard - Inning 1**")
print(bowling_scorecard(1))

print("\n📊 **Batting Scorecard - Inning 2**")
print(batting_scorecard(2))

print("\n📊 **Bowling Scorecard - Inning 2**")
print(bowling_scorecard(2))


🏆 **Final Score:**
📢 Team 1: 128 Runs
📢 Team 2: 95 Runs
🎉 Winner: Team 1 (Batting 1st)

📊 **Batting Scorecard - Inning 1**
       batter  Runs  Balls  Fours  Sixes  Wickets  Strike Rate
0  Batter1_MI    49     44      7      0        1       111.36
2  Batter3_MI    34     38      3      1        0        89.47
3  Batter4_MI    30     18      5      1        1       166.67
1  Batter2_MI    14     15      0      1        1        93.33
5  Batter6_MI     1      2      0      0        0        50.00
4  Batter5_MI     0      3      0      0        1         0.00

📊 **Bowling Scorecard - Inning 1**
       bowler  Overs  Runs  Wickets  Economy
0  Bowler1_DC      5    47        1     9.22
1  Bowler2_DC      4    21        1     5.12
3  Bowler4_DC      4    16        1     3.90
4  Bowler5_DC      5    22        1     4.31
2  Bowler3_DC      2    22        0    10.48

📊 **Batting Scorecard - Inning 2**
       batter  Runs  Balls  Fours  Sixes  Wickets  Strike Rate
2  Batter3_DC    29     34     