In [2]:
from collections import defaultdict
import os
from datetime import datetime
import pandas as pd
from Helpers import add_runner_states, add_game_state, add_runs_remaining, calculate_zero_run_probabilities  # assumes same helpers as Colab

Code for first month data summary

In [3]:
def build_gamestate_summary_local(data_root, save_path, test_first_only=True):
    summary = defaultdict(lambda: {"Count": 0, "TotalRunsRemaining": 0})

    # ✅ Sort years
    years = sorted([y for y in os.listdir(data_root) if os.path.isdir(os.path.join(data_root, y))])
    if test_first_only:
        years = years[:1]  # ✅ Process only first year

    for year in years:
        year_path = os.path.join(data_root, year)

        # ✅ Sort months
        months = sorted([m for m in os.listdir(year_path) if os.path.isdir(os.path.join(year_path, m))])
        if test_first_only:
            months = months[:1]  # ✅ Process only first month

        for month in months:
            month_path = os.path.join(year_path, month)

            print(f"\n📅 Processing {year}-{month} from local files ...")

            for day in os.listdir(month_path):
                day_csv_path = os.path.join(month_path, day, "CSV")
                if not os.path.exists(day_csv_path):
                    continue

                for file in os.listdir(day_csv_path):
                    if "_unverified" in file or "playerpositioning" in file or not file.endswith(".csv"):
                        continue

                    file_path = os.path.join(day_csv_path, file)

                    try:
                        df = pd.read_csv(file_path)

                        required = {'Inning','Top/Bottom','Outs','Balls','Strikes','RunsScored','PlayResult'}
                        if not required.issubset(df.columns):
                            continue

                        df = df[df['Inning'] < 9]
                        if df.empty:
                            continue

                        df = add_runner_states(df)
                        df = add_game_state(df)
                        df = add_runs_remaining(df)

                        agg = df.groupby('GameState')['RunsRemaining'].agg(['count', 'sum'])
                        for state, row in agg.iterrows():
                            summary[state]["Count"] += row['count']
                            summary[state]["TotalRunsRemaining"] += row['sum']

                    except Exception:
                        continue

            # ✅ Save monthly summary with optional suffix
            monthly_df = pd.DataFrame([
                {
                    "GameState": s,
                    "Count": d["Count"],
                    "TotalRunsRemaining": d["TotalRunsRemaining"],
                    "ExpectedRuns": d["TotalRunsRemaining"]/d["Count"] if d["Count"] else 0
                }
                for s, d in summary.items()
            ])
            os.makedirs(save_path, exist_ok=True)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M")
            monthly_path = os.path.join(save_path, f"GameState_Summary_{year}_{month}_{timestamp}.csv")
            monthly_df.to_csv(monthly_path, index=False)
            print(f"💾 Saved summary: {monthly_path}")

            if test_first_only:
                print("🧪 Test mode enabled — stopping after first month.")
                break  # stop after first month

        if test_first_only:
            break  # stop after first year

    print("\n✅ Local run complete.")
    return pd.DataFrame(summary)

In [4]:
data_root = "/Users/suma/Downloads/Baseball_Project/v3"
save_path = "/Users/suma/Downloads/Baseball_Project/CSV_files"

summary_df = build_gamestate_summary_local(data_root, save_path, test_first_only=True)
summary_df.head()


📅 Processing 2024-02 from local files ...
💾 Saved summary: /Users/suma/Downloads/Baseball_Project/CSV_files/GameState_Summary_2024_02_20251022_1525.csv
🧪 Test mode enabled — stopping after first month.

✅ Local run complete.


Unnamed: 0,000-O0-B0-S0,000-O0-B0-S1,000-O0-B0-S2,000-O0-B1-S0,000-O0-B1-S1,000-O0-B1-S2,000-O0-B2-S0,000-O0-B2-S1,000-O0-B2-S2,000-O0-B3-S0,...,101-O0-B1-S2,101-O0-B0-S2,001-O1-B3-S1,101-O1-B3-S1,011-O0-B3-S0,101-O2-B3-S2,101-O0-B3-S0,001-O2-B3-S0,001-O1-B3-S0,101-O2-B3-S1
Count,12110,5630,2724,4938,4722,4051,1854,2656,3490,674,...,11,22,11,14,4,23,6,7,5,14
TotalRunsRemaining,10179,4326,1751,4444,3808,2743,1955,2490,2691,886,...,19,39,16,18,7,2,10,7,9,7


The below code is for 2 months summary

In [None]:
def build_gamestate_summary_local(data_root, save_path, test_first_only=True):
    summary = defaultdict(lambda: {"Count": 0, "TotalRunsRemaining": 0})

    # ✅ Sort years
    years = sorted([y for y in os.listdir(data_root) if os.path.isdir(os.path.join(data_root, y))])
    if test_first_only:
        years = years[:1]  # ✅ Process only first year

    for year in years:
        year_path = os.path.join(data_root, year)

        # ✅ Sort months
        months = sorted([m for m in os.listdir(year_path) if os.path.isdir(os.path.join(year_path, m))])
        if test_first_only:
            months = months[:2]  # ✅ Process only first TWO months

        month_range = f"{months[0]}to{months[-1]}" if len(months) > 1 else months[0]

        for month in months:
            month_path = os.path.join(year_path, month)
            print(f"\n📅 Processing {year}-{month} from local files ...")

            for day in os.listdir(month_path):
                day_csv_path = os.path.join(month_path, day, "CSV")
                if not os.path.exists(day_csv_path):
                    continue

                for file in os.listdir(day_csv_path):
                    if "_unverified" in file or "playerpositioning" in file or not file.endswith(".csv"):
                        continue

                    file_path = os.path.join(day_csv_path, file)

                    try:
                        df = pd.read_csv(file_path)

                        required = {'Inning','Top/Bottom','Outs','Balls','Strikes','RunsScored','PlayResult'}
                        if not required.issubset(df.columns):
                            continue

                        df = df[df['Inning'] < 9]
                        if df.empty:
                            continue

                        df = add_runner_states(df)
                        df = add_game_state(df)
                        df = add_runs_remaining(df)

                        agg = df.groupby('GameState')['RunsRemaining'].agg(['count', 'sum'])
                        for state, row in agg.iterrows():
                            summary[state]["Count"] += row['count']okok
                            summary[state]["TotalRunsRemaining"] += row['sum']

                    except Exception:
                        continue

        # ✅ Save combined monthly summary after all selected months
        monthly_df = pd.DataFrame([
            {
                "GameState": s,
                "Count": d["Count"],
                "TotalRunsRemaining": d["TotalRunsRemaining"],
                "ExpectedRuns": d["TotalRunsRemaining"]/d["Count"] if d["Count"] else 0
            }
            for s, d in summary.items()
        ])
        os.makedirs(save_path, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M")
        filename = f"GameState_Summary_{year}_{month_range}_{timestamp}.csv"
        monthly_path = os.path.join(save_path, filename)
        monthly_df.to_csv(monthly_path, index=False)
        print(f"\n💾 Saved summary: {monthly_path}")

        if test_first_only:
            print("🧪 Test mode enabled — stopping after first few months.")
            break

    print("\n✅ Local run complete.")
    return pd.DataFrame(summary)

In [8]:
data_root = "/Users/suma/Downloads/Baseball_Project/v3"
save_path = "/Users/suma/Downloads/Baseball_Project/CSV_files"

summary_df = build_gamestate_summary_local(data_root, save_path, test_first_only=True)
summary_df.head()


📅 Processing 2024-02 from local files ...

📅 Processing 2024-03 from local files ...

💾 Saved summary: /Users/suma/Downloads/Baseball_Project/CSV_files/GameState_Summary_2024_02to03_20251022_1636.csv
🧪 Test mode enabled — stopping after first few months.

✅ Local run complete.


Unnamed: 0,000-O0-B0-S0,000-O0-B0-S1,000-O0-B0-S2,000-O0-B1-S0,000-O0-B1-S1,000-O0-B1-S2,000-O0-B2-S0,000-O0-B2-S1,000-O0-B2-S2,000-O0-B3-S0,...,001-O2-B3-S0,001-O1-B3-S0,101-O2-B3-S1,110-O1-B4-S2,000-O2-B1-S3,100-O3-B0-S1,000-O3-B1-S3,110-O2-B2-S3,110-O2-B2-S4,100-O2-B1-S3
Count,41592,19140,9342,16998,16053,14071,6428,9010,12129,2371,...,16,17,40,1,1,1,1,1,1,1
TotalRunsRemaining,34039,14173,5773,15154,12719,9391,6782,8355,9134,3007,...,10,22,22,0,0,0,0,0,0,1


Getting Feb files count which are used for summary generation

In [2]:
import os
import pandas as pd

feb_path = "/Users/suma/Downloads/Baseball_Project/v3/2024/02"
valid_file_count = 0

for day in os.listdir(feb_path):
    day_csv_path = os.path.join(feb_path, day, "CSV")
    if not os.path.exists(day_csv_path):
        continue

    for file in os.listdir(day_csv_path):
        # ✅ Filter out irrelevant/unusable files
        if "_unverified" in file or "playerpositioning" in file:
            continue

        file_path = os.path.join(day_csv_path, file)
        try:
            df = pd.read_csv(file_path)

            # ✅ Count the valid file
            valid_file_count += 1

        except Exception:
            continue

print(f"✅ Valid files used for summary (Feb 2024): {valid_file_count}")


✅ Valid files used for summary (Feb 2024): 720


code for  all files count for feb month

In [11]:
import os

feb_path = "/Users/suma/Downloads/Baseball_Project/v3/2024/02"
total_file_count = 0

for day in os.listdir(feb_path):
    day_csv_path = os.path.join(feb_path, day, "CSV")
    if not os.path.exists(day_csv_path):
        continue

    for file in os.listdir(day_csv_path):
        if file.endswith(".csv"):
            total_file_count += 1

print(f"📁 Total CSV files in February 2024: {total_file_count}")

📁 Total CSV files in February 2024: 1863


Unverified files count

In [12]:
import os

feb_path = "/Users/suma/Downloads/Baseball_Project/v3/2024/02"
unverified_count = 0

for day in os.listdir(feb_path):
    day_csv_path = os.path.join(feb_path, day, "CSV")
    if not os.path.exists(day_csv_path):
        continue

    for file in os.listdir(day_csv_path):
        if "_unverified" in file and file.endswith(".csv"):
            unverified_count += 1

print(f"🛑 Unverified CSV files (Feb 2024): {unverified_count}")

🛑 Unverified CSV files (Feb 2024): 1143


Function for calling data to get summary

In [None]:
def build_gamestate_summary_local(data_root, save_path, year_to_process, months_to_process):
    summary = defaultdict(lambda: {"Count": 0, "TotalRunsRemaining": 0})

    year_path = os.path.join(data_root, year_to_process)

    # Ensure only requested months are processed
    months = sorted([
        m for m in os.listdir(year_path)
        if os.path.isdir(os.path.join(year_path, m)) and m in months_to_process
    ])

    for month in months:
        month_path = os.path.join(year_path, month)
        print(f"\n📅 Processing {year_to_process}-{month} from local files ...")

        for day in os.listdir(month_path):
            day_csv_path = os.path.join(month_path, day, "CSV")
            if not os.path.exists(day_csv_path):
                continue

            for file in os.listdir(day_csv_path):
                if "_unverified" in file or "playerpositioning" in file or not file.endswith(".csv"):
                    continue

                file_path = os.path.join(day_csv_path, file)

                try:
                    df = pd.read_csv(file_path)
                    required = {'Inning','Top/Bottom','Outs','Balls','Strikes','RunsScored','PlayResult'}
                    if not required.issubset(df.columns):
                        continue

                    df = df[df['Inning'] < 9]
                    if df.empty:
                        continue

                    df = add_runner_states(df)
                    df = add_game_state(df)
                    df = add_runs_remaining(df)

                    agg = df.groupby('GameState')['RunsRemaining'].agg(['count', 'sum'])
                    for state, row in agg.iterrows():
                        summary[state]["Count"] += row['count']
                        summary[state]["TotalRunsRemaining"] += row['sum']

                except Exception:
                    continue

    # ✅ Save combined summary
    monthly_df = pd.DataFrame([
        {
            "GameState": s,
            "Count": d["Count"],
            "TotalRunsRemaining": d["TotalRunsRemaining"],
            "ExpectedRuns": d["TotalRunsRemaining"]/d["Count"] if d["Count"] else 0
        }
        for s, d in summary.items()
    ])
    os.makedirs(save_path, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    month_range = f"{months[0]}to{months[-1]}" if len(months) > 1 else months[0]
    filename = f"GameState_Summary_{year_to_process}_{month_range}_{timestamp}.csv"
    monthly_path = os.path.join(save_path, filename)
    monthly_df.to_csv(monthly_path, index=False)

    print(f"\n💾 Saved summary: {monthly_path}")
    print("\n✅ Local run complete.")
    return pd.DataFrame(summary)

In [None]:
data_root = "/Users/suma/Downloads/Baseball_Project/v3"
save_path = "/Users/suma/Downloads/Baseball_Project/CSV_files"

# Example: Process Feb and Mar 2024
summary_df = build_gamestate_summary_local(data_root, save_path, year_to_process="2024", months_to_process=["02", "03"])
summary_df.head()


📅 Processing 2024-02 from local files ...

📅 Processing 2024-03 from local files ...

💾 Saved summary: /Users/suma/Downloads/Baseball_Project/CSV_files/GameState_Summary_2024_02to03_20251022_1722.csv

✅ Local run complete.


Unnamed: 0,000-O0-B0-S0,000-O0-B0-S1,000-O0-B0-S2,000-O0-B1-S0,000-O0-B1-S1,000-O0-B1-S2,000-O0-B2-S0,000-O0-B2-S1,000-O0-B2-S2,000-O0-B3-S0,...,001-O2-B3-S0,001-O1-B3-S0,101-O2-B3-S1,110-O1-B4-S2,000-O2-B1-S3,100-O3-B0-S1,000-O3-B1-S3,110-O2-B2-S3,110-O2-B2-S4,100-O2-B1-S3
Count,41592,19140,9342,16998,16053,14071,6428,9010,12129,2371,...,16,17,40,1,1,1,1,1,1,1
TotalRunsRemaining,34039,14173,5773,15154,12719,9391,6782,8355,9134,3007,...,10,22,22,0,0,0,0,0,0,1


To get count for march Month

In [4]:
import os
import pandas as pd

feb_path = "/Users/suma/Downloads/Baseball_Project/v3/2024/03"
valid_file_count = 0

for day in os.listdir(feb_path):
    day_csv_path = os.path.join(feb_path, day, "CSV")
    if not os.path.exists(day_csv_path):
        continue

    for file in os.listdir(day_csv_path):
        # ✅ Filter out irrelevant/unusable files
        if "_unverified" in file or "playerpositioning" in file:
            continue

        file_path = os.path.join(day_csv_path, file)
        try:
            df = pd.read_csv(file_path)

            # ✅ Count the valid file
            valid_file_count += 1

        except Exception:
            continue

print(f"✅ Valid files used for summary (March 2024): {valid_file_count}")


✅ Valid files used for summary (March 2024): 1764


Getting March summary

In [13]:
data_root = "/Users/suma/Downloads/Baseball_Project/v3"
save_path = "/Users/suma/Downloads/Baseball_Project/CSV_files"


summary_df = build_gamestate_summary_local(data_root, save_path, year_to_process="2024", months_to_process=["03"])
summary_df.head()


📅 Processing 2024-03 from local files ...

💾 Saved summary: /Users/suma/Downloads/Baseball_Project/CSV_files/GameState_Summary_2024_03_20251022_1741.csv

✅ Local run complete.


Unnamed: 0,000-O0-B0-S0,000-O0-B0-S1,000-O0-B0-S2,000-O0-B1-S0,000-O0-B1-S1,000-O0-B1-S2,000-O0-B2-S0,000-O0-B2-S1,000-O0-B2-S2,000-O0-B3-S2,...,110-O1-B4-S2,001-O0-B2-S2,101-O2-B3-S0,000-O2-B1-S3,011-O0-B3-S0,100-O3-B0-S1,000-O3-B1-S3,110-O2-B2-S3,110-O2-B2-S4,100-O2-B1-S3
Count,29482,13510,6618,12060,11331,10020,4574,6354,8639,5692,...,1,36,6,1,14,1,1,1,1,1
TotalRunsRemaining,23860,9847,4022,10710,8911,6648,4827,5865,6443,5289,...,0,59,8,0,31,0,0,0,0,1


2024 valid files count

In [19]:
def count_valid_files_2024(data_root, year="2024"):
    valid_file_count = 0
    year_path = os.path.join(data_root, year)

    for month in sorted(os.listdir(year_path)):
        month_path = os.path.join(year_path, month)
        if not os.path.isdir(month_path):
            continue

        for day in os.listdir(month_path):
            day_csv_path = os.path.join(month_path, day, "CSV")
            if not os.path.exists(day_csv_path):
                continue

            for file in os.listdir(day_csv_path):
                if "_unverified" in file or "playerpositioning" in file or not file.endswith(".csv"):
                    continue

                file_path = os.path.join(day_csv_path, file)
                try:
                    df = pd.read_csv(file_path)
                    required = {'Inning', 'Top/Bottom', 'Outs', 'Balls', 'Strikes', 'RunsScored', 'PlayResult'}
                    if not required.issubset(df.columns):
                        continue
                    if df[df["Inning"] < 9].empty:
                        continue
                    valid_file_count += 1
                except Exception:
                    continue

    print(f"\n✅ Total valid files used for summary (2024): {valid_file_count}")

# 🔧 Use your base path here
data_root = "/Users/suma/Downloads/Baseball_Project/v3"
count_valid_files_2024(data_root)


✅ Total valid files used for summary (2024): 5514


2024 year data

In [14]:
def build_gamestate_summary_local(data_root, save_path, year_to_process):
    summary = defaultdict(lambda: {"Count": 0, "TotalRunsRemaining": 0})
    year_path = os.path.join(data_root, year_to_process)

    # ✅ Get all available months (01–12) automatically
    months = sorted([m for m in os.listdir(year_path) if os.path.isdir(os.path.join(year_path, m))])

    for month in months:
        month_path = os.path.join(year_path, month)
        print(f"\n📅 Processing {year_to_process}-{month} from local files ...")

        for day in os.listdir(month_path):
            day_csv_path = os.path.join(month_path, day, "CSV")
            if not os.path.exists(day_csv_path):
                continue

            for file in os.listdir(day_csv_path):
                if "_unverified" in file or "playerpositioning" in file or not file.endswith(".csv"):
                    continue

                file_path = os.path.join(day_csv_path, file)

                try:
                    df = pd.read_csv(file_path)
                    required = {'Inning', 'Top/Bottom', 'Outs', 'Balls', 'Strikes', 'RunsScored', 'PlayResult'}
                    if not required.issubset(df.columns):
                        continue

                    df = df[df['Inning'] < 9]
                    if df.empty:
                        continue

                    df = add_runner_states(df)
                    df = add_game_state(df)
                    df = add_runs_remaining(df)

                    agg = df.groupby('GameState')['RunsRemaining'].agg(['count', 'sum'])
                    for state, row in agg.iterrows():
                        summary[state]["Count"] += row['count']
                        summary[state]["TotalRunsRemaining"] += row['sum']

                except Exception:
                    continue

    # ✅ Save combined yearly summary
    monthly_df = pd.DataFrame([
        {
            "GameState": s,
            "Count": d["Count"],
            "TotalRunsRemaining": d["TotalRunsRemaining"],
            "ExpectedRuns": d["TotalRunsRemaining"]/d["Count"] if d["Count"] else 0
        }
        for s, d in summary.items()
    ])

    os.makedirs(save_path, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    filename = f"GameState_Summary_{year_to_process}_01to12_{timestamp}.csv"
    monthly_path = os.path.join(save_path, filename)
    monthly_df.to_csv(monthly_path, index=False)

    print(f"\n💾 Saved yearly summary: {monthly_path}")
    print("\n✅ Local run complete.")
    return pd.DataFrame(summary)

In [None]:
# ---- Run for all 2024 data ----
data_root = "/Users/suma/Downloads/Baseball_Project/v3"
save_path = "/Users/suma/Downloads/Baseball_Project/CSV_files"

summary_df = build_gamestate_summary_local(data_root, save_path, year_to_process="2024")
summary_df.head()


📅 Processing 2024-02 from local files ...

📅 Processing 2024-03 from local files ...

📅 Processing 2024-04 from local files ...

📅 Processing 2024-05 from local files ...

📅 Processing 2024-06 from local files ...

📅 Processing 2024-07 from local files ...

📅 Processing 2024-08 from local files ...

📅 Processing 2024-09 from local files ...

📅 Processing 2024-10 from local files ...

📅 Processing 2024-11 from local files ...

📅 Processing 2024-12 from local files ...

💾 Saved yearly summary: /Users/suma/Downloads/Baseball_Project/CSV_files/GameState_Summary_2024_01to12_20251022_1807.csv

✅ Local run complete.


Unnamed: 0,000-O0-B0-S0,000-O0-B0-S1,000-O0-B0-S2,000-O0-B1-S0,000-O0-B1-S1,000-O0-B1-S2,000-O0-B2-S0,000-O0-B2-S1,000-O0-B2-S2,000-O0-B3-S0,...,111-O2-B4-S2,000-O0-B4-S1,000-O0-B4-S2,000-O2-B0-S3,010-O2-B0-S3,100-O2-B0-S3,110-O2-B0-S3,000-O3-B0-S0,000-O3-B0-S3,011-O0-B4-S0
Count,93667,42414,20763,37727,35291,30887,14106,19942,27046,5160,...,1,1,1,3,1,4,1,2,1,1
TotalRunsRemaining,74987,30917,12971,33412,27709,20744,14545,17895,20224,6577,...,0,0,0,8,1,21,6,1,2,0


2025 valid files count

In [20]:
def count_valid_files_2024(data_root, year="2025"):
    valid_file_count = 0
    year_path = os.path.join(data_root, year)

    for month in sorted(os.listdir(year_path)):
        month_path = os.path.join(year_path, month)
        if not os.path.isdir(month_path):
            continue

        for day in os.listdir(month_path):
            day_csv_path = os.path.join(month_path, day, "CSV")
            if not os.path.exists(day_csv_path):
                continue

            for file in os.listdir(day_csv_path):
                if "_unverified" in file or "playerpositioning" in file or not file.endswith(".csv"):
                    continue

                file_path = os.path.join(day_csv_path, file)
                try:
                    df = pd.read_csv(file_path)
                    required = {'Inning', 'Top/Bottom', 'Outs', 'Balls', 'Strikes', 'RunsScored', 'PlayResult'}
                    if not required.issubset(df.columns):
                        continue
                    if df[df["Inning"] < 9].empty:
                        continue
                    valid_file_count += 1
                except Exception:
                    continue

    print(f"\n✅ Total valid files used for summary (2025): {valid_file_count}")

# 🔧 Use your base path here
data_root = "/Users/suma/Downloads/Baseball_Project/v3"
count_valid_files_2024(data_root)


✅ Total valid files used for summary (2025): 7400


2025 summary

In [23]:
def build_gamestate_summary_local(data_root, save_path, year_to_process):
    summary = defaultdict(lambda: {"Count": 0, "TotalRunsRemaining": 0})
    year_path = os.path.join(data_root, year_to_process)

    # ✅ Get all available months (01–12) automatically
    months = sorted([m for m in os.listdir(year_path) if os.path.isdir(os.path.join(year_path, m))])

    for month in months:
        month_path = os.path.join(year_path, month)
        print(f"\n📅 Processing {year_to_process}-{month} from local files ...")

        for day in os.listdir(month_path):
            day_csv_path = os.path.join(month_path, day, "CSV")
            if not os.path.exists(day_csv_path):
                continue

            for file in os.listdir(day_csv_path):
                if "_unverified" in file or "playerpositioning" in file or not file.endswith(".csv"):
                    continue

                file_path = os.path.join(day_csv_path, file)

                try:
                    df = pd.read_csv(file_path)
                    required = {'Inning', 'Top/Bottom', 'Outs', 'Balls', 'Strikes', 'RunsScored', 'PlayResult'}
                    if not required.issubset(df.columns):
                        continue

                    df = df[df['Inning'] < 9]
                    if df.empty:
                        continue

                    df = add_runner_states(df)
                    df = add_game_state(df)
                    df = add_runs_remaining(df)

                    agg = df.groupby('GameState')['RunsRemaining'].agg(['count', 'sum'])
                    for state, row in agg.iterrows():
                        summary[state]["Count"] += row['count']
                        summary[state]["TotalRunsRemaining"] += row['sum']

                except Exception:
                    continue

    # ✅ Save combined yearly summary
    monthly_df = pd.DataFrame([
        {
            "GameState": s,
            "Count": d["Count"],
            "TotalRunsRemaining": d["TotalRunsRemaining"],
            "ExpectedRuns": d["TotalRunsRemaining"]/d["Count"] if d["Count"] else 0
        }
        for s, d in summary.items()
    ])

    os.makedirs(save_path, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    filename = f"GameState_Summary_{year_to_process}_01to10_{timestamp}.csv"
    monthly_path = os.path.join(save_path, filename)
    monthly_df.to_csv(monthly_path, index=False)


    print(f"\n💾 Saved yearly summary: {monthly_path}")
    print("\n✅ Local run complete.")
    return pd.DataFrame(summary)

In [24]:
# ---- Run for all 2024 data ----
data_root = "/Users/suma/Downloads/Baseball_Project/v3"
save_path = "/Users/suma/Downloads/Baseball_Project/CSV_files"

summary_df = build_gamestate_summary_local(data_root, save_path, year_to_process="2025")
summary_df.head()


📅 Processing 2025-01 from local files ...

📅 Processing 2025-02 from local files ...

📅 Processing 2025-03 from local files ...

📅 Processing 2025-04 from local files ...

📅 Processing 2025-05 from local files ...

📅 Processing 2025-06 from local files ...

📅 Processing 2025-07 from local files ...

📅 Processing 2025-08 from local files ...

📅 Processing 2025-09 from local files ...

📅 Processing 2025-10 from local files ...

💾 Saved yearly summary: /Users/suma/Downloads/Baseball_Project/CSV_files/GameState_Summary_2025_01to10_20251022_1843.csv

✅ Local run complete.


Unnamed: 0,000-O0-B0-S0,000-O0-B0-S1,000-O0-B0-S2,000-O0-B1-S0,000-O0-B1-S1,000-O0-B1-S2,000-O0-B2-S0,000-O0-B2-S1,000-O0-B2-S2,000-O0-B3-S0,...,000-O3-B2-S2,110-O1-B4-S0,000-O3-B0-S2,100-O3-B0-S2,100-O3-B1-S0,110-O3-B0-S2,110-O3-B2-S2,000-O0-B4-S0,110-O3-B3-S2,100-O0-B4-S2
Count,122794,56529,27670,49679,46627,41376,18332,26209,35654,6557,...,1,1,1,1,1,1,1,1,1,1
TotalRunsRemaining,98725,41172,17218,43275,36277,27256,18419,23015,25981,7939,...,0,0,0,0,0,0,0,0,0,0


Total Data

In [16]:
def build_gamestate_summary_all_years(data_root, save_path):
    summary = defaultdict(lambda: {"Count": 0, "TotalRunsRemaining": 0})

    # 🔍 Get all available years in the data root
    years = sorted([y for y in os.listdir(data_root) if os.path.isdir(os.path.join(data_root, y))])

    for year in years:
        year_path = os.path.join(data_root, year)
        months = sorted([m for m in os.listdir(year_path) if os.path.isdir(os.path.join(year_path, m))])

        for month in months:
            month_path = os.path.join(year_path, month)
            print(f"\n📅 Processing {year}-{month} from local files ...")

            for day in os.listdir(month_path):
                day_csv_path = os.path.join(month_path, day, "CSV")
                if not os.path.exists(day_csv_path):
                    continue

                for file in os.listdir(day_csv_path):
                    if "_unverified" in file or "playerpositioning" in file or not file.endswith(".csv"):
                        continue

                    file_path = os.path.join(day_csv_path, file)

                    try:
                        df = pd.read_csv(file_path)
                        required = {'Inning', 'Top/Bottom', 'Outs', 'Balls', 'Strikes', 'RunsScored', 'PlayResult'}
                        if not required.issubset(df.columns):
                            continue

                        df = df[df['Inning'] < 9]
                        if df.empty:
                            continue

                        df = add_runner_states(df)
                        df = add_game_state(df)
                        df = add_runs_remaining(df)

                        agg = df.groupby('GameState')['RunsRemaining'].agg(['count', 'sum'])
                        for state, row in agg.iterrows():
                            summary[state]["Count"] += row['count']
                            summary[state]["TotalRunsRemaining"] += row['sum']

                    except Exception:
                        continue

    # 💾 Save final combined summary
    combined_df = pd.DataFrame([
        {
            "GameState": s,
            "Count": d["Count"],
            "TotalRunsRemaining": d["TotalRunsRemaining"],
            "ExpectedRuns": d["TotalRunsRemaining"]/d["Count"] if d["Count"] else 0
        }
        for s, d in summary.items()
    ])

    os.makedirs(save_path, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    filename = f"GameState_Summary_ALL_{timestamp}.csv"
    summary_path = os.path.join(save_path, filename)
    combined_df.to_csv(summary_path, index=False)

    print(f"\n✅ All-year summary saved: {summary_path}")
    return combined_df

In [17]:
data_root = "/Users/suma/Downloads/Baseball_Project/v3"
save_path = "/Users/suma/Downloads/Baseball_Project/CSV_files"

summary_df = build_gamestate_summary_all_years(data_root, save_path)
summary_df.head()


📅 Processing 2024-02 from local files ...

📅 Processing 2024-03 from local files ...

📅 Processing 2024-04 from local files ...

📅 Processing 2024-05 from local files ...

📅 Processing 2024-06 from local files ...

📅 Processing 2024-07 from local files ...

📅 Processing 2024-08 from local files ...

📅 Processing 2024-09 from local files ...

📅 Processing 2024-10 from local files ...

📅 Processing 2024-11 from local files ...

📅 Processing 2024-12 from local files ...

📅 Processing 2025-01 from local files ...

📅 Processing 2025-02 from local files ...

📅 Processing 2025-03 from local files ...

📅 Processing 2025-04 from local files ...

📅 Processing 2025-05 from local files ...

📅 Processing 2025-06 from local files ...

📅 Processing 2025-07 from local files ...

📅 Processing 2025-08 from local files ...

📅 Processing 2025-09 from local files ...

📅 Processing 2025-10 from local files ...

✅ All-year summary saved: /Users/suma/Downloads/Baseball_Project/CSV_files/GameState_Summary_ALL

Unnamed: 0,GameState,Count,TotalRunsRemaining,ExpectedRuns
0,000-O0-B0-S0,216461,173712,0.802509
1,000-O0-B0-S1,98943,72089,0.728591
2,000-O0-B0-S2,48433,30189,0.623315
3,000-O0-B1-S0,87406,76687,0.877365
4,000-O0-B1-S1,81918,63986,0.781098


Total Valid Files Count

In [18]:
def count_all_valid_files(data_root):
    valid_file_count = 0

    for year in sorted(os.listdir(data_root)):
        year_path = os.path.join(data_root, year)
        if not os.path.isdir(year_path):
            continue

        for month in sorted(os.listdir(year_path)):
            month_path = os.path.join(year_path, month)
            if not os.path.isdir(month_path):
                continue

            for day in os.listdir(month_path):
                day_csv_path = os.path.join(month_path, day, "CSV")
                if not os.path.exists(day_csv_path):
                    continue

                for file in os.listdir(day_csv_path):
                    if "_unverified" in file or "playerpositioning" in file or not file.endswith(".csv"):
                        continue

                    file_path = os.path.join(day_csv_path, file)
                    try:
                        df = pd.read_csv(file_path)
                        required = {'Inning', 'Top/Bottom', 'Outs', 'Balls', 'Strikes', 'RunsScored', 'PlayResult'}
                        if not required.issubset(df.columns):
                            continue
                        if df[df["Inning"] < 9].empty:
                            continue
                        valid_file_count += 1
                    except Exception:
                        continue

    print(f"\n✅ Total valid files used across all years: {valid_file_count}")

# 🔧 Run this with your data path
data_root = "/Users/suma/Downloads/Baseball_Project/v3"
count_all_valid_files(data_root)


✅ Total valid files used across all years: 12914


Game state summary for months selected with probability of runs remaining

In [4]:
def build_gamestate_summary_local(data_root, save_path, year_to_process, months_to_process):
    summary = defaultdict(lambda: {
        "Count": 0,
        "TotalRunsRemaining": 0,
        "ZeroRunsCount": 0  # 👈 added
    })

    year_path = os.path.join(data_root, year_to_process)
    months = sorted([
        m for m in os.listdir(year_path)
        if os.path.isdir(os.path.join(year_path, m)) and m in months_to_process
    ])

    for month in months:
        month_path = os.path.join(year_path, month)
        print(f"\n📅 Processing {year_to_process}-{month} from local files ...")

        for day in os.listdir(month_path):
            day_csv_path = os.path.join(month_path, day, "CSV")
            if not os.path.exists(day_csv_path):
                continue

            for file in os.listdir(day_csv_path):
                if "_unverified" in file or "playerpositioning" in file or not file.endswith(".csv"):
                    continue

                file_path = os.path.join(day_csv_path, file)

                try:
                    df = pd.read_csv(file_path)
                    required = {'Inning','Top/Bottom','Outs','Balls','Strikes','RunsScored','PlayResult'}
                    if not required.issubset(df.columns):
                        continue

                    df = df[df['Inning'] < 9]
                    if df.empty:
                        continue

                    df = add_runner_states(df)
                    df = add_game_state(df)
                    df = add_runs_remaining(df)

                    # Aggregate: Count and TotalRunsRemaining
                    agg = df.groupby('GameState')['RunsRemaining'].agg(['count', 'sum'])
                    for state, row in agg.iterrows():
                        summary[state]["Count"] += row['count']
                        summary[state]["TotalRunsRemaining"] += row['sum']

                    # NEW: Count how many times RunsRemaining == 0 per GameState
                    zero_stats = calculate_zero_run_probabilities(df)
                    for state, val in zero_stats.items():
                        summary[state]["ZeroRunsCount"] += val["ZeroRunsCount"]

                except Exception:
                    continue

    # ✅ Convert to final summary DataFrame
    monthly_df = pd.DataFrame([
        {
            "GameState": s,
            "Count": d["Count"],
            "TotalRunsRemaining": d["TotalRunsRemaining"],
            "ExpectedRuns": d["TotalRunsRemaining"]/d["Count"] if d["Count"] else 0,
            "ZeroRunsCount": d["ZeroRunsCount"],
            "ZeroRunProbability": d["ZeroRunsCount"]/d["Count"] if d["Count"] else 0
        }
        for s, d in summary.items()
    ])

    os.makedirs(save_path, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    month_range = f"{months[0]}to{months[-1]}" if len(months) > 1 else months[0]
    filename = f"GameState_Summary_prob_{year_to_process}_{month_range}_{timestamp}.csv"
    monthly_path = os.path.join(save_path, filename)
    monthly_df.to_csv(monthly_path, index=False)

    print(f"\n💾 Saved summary: {monthly_path}")
    print("\n✅ Local run complete.")
    return monthly_df

In [5]:
data_root = "/Users/suma/Downloads/Baseball_Project/v3"
save_path = "/Users/suma/Downloads/Baseball_Project/CSV_files"

# Example: Process Feb and Mar 2024
summary_df = build_gamestate_summary_local(data_root, save_path, year_to_process="2024", months_to_process=["02", "03", "04", "05", "06", "07",])
summary_df.head()


📅 Processing 2024-02 from local files ...

📅 Processing 2024-03 from local files ...

📅 Processing 2024-04 from local files ...

📅 Processing 2024-05 from local files ...

📅 Processing 2024-06 from local files ...

📅 Processing 2024-07 from local files ...

💾 Saved summary: /Users/suma/Downloads/Baseball_Project/CSV_files/GameState_Summary_prob_2024_02to07_20251027_1256.csv

✅ Local run complete.


Unnamed: 0,GameState,Count,TotalRunsRemaining,ExpectedRuns,ZeroRunsCount,ZeroRunProbability
0,000-O0-B0-S0,89383,72226,0.808051,56529,0.632436
1,000-O0-B0-S1,41065,29912,0.728406,27162,0.661439
2,000-O0-B0-S2,20129,12585,0.625217,14163,0.703612
3,000-O0-B1-S0,36474,32122,0.880682,22203,0.608735
4,000-O0-B1-S1,34155,26723,0.782404,22031,0.64503


Invalid Game States File Paths Info

In [10]:
def find_invalid_game_state_files_tabular(data_root):
    print("\n🔍 Scanning for files with invalid GameState values...\n")

    records = []

    for year in sorted(os.listdir(data_root)):
        year_path = os.path.join(data_root, year)
        if not os.path.isdir(year_path):
            continue

        for month in sorted(os.listdir(year_path)):
            month_path = os.path.join(year_path, month)
            if not os.path.isdir(month_path):
                continue

            for day in os.listdir(month_path):
                csv_dir = os.path.join(month_path, day, "CSV")
                if not os.path.exists(csv_dir):
                    continue

                for file in os.listdir(csv_dir):
                    if "_unverified" in file or "playerpositioning" in file or not file.endswith(".csv"):
                        continue

                    file_path = os.path.join(csv_dir, file)

                    try:
                        df = pd.read_csv(file_path)
                        required = {'Inning', 'Top/Bottom', 'Outs', 'Balls', 'Strikes'}
                        if not required.issubset(df.columns):
                            continue

                        # ✅ Apply inning filter
                        df = df[df['Inning'] < 9]
                        if df.empty:
                            continue

                        # 👇 Optional: if GameState is derived using runner states
                        df = add_runner_states(df)

                        # 🔍 Identify invalid rows
                        invalid_rows = df[(df['Outs'] > 2) | (df['Balls'] > 3) | (df['Strikes'] > 2)]

                        for _, row in invalid_rows.iterrows():
                            try:
                                game_state = f"{int(row['RunnerOn1B'])}{int(row['RunnerOn2B'])}{int(row['RunnerOn3B'])}-O{int(row['Outs'])}-B{int(row['Balls'])}-S{int(row['Strikes'])}"
                                records.append({
                                    "Invalid Game State": game_state,
                                    "File Path": file_path
                                })
                            except:
                                continue

                    except Exception as e:
                        print(f"❌ Error reading file {file_path}: {e}")

    # Display collected data
    if records:
        df_result = pd.DataFrame(records)
        print(df_result.to_string(index=False))
    else:
        print("✅ No invalid GameState values found.")

    print("\n✅ Scan complete.")

# 🔧 Usage
data_root = "/Users/suma/Downloads/Baseball_Project/v3"
find_invalid_game_state_files_tabular(data_root)


🔍 Scanning for files with invalid GameState values...

Invalid Game State                                                                                      File Path
      110-O1-B4-S2      /Users/suma/Downloads/Baseball_Project/v3/2024/03/20/CSV/20240319-EarleCombsStadium-1.csv
      000-O2-B1-S3          /Users/suma/Downloads/Baseball_Project/v3/2024/03/20/CSV/20240319-Matador Field-1.csv
      100-O3-B0-S1             /Users/suma/Downloads/Baseball_Project/v3/2024/03/19/CSV/20240316-RodenField-1.csv
      000-O3-B1-S3       /Users/suma/Downloads/Baseball_Project/v3/2024/03/19/CSV/20240315-SmithsBallparkUT-1.csv
      110-O2-B2-S3         /Users/suma/Downloads/Baseball_Project/v3/2024/03/25/CSV/20240324-NickDenesField-1.csv
      110-O2-B2-S4         /Users/suma/Downloads/Baseball_Project/v3/2024/03/25/CSV/20240324-NickDenesField-1.csv
      100-O2-B1-S3      /Users/suma/Downloads/Baseball_Project/v3/2024/03/25/CSV/20240324-JoeMillerBallpark-1.csv
      100-O2-B4-S1   /Users/suma

Game state summary for all years without invalid game states

In [11]:
def build_gamestate_summary_all_years(data_root, save_path):
    summary = defaultdict(lambda: {"Count": 0, "TotalRunsRemaining": 0})

    years = sorted([y for y in os.listdir(data_root) if os.path.isdir(os.path.join(data_root, y))])

    for year in years:
        year_path = os.path.join(data_root, year)
        months = sorted([m for m in os.listdir(year_path) if os.path.isdir(os.path.join(year_path, m))])

        for month in months:
            month_path = os.path.join(year_path, month)
            print(f"\n📅 Processing {year}-{month} from local files ...")

            for day in os.listdir(month_path):
                day_csv_path = os.path.join(month_path, day, "CSV")
                if not os.path.exists(day_csv_path):
                    continue

                for file in os.listdir(day_csv_path):
                    if "_unverified" in file or "playerpositioning" in file or not file.endswith(".csv"):
                        continue

                    file_path = os.path.join(day_csv_path, file)

                    try:
                        df = pd.read_csv(file_path)
                        required = {'Inning', 'Top/Bottom', 'Outs', 'Balls', 'Strikes', 'RunsScored', 'PlayResult'}
                        if not required.issubset(df.columns):
                            continue

                        df = df[df['Inning'] < 9]
                        if df.empty:
                            continue

                        df = add_runner_states(df)
                        df = add_game_state(df)
                        df = add_runs_remaining(df)

                        # ❌ Filter out invalid game states
                        df = df[(df['Outs'] <= 2) & (df['Balls'] <= 3) & (df['Strikes'] <= 2)]

                        agg = df.groupby('GameState')['RunsRemaining'].agg(['count', 'sum'])
                        for state, row in agg.iterrows():
                            summary[state]["Count"] += row['count']
                            summary[state]["TotalRunsRemaining"] += row['sum']

                    except Exception:
                        continue

    # 💾 Save final combined summary
    combined_df = pd.DataFrame([
        {
            "GameState": s,
            "Count": d["Count"],
            "TotalRunsRemaining": d["TotalRunsRemaining"],
            "ExpectedRuns": d["TotalRunsRemaining"] / d["Count"] if d["Count"] else 0
        }
        for s, d in summary.items()
    ])

    os.makedirs(save_path, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    filename = f"GameState_Summary_ALL_{timestamp}.csv"
    summary_path = os.path.join(save_path, filename)
    combined_df.to_csv(summary_path, index=False)

    print(f"\n✅ All-year summary saved: {summary_path}")
    return combined_df

In [12]:
data_root = "/Users/suma/Downloads/Baseball_Project/v3"
save_path = "/Users/suma/Downloads/Baseball_Project/CSV_files"

summary_df = build_gamestate_summary_all_years(data_root, save_path)
summary_df.head()


📅 Processing 2024-02 from local files ...

📅 Processing 2024-03 from local files ...

📅 Processing 2024-04 from local files ...

📅 Processing 2024-05 from local files ...

📅 Processing 2024-06 from local files ...

📅 Processing 2024-07 from local files ...

📅 Processing 2024-08 from local files ...

📅 Processing 2024-09 from local files ...

📅 Processing 2024-10 from local files ...

📅 Processing 2024-11 from local files ...

📅 Processing 2024-12 from local files ...

📅 Processing 2025-01 from local files ...

📅 Processing 2025-02 from local files ...

📅 Processing 2025-03 from local files ...

📅 Processing 2025-04 from local files ...

📅 Processing 2025-05 from local files ...

📅 Processing 2025-06 from local files ...

📅 Processing 2025-07 from local files ...

📅 Processing 2025-08 from local files ...

📅 Processing 2025-09 from local files ...

📅 Processing 2025-10 from local files ...

✅ All-year summary saved: /Users/suma/Downloads/Baseball_Project/CSV_files/GameState_Summary_ALL

Unnamed: 0,GameState,Count,TotalRunsRemaining,ExpectedRuns
0,000-O0-B0-S0,216461,173712,0.802509
1,000-O0-B0-S1,98943,72089,0.728591
2,000-O0-B0-S2,48433,30189,0.623315
3,000-O0-B1-S0,87406,76687,0.877365
4,000-O0-B1-S1,81918,63986,0.781098


In [13]:
def calculate_zero_run_probabilities(df):
    """
    Helper function to count how many times RunsRemaining == 0
    for each GameState within a dataframe.
    Returns a dictionary like: { 'GameState': {'ZeroRunsCount': int} }
    """
    zero_df = df[df['RunsRemaining'] == 0]
    zero_counts = zero_df['GameState'].value_counts().to_dict()
    return {
        state: {"ZeroRunsCount": count}
        for state, count in zero_counts.items()
    }


def build_gamestate_summary_all_years(data_root, save_path):
    summary = defaultdict(lambda: {
        "Count": 0,
        "TotalRunsRemaining": 0,
        "ZeroRunsCount": 0  # ✅ Added field to track zero-run events
    })

    # 🔍 Loop through all years
    years = sorted([y for y in os.listdir(data_root) if os.path.isdir(os.path.join(data_root, y))])

    for year in years:
        year_path = os.path.join(data_root, year)
        months = sorted([m for m in os.listdir(year_path) if os.path.isdir(os.path.join(year_path, m))])

        for month in months:
            month_path = os.path.join(year_path, month)
            print(f"\n📅 Processing {year}-{month} from local files ...")

            for day in os.listdir(month_path):
                day_csv_path = os.path.join(month_path, day, "CSV")
                if not os.path.exists(day_csv_path):
                    continue

                for file in os.listdir(day_csv_path):
                    if "_unverified" in file or "playerpositioning" in file or not file.endswith(".csv"):
                        continue

                    file_path = os.path.join(day_csv_path, file)

                    try:
                        df = pd.read_csv(file_path)
                        required = {'Inning', 'Top/Bottom', 'Outs', 'Balls', 'Strikes', 'RunsScored', 'PlayResult'}
                        if not required.issubset(df.columns):
                            continue

                        # ✅ Filter to valid innings
                        df = df[df['Inning'] < 9]
                        if df.empty:
                            continue

                        # ✅ Add runner and game state logic
                        df = add_runner_states(df)
                        df = add_game_state(df)
                        df = add_runs_remaining(df)

                        # ✅ Filter invalid GameStates
                        df = df[(df['Outs'] <= 2) & (df['Balls'] <= 3) & (df['Strikes'] <= 2)]

                        # ✅ Aggregate runs and counts
                        agg = df.groupby('GameState')['RunsRemaining'].agg(['count', 'sum'])
                        for state, row in agg.iterrows():
                            summary[state]["Count"] += row['count']
                            summary[state]["TotalRunsRemaining"] += row['sum']

                        # ✅ Count zero-run cases per GameState
                        zero_stats = calculate_zero_run_probabilities(df)
                        for state, val in zero_stats.items():
                            summary[state]["ZeroRunsCount"] += val["ZeroRunsCount"]

                    except Exception:
                        continue

    # 💾 Convert dictionary summary → DataFrame
    combined_df = pd.DataFrame([
        {
            "GameState": s,
            "Count": d["Count"],
            "TotalRunsRemaining": d["TotalRunsRemaining"],
            "ExpectedRuns": d["TotalRunsRemaining"] / d["Count"] if d["Count"] else 0,
            "ZeroRunsCount": d["ZeroRunsCount"],
            "ZeroRunProbability": d["ZeroRunsCount"] / d["Count"] if d["Count"] else 0
        }
        for s, d in summary.items()
    ])

    # 💾 Save summary
    os.makedirs(save_path, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    filename = f"GameState_Summary_ALL_prob_{timestamp}.csv"
    summary_path = os.path.join(save_path, filename)
    combined_df.to_csv(summary_path, index=False)

    print(f"\n✅ All-year summary saved: {summary_path}")
    return combined_df

In [14]:
data_root = "/Users/suma/Downloads/Baseball_Project/v3"
save_path = "/Users/suma/Downloads/Baseball_Project/CSV_files"

summary_df = build_gamestate_summary_all_years(data_root, save_path)
summary_df.head()


📅 Processing 2024-02 from local files ...

📅 Processing 2024-03 from local files ...

📅 Processing 2024-04 from local files ...

📅 Processing 2024-05 from local files ...

📅 Processing 2024-06 from local files ...

📅 Processing 2024-07 from local files ...

📅 Processing 2024-08 from local files ...

📅 Processing 2024-09 from local files ...

📅 Processing 2024-10 from local files ...

📅 Processing 2024-11 from local files ...

📅 Processing 2024-12 from local files ...

📅 Processing 2025-01 from local files ...

📅 Processing 2025-02 from local files ...

📅 Processing 2025-03 from local files ...

📅 Processing 2025-04 from local files ...

📅 Processing 2025-05 from local files ...

📅 Processing 2025-06 from local files ...

📅 Processing 2025-07 from local files ...

📅 Processing 2025-08 from local files ...

📅 Processing 2025-09 from local files ...

📅 Processing 2025-10 from local files ...

✅ All-year summary saved: /Users/suma/Downloads/Baseball_Project/CSV_files/GameState_Summary_ALL

Unnamed: 0,GameState,Count,TotalRunsRemaining,ExpectedRuns,ZeroRunsCount,ZeroRunProbability
0,000-O0-B0-S0,216461,173712,0.802509,137520,0.635311
1,000-O0-B0-S1,98943,72089,0.728591,65494,0.661937
2,000-O0-B0-S2,48433,30189,0.623315,33994,0.701877
3,000-O0-B1-S0,87406,76687,0.877365,53423,0.611205
4,000-O0-B1-S1,81918,63986,0.781098,52830,0.644913
