In [3]:
import json
import pandas as pd
from pathlib import Path

# 0) 
# result_root = Path("Code/3.Prediction")
result_root = Path.cwd()

# 1) Discover all result dirs matching your pattern
result_dirs = sorted([d for d in result_root.iterdir()
                      if d.is_dir() and d.name.startswith("mc_rg_results")])

if not result_dirs:
    raise FileNotFoundError(f"No mc_rg_results* directories found under {result_root}")

print("Found result directories:")
for d in result_dirs:
    print(" ", d)

# 2) Load all partial_*.json files
records = []
for d in result_dirs:
    for fn in sorted(d.glob("partial_*.json")):
        data = json.loads(fn.read_text(encoding="utf-8"))
        runs = data.get("runs", 0)
        for stat in data.get("stats", []):
            records.append({
                "id": stat.get("id"),
                "name": stat.get("name"),
                "wins": stat.get("wins", 0),
                "quarterfinals": stat.get("quarterfinals", 0),
                "semifinals": stat.get("semifinals", 0),
                "runs": runs
            })

# 3) Build DataFrame
df = pd.DataFrame(records)
print("\nConstructed DataFrame with columns:\n", df.columns.tolist())
print(f"Total rows: {len(df)}")

if df.empty:
    print("No records found. Check that your partial_*.json files actually contain a 'stats' list.")
else:
    # 4) Aggregate
    agg = (
        df.groupby(["id", "name"], as_index=False)
          .agg({
              "wins": "sum",
              "quarterfinals": "sum",
              "semifinals": "sum",
              "runs": "sum"
          })
    )

    # 5) Compute probabilities
    agg["champion_probability"] = agg["wins"] / agg["runs"]
    agg["qf_rate"] = agg["quarterfinals"] / agg["runs"]
    agg["sf_rate"] = agg["semifinals"] / agg["runs"]

    # 6) Sort
    agg = agg.sort_values("wins", ascending=False)

agg = agg[[
    "name",
    "wins",
    "runs",
    "champion_probability",
    "semifinals",
    "sf_rate",
    "quarterfinals",
    "qf_rate"
]]

# 2) Display the new table
display(agg.style.format({
    "champion_probability": "{:.2%}",
    "sf_rate": "{:.2%}",
    "qf_rate": "{:.2%}"
}))

Found result directories:
  /mnt/netapp2/Store_uni/home/ulc/cursos/curso363/TFM/Data-Analytics-with-HPC/Code/4.Prediction/RG_2025/mc_rg_results
  /mnt/netapp2/Store_uni/home/ulc/cursos/curso363/TFM/Data-Analytics-with-HPC/Code/4.Prediction/RG_2025/mc_rg_results_exculive
  /mnt/netapp2/Store_uni/home/ulc/cursos/curso363/TFM/Data-Analytics-with-HPC/Code/4.Prediction/RG_2025/mc_rg_results_seq

Constructed DataFrame with columns:
 ['id', 'name', 'wins', 'quarterfinals', 'semifinals', 'runs']
Total rows: 355


Unnamed: 0,name,wins,runs,champion_probability,semifinals,sf_rate,quarterfinals,qf_rate
76,ALCARAZ Carlos,188,2000,9.40%,406,20.30%,586,29.30%
39,TSITSIPAS Stefanos,184,2000,9.20%,378,18.90%,504,25.20%
63,SINNER Jannik,154,2000,7.70%,426,21.30%,596,29.80%
3,DJOKOVIC Novak,140,2000,7.00%,406,20.30%,626,31.30%
0,ZVEREV Alexander,114,2000,5.70%,308,15.40%,510,25.50%
44,RUUD Casper,78,2000,3.90%,240,12.00%,422,21.10%
78,RUNE Holger,74,2000,3.70%,268,13.40%,464,23.20%
32,RUBLEV Andrey,66,2000,3.30%,220,11.00%,316,15.80%
69,MUSETTI Lorenzo,52,2000,2.60%,202,10.10%,318,15.90%
55,DE MINAUR Alex,50,2000,2.50%,156,7.80%,322,16.10%


In [2]:
# 3) Verify that sum of all wins == total tournaments run
total_wins = agg["wins"].sum()
print(f"Sum of wins across all players: {total_wins}")

Sum of wins across all players: 200
