In [1]:
from pathlib import Path
import pandas as pd

In [2]:
# Base paths: BASE is the absolute path of the parent of the current working
# directory; the raw fbref CSV exports are looked up under data/raw/fbref.
BASE = Path("..").resolve()
RAW_BASE = BASE.joinpath("data", "raw", "fbref")

# Season directory names, "epl_2019-2020" through "epl_2024-2025".
SEASONS = [f"epl_{start}-{start + 1}" for start in range(2019, 2025)]
# CSV files expected inside every season directory.
FILES = [
    "schedule_results.csv",
    "standings.csv",
    "team_standard_stats.csv",
]

In [3]:
# Quick per-season check that every expected file exists and can be parsed.
# Each record holds the season name, one status cell per file
# ("OK" / "MISSING" / "READ_ERR: ...") and, for readable files, a
# "<file>_cols" cell with the number of columns found.
summary = []
for season in SEASONS:
    season_dir = RAW_BASE / season
    record = {"season": season}
    for filename in FILES:
        csv_path = season_dir / filename
        if not csv_path.exists():
            record[filename] = "MISSING"
            continue
        record[filename] = "OK"
        try:
            # Only the first two rows are read; that is enough to count columns.
            record[f"{filename}_cols"] = pd.read_csv(csv_path, nrows=2).shape[1]
        except Exception as exc:
            # Keep going, but surface the parse failure in the status cell.
            record[filename] = f"READ_ERR: {exc}"
    summary.append(record)

pd.DataFrame(summary)

Unnamed: 0,season,schedule_results.csv,schedule_results.csv_cols,standings.csv,standings.csv_cols,team_standard_stats.csv,team_standard_stats.csv_cols
0,epl_2019-2020,OK,14,OK,19,OK,32
1,epl_2020-2021,OK,14,OK,19,OK,32
2,epl_2021-2022,OK,14,OK,19,OK,32
3,epl_2022-2023,OK,14,OK,19,OK,32
4,epl_2023-2024,OK,14,OK,19,OK,32
5,epl_2024-2025,OK,14,OK,19,OK,32


In [4]:
def _read_season_csv(season, filename, label):
    """Read one season's CSV and tag every row with the season name.

    Parameters
    ----------
    season : str
        Name of the season sub-directory under ``RAW_BASE``.
    filename : str
        CSV file name inside that directory.
    label : str
        Human-readable table name used in the error message.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table with an added ``season`` column, or ``None`` when the
        file could not be read (the error is printed, not raised).
    """
    try:
        table = pd.read_csv(RAW_BASE / season / filename)
        table["season"] = season
    except Exception as exc:
        print(f"[ERR] {label} {season}: {exc}")
        return None
    return table


# Per-season frames for each table, plus a record of every read that failed.
all_standings = []
all_teamstats = []
failed_reads = []

for season in SEASONS:
    for label, filename, bucket in (
        ("standings", "standings.csv", all_standings),
        ("team stats", "team_standard_stats.csv", all_teamstats),
    ):
        table = _read_season_csv(season, filename, label)
        if table is None:
            failed_reads.append((label, season))
        else:
            bucket.append(table)

# pd.concat on an empty list raises a context-free "No objects to concatenate";
# fail with a message that names the table and the directory instead.
for label, bucket in (("standings", all_standings), ("team stats", all_teamstats)):
    if not bucket:
        raise ValueError(f"No {label} files could be read from {RAW_BASE}")

standings_all = pd.concat(all_standings, ignore_index=True)
teamstats_all = pd.concat(all_teamstats, ignore_index=True)

# Only announce success when every file was loaded; a partial merge is still
# produced (best effort) but is flagged so the gap is not overlooked.
if failed_reads:
    print(
        f"[WARN] {len(failed_reads)} file(s) failed to load; "
        f"combined tables are incomplete: {failed_reads}"
    )
else:
    print("✅ Объединение завершено!")
print("Standings shape:", standings_all.shape)
print("Team stats shape:", teamstats_all.shape)

✅ Объединение завершено!
Standings shape: (120, 20)
Team stats shape: (120, 33)
