In [2]:
from pathlib import Path

import pandas as pd

# Split every fbref CSV export into one JSON file per metric group.
#
# For each "<name>.csv" found one directory below DATA_PATH, the matching
# reference JSON (player / keeper / team) supplies a mapping of
# metric-group key -> column list. Each column subset is written to
# "<csv parent>/<season>/<name>_<group suffix>.json".

root = Path.cwd()
# Walk upward until a directory containing ".git" is found; the loop also
# stops at the filesystem root so it cannot spin forever outside a repo.
while not (root / ".git").exists() and root.parent != root:
    root = root.parent

DATA_PATH = root / "data" / "fbref"
files = list(DATA_PATH.glob("*/*.csv"))

ref = DATA_PATH / "references" / "fbref_player_reference.json"
gk_ref = DATA_PATH / "references" / "fbref_keeper_reference.json"
team_ref = DATA_PATH / "references" / "fbref_team_reference.json"


def one_hot_positions(frame: pd.DataFrame) -> pd.DataFrame:
    """Replace the ``position`` column with four 0/1 indicator columns.

    ``position`` holds comma-separated codes (spaces are ignored). The
    result always carries ``position_GK``, ``position_DF``, ``position_MF``
    and ``position_FW`` in that order; codes outside that set are dropped
    and codes absent from the data yield an all-zero column.

    Args:
        frame: Table that contains a ``position`` column.

    Returns:
        A new frame: the input without ``position``, followed by the
        indicator columns. The input frame is not modified.

    Raises:
        KeyError: If ``frame`` has no ``position`` column.
    """
    pos = frame["position"].str.replace(" ", "", regex=False)
    dummies = pos.str.get_dummies(sep=",")
    # reindex pins both the set and the order of the indicator columns,
    # independent of which codes happen to occur in this particular file.
    dummies = dummies.reindex(columns=["GK", "DF", "MF", "FW"], fill_value=0)
    dummies = dummies.add_prefix("position_")
    return pd.concat([frame.drop(columns="position"), dummies], axis=1)


# Reference path -> {metric-group key: column list}. Filled lazily so each
# reference file is parsed at most once, and only if some CSV needs it.
metrics_by_ref = {}

for current_file in files:
    parent_dir = current_file.parent
    file_name = current_file.stem
    # The season tag is the last five characters of the stem,
    # e.g. "22_23" for "PL_team_22_23".
    year = file_name[-5:]

    print(f"Processing: {file_name}")

    df = pd.read_csv(current_file)

    # The file name decides which reference applies; "keeper" is checked
    # before "team", and anything else is treated as a player file.
    if "keeper" in file_name:
        ref_path = gk_ref
    elif "team" in file_name:
        ref_path = team_ref
    else:
        ref_path = ref

    if ref_path not in metrics_by_ref:
        ref_df = pd.read_json(ref_path).T
        # After the transpose each row is one metric group; the second
        # column (by position) is used as that group's column selector.
        metrics_by_ref[ref_path] = ref_df.iloc[:, 1].to_dict()
    metrics = metrics_by_ref[ref_path]

    print(metrics.keys())

    # The output directory depends only on the file, not on the metric group.
    output_dir = parent_dir / year
    output_dir.mkdir(parents=True, exist_ok=True)

    is_team_file = "team" in file_name

    for k, m in metrics.items():
        # Column selection already builds a new frame, so no explicit
        # copy of the full table is needed first.
        _df = df[m]

        if not is_team_file:
            _df = one_hot_positions(_df)

        # Save file
        # NOTE(review): only the last "_"-separated token of the group key
        # is kept (e.g. "team_all_in_one" -> "..._one.json"); two keys that
        # share a final token would overwrite each other - confirm the
        # reference keys stay unique in their last token.
        json_path = output_dir / f"{file_name}_{k.split('_')[-1]}.json"
        print(f"Saving to: {json_path}")
        _df.to_json(json_path, orient="records", indent=4)

Processing: PL_team_22_23
dict_keys(['team_all_in_one', 'team_shooting', 'team_passing', 'team_passing_types', 'team_gca', 'team_defense', 'team_possession', 'team_misc', 'team_playing_time', 'team_keepers_adv'])
Saving to: /home/kheaw/projects/special_scoop/data/fbref/PL_team/22_23/PL_team_22_23_one.json
Saving to: /home/kheaw/projects/special_scoop/data/fbref/PL_team/22_23/PL_team_22_23_shooting.json
Saving to: /home/kheaw/projects/special_scoop/data/fbref/PL_team/22_23/PL_team_22_23_passing.json
Saving to: /home/kheaw/projects/special_scoop/data/fbref/PL_team/22_23/PL_team_22_23_types.json
Saving to: /home/kheaw/projects/special_scoop/data/fbref/PL_team/22_23/PL_team_22_23_gca.json
Saving to: /home/kheaw/projects/special_scoop/data/fbref/PL_team/22_23/PL_team_22_23_defense.json
Saving to: /home/kheaw/projects/special_scoop/data/fbref/PL_team/22_23/PL_team_22_23_possession.json
Saving to: /home/kheaw/projects/special_scoop/data/fbref/PL_team/22_23/PL_team_22_23_misc.json
Saving to: /