In [13]:
#!/usr/bin/env python3

import pandas as pd
import sys

def parse_home_away_from_url(raw_val):
    """
    Extract the home and away tokens from a match_report URL.

    Given a URL such as
      '/en/matches/bf52349b/Fulham-Arsenal-September-12-2020-Premier-League'
    the final path segment ('Fulham-Arsenal-September-12-2020-Premier-League')
    is split on '-' and its first two tokens are returned as (home, away).

    Surrounding whitespace and trailing slashes are ignored, so the same URL
    written with a trailing '/' parses to the same result.

    Returns (None, None) when the value is missing (NaN/None), when the final
    segment has fewer than two '-'-separated tokens, or when either of the
    first two tokens is empty.

    NOTE: because the split is purely on '-', a team whose name contains a
    hyphen in the URL (e.g. 'Manchester-City-Arsenal-...') yields
    ('Manchester', 'City'). Only the first two tokens are ever returned.
    """
    if pd.isna(raw_val):
        return (None, None)

    # Convert to string in case it's not already, then drop whitespace and any
    # trailing slash so the last path segment is the slug rather than ''.
    url_str = str(raw_val).strip().rstrip('/')

    # Everything after the last remaining slash,
    # e.g. 'Fulham-Arsenal-September-12-2020-Premier-League'
    last_part = url_str.rsplit('/', 1)[-1]
    parts = last_part.split('-')
    if len(parts) < 2:
        # We can't parse out 2 tokens
        return (None, None)

    # The user indicated "first chunk is home, second chunk is away"
    home, away = parts[0], parts[1]
    if not home or not away:
        # A leading or doubled '-' produced an empty token; treat as unparseable
        # instead of emitting '' as a team name.
        return (None, None)
    return (home, away)

def add_home_away_columns(csv_path):
    """
    Add 'home_team_ind' and 'away_team_ind' columns to the CSV at 'csv_path'.

    The file is read with pandas, every 'match_report' value is passed through
    parse_home_away_from_url, and the first / second element of each returned
    pair is stored in 'home_team_ind' / 'away_team_ind'. The result is written
    back to the same path without the index column.

    Read and write failures are caught and reported with print(). A file that
    cannot be read, or that has no 'match_report' column, is left untouched.
    The function returns None in every case.
    """
    print(f"[INFO] Processing: {csv_path}")

    try:
        frame = pd.read_csv(csv_path)
    except Exception as read_err:
        print(f"    [ERROR] Could not read {csv_path}: {read_err}")
        return

    if "match_report" not in frame.columns:
        print(f"    [WARNING] No 'match_report' column in {csv_path}; skipping.")
        return

    # One (home, away) tuple per row
    parsed_pairs = frame["match_report"].apply(parse_home_away_from_url)

    # Fan the tuple positions out into their own columns, home first.
    # 'slot' is bound as a default argument so each lambda keeps its own index.
    for slot, column_name in enumerate(("home_team_ind", "away_team_ind")):
        frame[column_name] = parsed_pairs.apply(lambda pair, slot=slot: pair[slot])

    # Overwrite the original file with the augmented table
    try:
        frame.to_csv(csv_path, index=False)
        print(f"    [INFO] Saved updated CSV with home_team_ind & away_team_ind to {csv_path}")
    except Exception as write_err:
        print(f"    [ERROR] Could not write {csv_path}: {write_err}")

def main():
    """
    Run add_home_away_columns over every hard-coded CSV, league by league.

    Files are processed in the order listed: the Premier League group first,
    then the La Liga group. Adjust the groups as needed (or pass the paths in
    some other way, e.g. via sys.argv).
    """
    # One tuple of file names per league. Note that the La Liga group has no
    # 'misc' entry, unlike the Premier League group.
    league_groups = (
        (
            "ENG-Premier League_schedule.csv",
            "eng-premier_league_defense_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
            "eng-premier_league_goal_shot_creation_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
            "eng-premier_league_keeper_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
            "eng-premier_league_misc_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
            "eng-premier_league_passing_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
            "eng-premier_league_passing_types_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
            "eng-premier_league_possession_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
            "eng-premier_league_shooting_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
        ),
        (
            "ESP-La Liga_schedule.csv",
            "esp-la_liga_defense_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
            "esp-la_liga_goal_shot_creation_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
            "esp-la_liga_keeper_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
            "esp-la_liga_passing_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
            "esp-la_liga_passing_types_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
            "esp-la_liga_possession_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
            "esp-la_liga_shooting_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv",
        ),
    )

    for group in league_groups:
        for csv_path in group:
            add_home_away_columns(csv_path)


if __name__ == "__main__":
    main()


[INFO] Processing: ENG-Premier League_schedule.csv
    [INFO] Saved updated CSV with home_team_ind & away_team_ind to ENG-Premier League_schedule.csv
[INFO] Processing: eng-premier_league_defense_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv
    [INFO] Saved updated CSV with home_team_ind & away_team_ind to eng-premier_league_defense_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv
[INFO] Processing: eng-premier_league_goal_shot_creation_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv
    [INFO] Saved updated CSV with home_team_ind & away_team_ind to eng-premier_league_goal_shot_creation_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv
[INFO] Processing: eng-premier_league_keeper_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2025'].csv
    [INFO] Saved updated CSV with home_team_ind & away_team_ind to eng-premier_league_keeper_['2020-2021',_'2021-2022',_'2022-2023',_'2023-2024',_'2024-2