In [1]:
import json
import pandas as pd

# Load fixtures
# The encoding is passed explicitly so the read does not depend on the
# platform's locale default (JSON interchange files are UTF-8).
with open("../data/raw/fixtures.json", encoding="utf-8") as f:
    fixtures = json.load(f)
fixtures_df = pd.DataFrame(fixtures)

# Load bootstrap-static.json to get team strengths
with open("../data/raw/bootstrap-static.json", encoding="utf-8") as f:
    data = json.load(f)
# Only the "teams" entry of the bootstrap payload is turned into a frame here.
teams_df = pd.DataFrame(data["teams"])

In [2]:
# Lookup tables keyed by team id, all built from the same teams frame.
team_ids = teams_df["id"]

# Map team ID to name
team_id_to_name = {team_id: name for team_id, name in zip(team_ids, teams_df["name"])}

# Strength dictionaries: one per (attack/defence, home/away) combination.
# NOTE(review): none of these four are referenced in the cells visible here.
att_home, att_away, def_home, def_away = (
    dict(zip(team_ids, teams_df[strength_column]))
    for strength_column in (
        "strength_attack_home",
        "strength_attack_away",
        "strength_defence_home",
        "strength_defence_away",
    )
)

In [3]:
# Expand every fixture into two records, one from each side's point of view,
# so downstream joins can be done per (team, opponent) pair.
rows = []

for _, fixture in fixtures_df.iterrows():
    gameweek = fixture["event"]
    home_name = team_id_to_name[fixture["team_h"]]
    away_name = team_id_to_name[fixture["team_a"]]

    # Home team's view first, then the away team's view of the same match.
    for team, opponent, is_home in (
        (home_name, away_name, True),
        (away_name, home_name, False),
    ):
        rows.append({
            "gameweek": gameweek,
            "team": team,
            "opponent": opponent,
            "is_home": is_home,
        })

# Convert to DataFrame, ordered by gameweek and then team name
fixture_difficulty_df = pd.DataFrame(rows).sort_values(by=["gameweek", "team"])

In [4]:
# Persist the per-team fixture table; the row index is not written out.
fixture_difficulty_path = "../data/processed/fixture_difficulty.csv"
fixture_difficulty_df.to_csv(fixture_difficulty_path, index=False)

In [6]:
import pandas as pd

# Read the processed fixture table and the team form table from disk
fixture_path = "../data/processed/fixture_difficulty.csv"
team_form_path = "../data/processed/team_form_dynamic.csv"
fixture_df = pd.read_csv(fixture_path)
team_form_df = pd.read_csv(team_form_path)

# Preview the first rows of both frames to confirm their structure
(fixture_df.head(), team_form_df.head())

(   gameweek         team        opponent  is_home
 0         1      Arsenal          Wolves     True
 1         1  Aston Villa        West Ham    False
 2         1  Bournemouth   Nott'm Forest    False
 3         1    Brentford  Crystal Palace     True
 4         1     Brighton         Everton    False,
           team  att_form_home  def_form_home  att_form_away  def_form_away  \
 0      Arsenal           1.00           7.33           0.67           7.33   
 1  Aston Villa           0.00           0.00           0.00           0.00   
 2  Bournemouth           0.33          14.00           1.33          13.00   
 3    Brentford           0.33           7.00           1.00          11.00   
 4     Brighton           1.33          16.33           2.00          13.33   
 
    att_form_home_norm  att_form_away_norm  def_form_home_norm  \
 0               0.500            0.250936            0.792528   
 1               0.000            0.000000            1.000000   
 2               0.

In [8]:
# Step 1: Prepare team form for merging
# Two renamed views of the same form table: every non-key column gets a
# "team_" or "opp_" prefix so both views can sit side by side after merging.
# The key column stays "team" in the first view and becomes "opponent" in the
# second.
team_column_names = {
    col: f"team_{col}" for col in team_form_df.columns if col != "team"
}
opp_column_names = {
    col: "opponent" if col == "team" else f"opp_{col}"
    for col in team_form_df.columns
}
form_main = team_form_df.rename(columns=team_column_names)
form_opp = team_form_df.rename(columns=opp_column_names)

# Step 2: Merge into fixture table
# Left joins keep every fixture row; a name with no match in the form table
# yields NaN in the corresponding form columns.
merged_df = (
    fixture_df
    .merge(form_main, on="team", how="left")
    .merge(form_opp, on="opponent", how="left")
)

# Step 3: Compute xG_score and xCS_score
def compute_xG(row):
    """Return the attacking score for one fixture row.

    The score is a weighted blend: 0.6 times the team's own normalised
    attacking form at the venue it plays at, plus 0.4 times the complement
    (1 - value) of the opponent's normalised defensive form at the opposite
    venue.

    Args:
        row: Mapping-like row providing "is_home" plus the
            "team_att_form_*_norm" and "opp_def_form_*_norm" entries.

    Returns:
        The weighted score.
        NOTE(review): it stays within [0, 1] only if the *_norm inputs do;
        confirm against the code that produces them.
    """
    # Pick the column pair that matches the venue: own attack at this venue,
    # opponent defence at the other one.
    if row["is_home"]:
        attack_key, defence_key = "team_att_form_home_norm", "opp_def_form_away_norm"
    else:
        attack_key, defence_key = "team_att_form_away_norm", "opp_def_form_home_norm"
    return 0.6 * row[attack_key] + 0.4 * (1 - row[defence_key])

def compute_xCS(row):
    """Return the defensive (clean-sheet style) score for one fixture row.

    The score is a weighted blend: 0.6 times the team's own normalised
    defensive form at the venue it plays at, plus 0.4 times the complement
    (1 - value) of the opponent's normalised attacking form at the opposite
    venue.

    Args:
        row: Mapping-like row providing "is_home" plus the
            "team_def_form_*_norm" and "opp_att_form_*_norm" entries.

    Returns:
        The weighted score.
        NOTE(review): it stays within [0, 1] only if the *_norm inputs do;
        confirm against the code that produces them.
    """
    # Default to the away-fixture columns, then switch if the team is at home.
    defence_key = "team_def_form_away_norm"
    attack_key = "opp_att_form_home_norm"
    if row["is_home"]:
        defence_key = "team_def_form_home_norm"
        attack_key = "opp_att_form_away_norm"
    return 0.6 * row[defence_key] + 0.4 * (1 - row[attack_key])

# Apply each row-wise scorer and store its result under the matching column.
for score_column, scorer in (("xG_score", compute_xG), ("xCS_score", compute_xCS)):
    merged_df[score_column] = merged_df.apply(scorer, axis=1)

# Select relevant columns for output
output_columns = ["gameweek", "team", "opponent", "is_home", "xG_score", "xCS_score"]
final_fixture_scores = merged_df[output_columns].sort_values(by=["gameweek", "team"])

print(final_fixture_scores.head())

   gameweek         team        opponent  is_home  xG_score  xCS_score
0         1      Arsenal          Wolves     True  0.497670   0.676266
1         1  Aston Villa        West Ham    False  0.000000   1.000000
2         1  Bournemouth   Nott'm Forest    False  0.325256   0.524673
3         1    Brentford  Crystal Palace     True  0.230733   0.831683
4         1     Brighton         Everton    False  0.607944   0.451684
