#### Imports and Global Settings

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mplsoccer import Pitch
from ipywidgets import interact, Dropdown
from joblib import load 
from statsbombpy import sb

import warnings
warnings.filterwarnings("ignore")

# Read the competitions table; its final row is a TOTAL summary line, so drop it
df_competitions = pd.read_csv("../task1_xg/data/df_competitions.csv").iloc[:-1]

# StatsBomb's own competition listing, which carries the numeric IDs
df_sb_competitions = sb.competitions()

# Attach the StatsBomb IDs by matching on competition/season names, then keep
# only the four columns used later, renamed to the notebook's naming scheme
df_competitions = (
    df_competitions
    .merge(
        df_sb_competitions,
        how="left",
        left_on=["Competition", "Season"],
        right_on=["competition_name", "season_name"],
    )
    .loc[:, ["Competition", "Season", "competition_id", "season_id"]]
    .rename(columns={"competition_id": "Competition_ID", "season_id": "Season_ID"})
)

# DS4 dataset
df_ds4 = pd.read_csv("../task1_xg/data/DS4.csv")

# Sanity checks on what was loaded
print("Competitions dataset shape:", df_competitions.shape)
print("\nColumns:", df_competitions.columns.tolist())
print("\nCompetitions available:", df_competitions['Competition'].unique())
print("\nDS4 shape:", df_ds4.shape)


Competitions dataset shape: (75, 4)

Columns: ['Competition', 'Season', 'Competition_ID', 'Season_ID']

Competitions available: ['FIFA World Cup' 'Champions League' 'La Liga' 'Copa del Rey'
 'North American League' 'FIFA U20 World Cup' 'Liga Profesional' 'Serie A'
 'UEFA Europa League' 'Premier League' '1. Bundesliga' 'Ligue 1'
 "FA Women's Super League" 'NWSL' "Women's World Cup" 'UEFA Euro'
 'Indian Super league' "UEFA Women's Euro" 'African Cup of Nations'
 'Major League Soccer' 'Copa America']

DS4 shape: (86833, 29)


In [None]:
# Extra per-shot information (outcome + freeze frame).
# low_memory=False avoids the mixed-dtype warning; 'id' is renamed to
# 'event_id' so it lines up with the key used by df_ds4.
shots_extra = (
    pd.read_csv("../task1_xg/data/shots_df.csv", low_memory=False)
    .rename(columns={"id": "event_id"})
    .loc[:, ["event_id", "shot_outcome", "shot_freeze_frame", "player", "team"]]
)

# Left-join the extra information onto DS4 and keep only the columns
# needed by the rest of the notebook
df_ds4 = df_ds4.merge(shots_extra, how="left", on="event_id")
df_ds4 = df_ds4[[
    "event_id",
    "match_id",
    "minute",
    "second",
    "period",
    "player",
    "team",
    "loc_x",
    "loc_y",
    "end_shot_x",
    "end_shot_y",
    "target_xg",
    "shot_outcome",
    "shot_freeze_frame",
]]

print("DS4 merged shape:", df_ds4.shape)
print("DS4 merged columns:", df_ds4.columns)


DS4 merged shape: (86833, 14)
DS4 merged columns: Index(['event_id', 'match_id', 'minute', 'second', 'period', 'player', 'team',
       'loc_x', 'loc_y', 'end_shot_x', 'end_shot_y', 'target_xg',
       'shot_outcome', 'shot_freeze_frame'],
      dtype='object')


#### Load all info about competitions, seasons, matches and shots

In [None]:
def load_competitions():
    """
    List the distinct competition names found in df_competitions,
    sorted in ascending order.
    """
    names = df_competitions["Competition"].unique()
    return sorted(names)


def load_seasons(competition: str):
    """
    List the distinct seasons recorded in df_competitions for the given
    competition, sorted in ascending order.
    """
    in_competition = df_competitions["Competition"] == competition
    seasons = df_competitions.loc[in_competition, "Season"].unique()
    return sorted(seasons)


def load_matches(competition: str, season: str):
    """
    Load matches for a given competition and season using statsbombpy.

    The (competition, season) pair is looked up in df_competitions to obtain
    its StatsBomb IDs, which are then passed to ``sb.matches``.

    Returns the matches DataFrame, or None (after printing a message) when
    the pair is not listed, has no StatsBomb IDs, or yields no matches.
    """
    rows = df_competitions.loc[
        (df_competitions["Competition"] == competition)
        & (df_competitions["Season"] == season)
    ]

    if rows.empty:
        print(f"No matches found for {competition} - {season}.")
        return None

    comp_id = rows["Competition_ID"].iloc[0]
    season_id = rows["Season_ID"].iloc[0]

    # The ID columns come from a left merge: pairs without a StatsBomb
    # counterpart hold NaN, and a single NaN turns the whole column into
    # floats. Bail out on missing IDs and hand plain integers to the lookup.
    if pd.isna(comp_id) or pd.isna(season_id):
        print(f"No StatsBomb IDs available for {competition} - {season}.")
        return None

    matches = sb.matches(competition_id=int(comp_id), season_id=int(season_id))
    if matches.empty:
        print(f"No matches returned by StatsBomb for {competition} - {season}.")
        return None

    return matches



def load_shots(match_id: int):
    """
    Return a copy of the DS4 rows whose match_id equals the given one.
    When there are none, print a hint and return None.
    """
    belongs_to_match = df_ds4["match_id"] == match_id
    shots = df_ds4[belongs_to_match].copy()

    if not shots.empty:
        return shots

    print(f"No shots found for match_id={match_id}. "
          "Try selecting another match or competition.")
    return None


#### Plot the field

In [61]:
def plot_shot(x, y, players=None):
    """
    Draw a StatsBomb pitch showing the shooter at (x, y) as a red star and
    an example goalkeeper at (120, 40) as a blue triangle, then show it.

    `players` is accepted but not used by the current implementation.
    """
    pitch = Pitch(
        pitch_type='statsbomb',
        pitch_length=120,
        pitch_width=80,
        pitch_color="grass",
        line_color="white",
    )
    _, ax = pitch.draw(figsize=(8, 5))

    # (x, y, marker, colour): shooter first, then the example goalkeeper
    markers = (
        (x, y, '*', 'red'),
        (120, 40, '^', 'blue'),
    )
    for mx, my, shape, colour in markers:
        pitch.scatter(mx, my, s=200, marker=shape, color=colour, ax=ax)

    plt.show()

In [62]:
# Goal centre and goal-mouth width used by the geometric features below
GOAL_X, GOAL_Y = 120, 40
GOAL_WIDTH = 7.32


def shot_distance(x, y):
    """Euclidean distance from (x, y) to the goal centre (GOAL_X, GOAL_Y)."""
    dx = GOAL_X - x
    dy = GOAL_Y - y
    return np.hypot(dx, dy)


def shot_angle(x, y):
    """Angle (radians) between the lines from (x, y) to the two goal posts."""
    half_width = GOAL_WIDTH/2
    left_post_y = GOAL_Y - half_width
    right_post_y = GOAL_Y + half_width

    dx = GOAL_X - x
    to_right_post = np.arctan2(right_post_y - y, dx)
    to_left_post = np.arctan2(left_post_y - y, dx)
    return abs(to_right_post - to_left_post)


def baseline_xg(x, y):
    """
    Baseline xG for a shot at (x, y): a logistic function of the shot
    distance (scaled by 120) and the shot angle (scaled by pi), with fixed
    coefficients.
    """
    distance = shot_distance(x, y)
    angle = shot_angle(x, y)
    logit = -2.2 - 4*(distance/120) + 3.8*(angle/np.pi)
    return 1 / (1 + np.exp(-logit))


In [None]:
from ipywidgets import Dropdown, VBox, Output
import matplotlib.pyplot as plt
from mplsoccer import Pitch
import ast

# Shared output widget that receives the plot and the printed shot details
output = Output()

# --- Initial dropdowns ---
# Only the competition dropdown starts enabled; the others stay disabled
# with a placeholder option until populated by the update callbacks.
comp_dd = Dropdown(
    options=["Select competition..."] + load_competitions(),
    description="Competition:",
)
season_dd = Dropdown(
    options=["Select season..."],
    description="Season:",
    disabled=True,
)
match_dd = Dropdown(
    options=["Select match..."],
    description="Match:",
    disabled=True,
)
shot_dd = Dropdown(
    options=["Select shot..."],
    description="Shot:",
    disabled=True,
)

# --- Update logic ---
def update_seasons(change):
    """
    Observer for the competition dropdown: fill and enable the season
    dropdown when a real competition is chosen, otherwise reset and disable
    it. The match and shot dropdowns are reset and disabled either way.
    """
    chosen = change["new"]
    has_competition = bool(chosen) and chosen != "Select competition..."

    season_options = ["Select season..."]
    if has_competition:
        season_options = season_options + list(load_seasons(chosen))
    season_dd.options = season_options
    season_dd.disabled = not has_competition

    # Cascade reset of the downstream dropdowns
    match_dd.options = ["Select match..."]
    match_dd.disabled = True
    shot_dd.options = ["Select shot..."]
    shot_dd.disabled = True

def update_matches(change):
    """
    Observer for the season dropdown: list the matches of the selected
    competition/season as "home vs away (match_id)" and enable the match
    dropdown, or reset and disable it when no season is chosen. The shot
    dropdown is reset and disabled either way.
    """
    season = change["new"]

    if season and season != "Select season...":
        matches = load_matches(comp_dd.value, season)
        if matches is None or matches.empty:
            labels = ["No matches available"]
        else:
            labels = ["Select match..."]
            for _, row in matches.iterrows():
                labels.append(
                    f"{row['home_team']} vs {row['away_team']} ({row['match_id']})"
                )
        match_dd.options = labels
        match_dd.disabled = False
    else:
        match_dd.options = ["Select match..."]
        match_dd.disabled = True

    # Cascade reset of the shot dropdown
    shot_dd.options = ["Select shot..."]
    shot_dd.disabled = True

def update_shots(change):
    """
    Observer for the match dropdown: when a "home vs away (match_id)" entry
    is selected, list that match's shots and enable the shot dropdown;
    otherwise reset and disable it.

    Each shot label starts with the row's index label in the shots
    DataFrame, followed by the minute and the player.
    """
    selection = change["new"]

    # Anything that is not a real match entry resets the shot dropdown
    if not (selection and "vs" in selection):
        shot_dd.options = ["Select shot..."]
        shot_dd.disabled = True
        return

    # The match id is the number inside the trailing parentheses
    match_id = int(selection.rsplit("(", 1)[-1].strip(")"))
    shots_df = load_shots(match_id)

    if shots_df is None or shots_df.empty:
        labels = ["No shots available"]
    else:
        labels = ["Select shot..."]
        for i, row in shots_df.iterrows():
            labels.append(f"{i}: Minute {row['minute']} - {row['player']}")

    shot_dd.options = labels
    shot_dd.disabled = False

def _freeze_frame_color(entry):
    """Return the marker colour for one freeze-frame entry (a dict)."""
    if entry.get("teammate", False):
        return "blue"
    # NOTE(review): StatsBomb freeze frames appear to carry the role under
    # entry["position"]["name"] rather than a "keeper" flag - confirm against
    # the data. Both forms are accepted here.
    position = entry.get("position")
    position_name = position.get("name") if isinstance(position, dict) else None
    if entry.get("keeper", False) or position_name == "Goalkeeper":
        return "green"
    return "black"


def update_demo(change):
    """
    Observer for the shot dropdown: plot the selected shot and its freeze
    frame on a pitch and print player, predicted xG, true xG and outcome.

    The output area is cleared on every call; nothing is drawn while a
    placeholder entry is selected.
    """
    with output:
        output.clear_output()

        selection = change["new"]
        # Match the placeholder entries exactly, so that a player name
        # containing "Select" or "No " cannot suppress the plot.
        if not selection or selection in ("Select shot...", "No shots available"):
            return

        # The label prefix is the shot's index label in the shots DataFrame
        shot_idx = int(selection.split(":")[0])
        match_id = int(match_dd.value.split("(")[-1].strip(")"))
        shots_df = load_shots(match_id)
        if shots_df is None or shots_df.empty:
            print("No shots found for this match in DS4")
            return

        # Look the shot up by label (.loc), not by position (.iloc): the
        # labels come from the full DS4 frame and are not 0..n-1 per match.
        if shot_idx not in shots_df.index:
            print("Selected shot not found for this match in DS4")
            return
        shot_row = shots_df.loc[shot_idx]
        x, y = shot_row["loc_x"], shot_row["loc_y"]

        # Plot pitch
        fig, ax = plt.subplots(figsize=(8, 6))
        pitch = Pitch(pitch_type="statsbomb", line_color="black")
        pitch.draw(ax=ax)

        # Draw shot
        ax.scatter(x, y, c="red", s=120, marker="*", label="Shot")

        # Draw freeze frame players. The merged DS4 column is named
        # "shot_freeze_frame"; values read from CSV are strings holding a
        # Python literal, and missing values are NaN (skipped).
        raw_freeze = shot_row["shot_freeze_frame"]
        if isinstance(raw_freeze, (list, tuple)):
            freeze = raw_freeze
        elif isinstance(raw_freeze, str):
            try:
                freeze = ast.literal_eval(raw_freeze)
            except (ValueError, SyntaxError) as e:
                print("Could not parse freeze_frame:", e)
                freeze = []
        else:
            freeze = []

        try:
            for p in freeze:
                px, py = p["location"]
                ax.scatter(px, py, c=_freeze_frame_color(p), s=80, alpha=0.8)
        except (KeyError, TypeError, ValueError, AttributeError) as e:
            print("Could not parse freeze_frame:", e)

        plt.show()

        # Print info
        print(f"Player: {shot_row['player']} ({shot_row['team']})")
        print("Predicted xG:", baseline_xg(x, y))
        print("True xG:", shot_row["target_xg"])
        print("Shot outcome:", shot_row["shot_outcome"])

# --- Wire each dropdown to the callback that reacts to its value ---
for dropdown, handler in (
    (comp_dd, update_seasons),
    (season_dd, update_matches),
    (match_dd, update_shots),
    (shot_dd, update_demo),
):
    dropdown.observe(handler, names="value")

# --- Stack the four dropdowns above the shared output area and display ---
VBox(children=[comp_dd, season_dd, match_dd, shot_dd, output])


VBox(children=(Dropdown(description='Competition:', options=('Select competition...', '1. Bundesliga', 'Africa…