### GETTING ISL Data

In [2]:
import pandas as pd
import numpy as np
from statsbombpy import sb
import os

# ----------------------------
# Target competitions (ISL 2021/22)
# ----------------------------
TARGET_COMPS = [
    # Use exact spelling and IDs from competitions.json
    ("Indian Super league", "2021/2022", 1238, 108),
]

# ----------------------------
# Extract Features from Shot Events
# ----------------------------
def extract_shot_features(events_df, competition_name):
    shots = events_df[events_df['type'] == 'Shot'].copy()

    def safe_extract(val):
        if isinstance(val, dict):
            return val.get('name')
        return val

    for col in [
        'shot_outcome','shot_body_part','shot_technique','shot_type',
        'play_pattern','possession_team','team','player'
    ]:
        if col in shots.columns:
            shots[col] = shots[col].apply(safe_extract)

    shots['goal'] = (shots['shot_outcome'] == 'Goal').astype(int)

    features = [
        'goal', 'location', 'shot_statsbomb_xg', 'minute', 'second',
        'possession', 'play_pattern', 'possession_team', 'team', 'player',
        'shot_body_part', 'shot_technique', 'shot_type', 'under_pressure',
        'off_camera', 'outcome', 'shot_outcome'
    ]
    shots = shots[[c for c in features if c in shots.columns]].copy()

    shots[['x','y']] = pd.DataFrame(shots['location'].tolist(), index=shots.index)
    shots.drop(columns=['location'], inplace=True)

    cat_cols = ['play_pattern','possession_team','team','player',
                'shot_body_part','shot_technique','shot_type','shot_outcome']
    for col in cat_cols:
        if col in shots.columns:
            shots[col] = pd.Categorical(shots[col]).codes

    shots['competition'] = competition_name
    return shots

# ----------------------------
# Extract Data from StatsBomb
# ----------------------------
def extract_isl_shot_data():
    all_shots = []

    for comp_name, season, comp_id, season_id in TARGET_COMPS:
        try:
            matches = sb.matches(competition_id=comp_id, season_id=season_id)
            for _, match in matches.iterrows():
                try:
                    events = sb.events(match_id=match['match_id'])
                    shots = extract_shot_features(events, f"{comp_name} {season}")
                    all_shots.append(shots)
                except Exception as e:
                    print(f"⚠️ Match {match['match_id']} failed: {e}")
        except Exception as e:
            print(f"❌ Failed for {comp_name} {season}: {e}")

    if all_shots:
        return pd.concat(all_shots, ignore_index=True)
    else:
        return pd.DataFrame()

# ----------------------------
# Run Extraction
# ----------------------------
if __name__ == "__main__":
    isl_shots = extract_isl_shot_data()
    if not isl_shots.empty:
        print(f"✅ Extracted {len(isl_shots)} shots from ISL 2021/22.")
        print(isl_shots.head())
        isl_shots.to_csv("isl_shots_2021_22.csv", index=False)
    else:
        print("❌ No shot data extracted.")




✅ Extracted 3095 shots from ISL 2021/22.
   goal  shot_statsbomb_xg  minute  second  possession  play_pattern  \
0     0           0.006878      10       8          26             3   
1     0           0.197586      13      50          31             3   
2     0           0.035779      20      16          44             1   
3     0           0.006839      29      18          68             3   
4     0           0.031176      38      25          82             2   

   possession_team  team  player  shot_body_part  shot_technique  shot_type  \
0                0     0      17               2               1          1   
1                1     1       8               0               1          1   
2                1     1      15               2               1          1   
3                1     1       0               2               1          1   
4                1     1      18               2               1          1   

  under_pressure off_camera  shot_outcome      x   