In [1]:
from mplsoccer.statsbomb import Sbopen
from db_connection import get_db
from sql_schemas import Matches, Events, EventTypes, Teams, Competitions, PlayerPositions, Person, Positions, EventSubTypes, EventOutcomes, Techniques, Shots, Passes, PlayPatterns, PlayingTimes
import pandas as pd
import numpy as np
import ipywidgets as widgets
from requests import Session

In [2]:
# Client object used by every StatsBomb download in this notebook
# (competition(), match(), event() and frame() are called on it below).
statsbomb_api = Sbopen()
# get_db() is consumed as an iterator: the first object it yields is kept as the
# notebook-wide database session and is closed at the end of the loading cell.
# NOTE(review): if get_db() is a generator with its own cleanup after the yield,
# that cleanup only runs when the generator is closed or collected - confirm.
db = next(get_db())
# Competition/season table; the loading cell fetches it again before using it.
competition = statsbomb_api.competition()

In [None]:
def insert_player_positions(db: Session, df_event):
    """Store the freeze-frame positions of events that have none stored yet.

    Args:
        db: Session exposing ``query``, ``bulk_save_objects`` and ``commit``.
        df_event: Indexable whose element 0 is a frame with the columns
            ``id`` (event id), ``match_id``, ``teammate``, ``x`` and ``y``.

    An event counts as already stored as soon as one ``PlayerPositions`` row
    with its ``event_id`` exists; all rows of such an event are skipped.
    Commits only when at least one row was inserted.
    """
    frame_rows = df_event[0].to_dict(orient="records")
    candidate_ids = {row["id"] for row in frame_rows}

    stored_hits = (
        db.query(PlayerPositions.event_id)
        .filter(PlayerPositions.event_id.in_(candidate_ids))
        .all()
    )
    known_event_ids = {hit.event_id for hit in stored_hits}

    pending = []
    for row in frame_rows:
        if row["id"] in known_event_ids:
            continue
        pending.append(
            PlayerPositions(
                match_id=row['match_id'],
                teammate=row['teammate'],
                x=row['x'],
                y=row['y'],
                event_id=row['id'],
            )
        )

    # Nothing new: leave the session untouched (no insert, no commit).
    if not pending:
        return

    db.bulk_save_objects(pending)
    db.commit()

In [4]:
def insert_competitions(db: Session, df_competitions):
    """Insert the competition/season rows of ``df_competitions`` not stored yet.

    Args:
        db: Session exposing ``query``, ``bulk_save_objects`` and ``commit``.
        df_competitions: Frame with one row per competition and season and the
            columns ``competition_id``, ``competition_name``, ``season_id``,
            ``season_name``, ``country_name``, ``competition_gender``,
            ``competition_youth`` and ``competition_international``.

    A row counts as already stored only when both its ``competition_id`` and
    its ``season_id`` match a stored row. Checking the competition id alone
    would silently skip a new season of a competition that is already present.
    Commits only when at least one row was inserted.
    """
    competition_records = df_competitions.to_dict(orient="records")
    competition_ids = {rec["competition_id"] for rec in competition_records}

    stored_rows = (
        db.query(Competitions.competition_id, Competitions.season_id)
        .filter(Competitions.competition_id.in_(competition_ids))
        .all()
    )
    seen_keys = {(row.competition_id, row.season_id) for row in stored_rows}

    new_competitions = []
    for rec in competition_records:
        key = (rec['competition_id'], rec['season_id'])
        if key in seen_keys:
            continue
        # Remember the key so a repeated input row is not inserted twice either.
        seen_keys.add(key)
        new_competitions.append(
            Competitions(
                competition_id=rec['competition_id'],
                competition_name=rec['competition_name'],
                season_id=rec['season_id'],
                season_name=rec['season_name'],
                country_name=rec['country_name'],
                competition_gender=rec['competition_gender'],
                competition_youth=rec['competition_youth'],
                competition_international=rec['competition_international'],
            )
        )

    if new_competitions:
        db.bulk_save_objects(new_competitions)
        db.commit()


In [5]:
def init_table(db: Session, df, tableClass, id_column, name_column):
    """Add missing ``(id, name)`` rows to a lookup table.

    Args:
        db: Session exposing ``query`` and ``bulk_save_objects``.
        df: Frame holding the id and name columns.
        tableClass: Mapped class with ``id`` and ``name`` attributes.
        id_column: Name of the column in ``df`` that supplies ``id``.
        name_column: Name of the column in ``df`` that supplies ``name``.

    One row is produced per distinct id (the first name seen wins), so an id
    that appears with two spellings cannot yield two rows with the same key.
    Missing ids and the placeholder id ``-1`` are never inserted. This
    function does not commit; the caller is expected to.
    """
    rows = (
        df.loc[:, [id_column, name_column]]
        .dropna(subset=[id_column])
        .drop_duplicates(subset=id_column)
        .to_dict(orient="records")
    )
    row_ids = {p[id_column] for p in rows}

    # Only the ids are needed for the existence check, not the full rows.
    existing_rows = db.query(tableClass.id).filter(tableClass.id.in_(row_ids)).all()
    existing_row_ids = {int(row.id) for row in existing_rows}
    # -1 is the fill value callers in this notebook use for missing data;
    # treating it as "already present" keeps it out of the table.
    existing_row_ids.add(-1)

    new_rows = [
        tableClass(id=p[id_column], name=p[name_column])
        for p in rows
        if p[id_column] not in existing_row_ids
    ]
    if new_rows:
        db.bulk_save_objects(new_rows)

In [6]:
def insert_event_data(db: Session, df_event):
    """Store a match's events, their shot/pass details and the lookup rows.

    Args:
        db: Session exposing ``query``, ``bulk_save_objects`` and ``commit``.
            NOTE(review): the notebook imports ``Session`` from ``requests``,
            but the calls made here are ORM-session calls - confirm the
            intended import.
        df_event: Indexable whose element 0 is the event frame.

    Missing values in the event frame are replaced by ``-1`` before anything
    is read from it. Events whose id is already stored are skipped, together
    with their shot and pass rows. Lookup tables (persons, event types,
    positions, sub types, outcomes, techniques, play patterns) are topped up
    through ``init_table``.

    The work is committed at the end, matching the other insert helpers in
    this notebook, so the stored events do not depend on a later call
    happening to commit the session.
    """
    df = df_event[0].fillna(-1)
    event_records = df.to_dict(orient="records")

    event_ids = {p["id"] for p in event_records}
    # Only the ids are needed for the existence check.
    existing_events = db.query(Events.id).filter(Events.id.in_(event_ids)).all()
    existing_event_ids = {str(row.id) for row in existing_events}

    # Filter once; events, shots and passes are all built from this list.
    fresh_records = [p for p in event_records if p['id'] not in existing_event_ids]

    new_events = [
        Events(
            id=p['id'],
            match_id=p['match_id'],
            x=p['x'],
            y=p['y'],
            type_id=p['type_id'],
            player_id=p['player_id'],
            position_id=p['position_id'],
            end_x=p['end_x'],
            end_y=p['end_y'],
            period=p['period'],
            timestamp=p['timestamp'],
            team_id=p['team_id'],
            duration=p['duration'],
            possession_team_id=p['possession_team_id'],
            outcome_id=p['outcome_id'],
            sub_type_id=p['sub_type_id'],
            possession=p['possession']
        )
        for p in fresh_records
    ]
    new_shots = [
        Shots(
            id=p['id'],
            technique_id=p['technique_id'],
            statsbomb_xg=p['shot_statsbomb_xg'],
            # After fillna the flag is either True or the -1 placeholder,
            # so only a literal True counts.
            first_time=p.get('shot_first_time', False) is True,
            # Turn the -1 placeholder back into "no reference".
            key_pass_id=p['shot_key_pass_id'] if p['shot_key_pass_id'] != -1 else None,
            one_on_one=p.get('shot_one_on_one', False) is True
        )
        for p in fresh_records if p['type_name'] == "Shot"
    ]
    new_passes = [
        Passes(
            id=p['id'],
            recipient_id=p['pass_recipient_id'],
            pass_length=p['pass_length'],
            pass_angle=p['pass_angle'],
            pass_height=p['pass_height_name'],
            assisted_shot_id=p['pass_assisted_shot_id'] if p['pass_assisted_shot_id'] != -1 else None
        )
        for p in fresh_records if p['type_name'] == "Pass"
    ]

    # Shots and passes are subsets of the new events, so there is nothing to
    # save for them when no event is new.
    if new_events:
        db.bulk_save_objects(new_events)
        db.bulk_save_objects(new_shots)
        db.bulk_save_objects(new_passes)

    init_table(db, df, Person, 'player_id', 'player_name')
    init_table(db, df, EventTypes, 'type_id', 'type_name')
    init_table(db, df, Positions, 'position_id', 'position_name')
    init_table(db, df, EventSubTypes, 'sub_type_id', 'sub_type_name')
    init_table(db, df, EventOutcomes, 'outcome_id', 'outcome_name')
    init_table(db, df, Techniques, 'technique_id', 'technique_name')
    init_table(db, df, PlayPatterns, 'play_pattern_id', 'play_pattern_name')

    db.commit()

In [7]:
def to_absolute_seconds(period, time_in_period):
    """Convert a (period, time within that period) pair to seconds of match clock.

    Args:
        period: Period number of the event.
        time_in_period: Object with ``hour``, ``minute`` and ``second``
            attributes (e.g. ``datetime.time``) counted from the start of
            the period. Sub-second parts are ignored.

    Returns:
        Nominal start of the period in seconds plus the elapsed time.

    Period layout assumed from the StatsBomb open-data event specification:
    1 and 2 are the 45-minute halves, 3 and 4 the 15-minute extra-time halves,
    5 the penalty shoot-out. Treating every period as 45 minutes would place
    period 4 at 135 minutes and period 5 at 180 instead of 105 and 120. Any
    other period value falls back to the 45-minutes-per-period rule.
    """
    period_start_minutes = {1: 0, 2: 45, 3: 90, 4: 105, 5: 120}
    start_minutes = period_start_minutes.get(period)
    if start_minutes is None:
        start_minutes = (period - 1) * 45

    elapsed = (
        time_in_period.hour * 3600
        + time_in_period.minute * 60
        + time_in_period.second
    )
    return start_minutes * 60 + elapsed

In [8]:
import datetime
import pandas as pd

def save_playing_times(db, match):
    """Derive one playing-time row per player of a match and store them.

    Args:
        db: Session exposing ``query`` and ``bulk_save_objects``.
        match: Indexable whose element 0 is the event frame and whose element
            3 is a frame with ``player_id``, ``player_name`` and ``match_id``
            (presumably the lineup frame returned by the event download -
            confirm).

    Players from ``match[3]`` start at period 1, 00:00:00. Players named as
    replacement in a substitution event start at that event's period and
    timestamp. Everyone ends at the last period-end event unless a
    substitution event takes them off earlier. ``minutes`` is the difference
    of the two clock values from ``to_absolute_seconds`` divided by 60.

    Nothing is written when the match already has playing-time rows, so a
    rerun does not insert duplicates. This function does not commit.
    """
    # Event type ids as used by the filters below: rows of type 19 carry the
    # substitution_replacement_* columns, rows of type 34 supply the end of play.
    substitution_type_id = 19
    period_end_type_id = 34

    events = match[0]

    match_id = int(events["match_id"].iloc[0])
    already_stored = (
        db.query(PlayingTimes.match_id)
        .filter(PlayingTimes.match_id == match_id)
        .first()
    )
    if already_stored is not None:
        return

    # Copy so the columns added below never write into a view of match[3].
    players = match[3].loc[:, ["player_id", "player_name", "match_id"]].copy()
    players["start_period"] = 1
    players["start_time"] = datetime.time(0, 0, 0)

    subs = events.loc[
        events["type_id"] == substitution_type_id,
        ["match_id", "player_id", "player_name", "substitution_replacement_id",
         "substitution_replacement_name", "period", "timestamp"],
    ]
    player_subs = subs.loc[
        :, ["match_id", "substitution_replacement_id", "substitution_replacement_name", "period", "timestamp"]
    ].rename(columns={
        "period": "start_period",
        "timestamp": "start_time",
        "substitution_replacement_id": "player_id",
        "substitution_replacement_name": "player_name",
    })
    subbed_off = subs.loc[:, ["match_id", "player_id", "period", "timestamp"]].rename(
        columns={"period": "end_period", "timestamp": "end_time"}
    )

    # Keep exactly one row per player. keep="last" lets a substitution entry
    # (appended after the lineup rows) win over a lineup row of the same player.
    # NOTE(review): the lineup frame is believed to repeat players when it also
    # holds later tactical-shift lineups - confirm against the data source.
    players = pd.concat([players, player_subs]).drop_duplicates(subset="player_id", keep="last")

    match_end = events.loc[events["type_id"] == period_end_type_id, ["period", "timestamp"]]
    players["end_period"] = match_end["period"].iloc[-1]
    players["end_time"] = match_end["timestamp"].iloc[-1]

    # Players taken off get the substitution's period/timestamp as their end.
    players = players.set_index("player_id")
    players.update(subbed_off.set_index("player_id"), overwrite=True)
    players = players.reset_index()

    players["start_abs"] = players.apply(
        lambda row: to_absolute_seconds(row["start_period"], row["start_time"]), axis=1
    )
    players["end_abs"] = players.apply(
        lambda row: to_absolute_seconds(row["end_period"], row["end_time"]), axis=1
    )
    players["minutes"] = (players["end_abs"] - players["start_abs"]) / 60

    db_players = [
        PlayingTimes(
            match_id=p['match_id'],
            player_id=p['player_id'],
            player_name=p['player_name'],
            start_period=p['start_period'],
            start_time=p['start_time'],
            end_period=p['end_period'],
            end_time=p['end_time'],
            minutes=p['minutes']
        )
        for p in players.to_dict(orient="records")
    ]
    db.bulk_save_objects(db_players)

In [None]:
# Load every competition/season with 360 data: competitions first, then per
# match the match/team rows, playing times, events and freeze-frame positions.
competition = statsbomb_api.competition()

progress = widgets.IntProgress(
    value=0,
    min=0,
    max=10,
    description='Loading:',
    bar_style='',
    style={'bar_color': 'blue'},
    orientation='horizontal'
)
display(progress)

# Keep only competition/season rows whose 'match_available_360' is not missing.
competitions = competition[~competition['match_available_360'].isna()]

try:
    insert_competitions(db, competitions)
    for _, row in competitions.iterrows():
        matches = statsbomb_api.match(competition_id=row['competition_id'], season_id=row['season_id'])
        matches = matches[matches['match_status_360'] =='available']
        matches = matches.fillna(-1)
        progress.description = f"{row['competition_name']}: "
        progress.value = 0
        progress.max = len(matches)
        print(f"{row['competition_name']} - {row['season_name']}: {len(matches)} matches")
        init_table(db,matches, Person, 'home_team_managers_id', 'home_team_managers_name')
        init_table(db,matches, Person, 'away_team_managers_id', 'away_team_managers_name')
        # Persist the manager rows now so a rollback for a failing match
        # further down cannot discard them.
        db.commit()

        for _, match in matches.iterrows():
            try:
                df_event = statsbomb_api.event(match['match_id'])
                df_frame = statsbomb_api.frame(match['match_id'])
                db_match = Matches(
                                id=match['match_id'],
                                match_date=match['match_date'],
                                away_score=match['away_score'],
                                home_score=match['home_score'],
                                home_team_id=match['home_team_id'],
                                home_manager_id=match['home_team_managers_id'],
                                away_team_id=match['away_team_id'],
                                away_manager_id=match['away_team_managers_id'],
                                competition_id = match['competition_id'],
                                match_week=match['match_week'],
                                season_id=match['season_id'],
                                referee=match['referee_name'],
                                stadium=match['stadium_name'],
                                )
                # Add the match and both teams only when no row with the same id exists.
                if db.query(Matches).where(Matches.id == db_match.id).first() is None:
                    db.add(db_match)
                db_home_team=Teams(id=match['home_team_id'], name=match['home_team_name'], country=match['home_team_country_name'])
                if db.query(Teams).where(Teams.id == db_home_team.id).first() is None:
                    db.add(db_home_team)
                db_away_team = Teams(id=match['away_team_id'], name=match['away_team_name'], country=match['away_team_country_name'])
                if db.query(Teams).where(Teams.id == db_away_team.id).first() is None:
                    db.add(db_away_team)
                db.commit()

                save_playing_times(db=db, match=df_event)
                insert_event_data(db=db,df_event=df_event)

                insert_player_positions(db=db,df_event=df_frame)
                # Commit per match so a later failure can only undo the
                # match that actually failed.
                db.commit()
            except Exception as e:
                # After a failed flush the session refuses further work until
                # it is rolled back; without this every later match fails too.
                db.rollback()
                print(e)
            finally:
                # Count the match as processed whether or not it succeeded,
                # so the bar reaches its maximum.
                progress.value += 1
finally:
    # Release the session even when a download outside the per-match
    # try-block raises.
    db.close()

IntProgress(value=0, description='Loading:', max=10, style=ProgressStyle(bar_color='blue'))

  matches = matches.fillna(-1)


1. Bundesliga - 2023/2024: 34 matches
FIFA World Cup - 2022: 64 matches


  matches = matches.fillna(-1)


La Liga - 2020/2021: 35 matches


HTTPError: 404 Client Error: Not Found for url: https://raw.githubusercontent.com/statsbomb/open-data/master/data/three-sixty/3773386.json