In [None]:
#file used to test scripts
from pathlib import Path
from functools import partial
import pandas as pd
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None

import numpy as np
import mlflow
from scipy.ndimage import zoom

import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
from unxpass.databases import SQLiteDatabase
from unxpass.datasets import PassesDataset, CompletedPassesDataset, FailedPassesDataset
from unxpass.components import pass_selection, pass_value, pass_success, pass_value_custom
from unxpass.components.utils import load_model
from unxpass.visualization import plot_action
from unxpass.ratings_custom import LocationPredictions

from matplotlib.backends.backend_pdf import PdfPages

from notebooks import playVisualizers
model_pass_value = pass_value.SoccerMapComponent(
    model=mlflow.pytorch.load_model(
        'runs:/20e7d3695d7049d0a513922d32b44a11/model', map_location='cpu'
        #'runs:/788ec5a232af46e59ac984d50ecfc1d5/model', map_location='cpu'
    ), offensive = False
)
DATA_DIR = Path("../stores/")
dbpath = "/home/lz80/rdf/sp161/shared/soccer-decision-making/Bundesliga/buli_all.sql"
feat_path = "/home/lz80/rdf/sp161/shared/soccer-decision-making/Bundesliga/all_features_fail"
db = SQLiteDatabase(dbpath)
dataset_test = partial(PassesDataset, path = feat_path)
#surfaces = model_pass_value.predict_surface(dataset_test, db = db, model_name = "val", game_id = game_id)

In [48]:
def getFlips(game_id):
    events = load_xml.load_event(f"/home/lz80/rdf/sp161/shared/soccer-decision-making/Bundesliga/event_data_all/{game_id}.xml")
    events[['TeamLeft', 'TeamRight']] = events[['TeamLeft', 'TeamRight']].fillna(method='ffill')
    #direction of play to right
    #if team in possession is team left - all good
    #if team in possession is team right - need to flip
    return events.loc[events['Team'] == events['TeamRight'], 'EventId']
def frametodict(group, shouldFlip):
    """
    Converts a group of tracking data into a dictionary with player IDs as keys and their translated positions.
    """
    # Exclude the BALL rows and work on a copy to avoid SettingWithCopyWarning
    noball = group[group['TeamId'] != "BALL"].copy()

    # Ensure numeric conversion for X and Y (if not already floats)
    noball['X'] = noball['X'].astype(float)
    noball['Y'] = noball['Y'].astype(float)
    
    # Compute translated coordinates using vectorized operations
    noball['X_translated'] = 120 - (1.09361 * noball['X'] + 60)
    noball['Y_translated'] = 80 - (-1.09361 * noball['Y'] + 40)
    
    # Apply flip if necessary
    if shouldFlip:
        noball['X_translated'] = 120 - noball['X_translated']
        noball['Y_translated'] = 80 - noball['Y_translated']
    
    # Build the dictionary using itertuples for faster iteration
    locs = {
        row.PersonId: {"X": row.X_translated, "Y": row.Y_translated, "Team": row.TeamId}
        for row in noball.itertuples(index=False)
    }
    
    return locs


In [65]:
#getting speeds from n prior frames for buli data
from unxpass import load_xml
#load all relevant data
def getSpeedBuli(game_id, action_id, tracking_groups, eventcsv, id_to_event, flips, framesback = 10):
    timediff = 0.04 * framesback
    buli_id = id_to_event[(game_id, action_id)]
    shouldflip = buli_id in flips
    
    event = eventcsv[eventcsv['EVENT_ID'] == int(buli_id.split(".")[0])].iloc[0]
    event_frame = event['FRAME_NUMBER']
    team = event["CUID1"]
    event_frame_str = str(event_frame)
    prior_frame_str = str(event_frame - framesback)
    try:
        event_pos = tracking_groups.get_group(event_frame_str)
    except KeyError:
        event_pos = pd.DataFrame()
    try:
        prior_pos = tracking_groups.get_group(prior_frame_str)
    except KeyError:
        prior_pos = pd.DataFrame()
    if len(event_pos) > 0:
        event_pos = frametodict(event_pos, shouldflip)
    if len(prior_pos) > 0:
        prior_pos = frametodict(prior_pos, shouldflip)
    #return event_pos, prior_pos
    speed_output = []
    for player, pos in event_pos.items():
        isTeammate = event_pos[player]["Team"] == team
        prior = prior_pos.get(player)
        if player not in prior_pos:
            x_diff = 0
            y_diff = 0
        else:
            x_diff = event_pos[player]["X"] - prior_pos[player]["X"]
            y_diff = event_pos[player]["Y"] - prior_pos[player]["Y"]
        x_velo = x_diff / timediff
        y_velo = y_diff / timediff
        player_dict = {
            "player": player,
            "isTeammate": isTeammate,
            "x_velo": x_velo,
            "y_velo": y_velo,
            "location": [event_pos[player]["X"], event_pos[player]["Y"]],
        }
        speed_output.append(player_dict)
    return speed_output
    #need is teammate, location, and speed 

In [None]:
import timeit
skeleton = pd.read_parquet("/home/lz80/rdf/sp161/shared/soccer-decision-making/Bundesliga/all_features_fixed/x_endlocation.parquet")
def addAllSpeedBuli(skeleton, db):
    games = skeleton.index.get_level_values(0).unique()
    output = pd.DataFrame(index = skeleton.index)
    output["freeze_frame_360_a0"] = np.nan
    output["freeze_frame_360_a0"] = output["freeze_frame_360_a0"].astype(object)
    for game_id in games[0:1]:
        id_to_event = db.actions(game_id = game_id)['original_event_id'].to_dict()
        eventcsv = load_xml.load_csv_event(f"/home/lz80/rdf/sp161/shared/soccer-decision-making/Bundesliga/KPI_Merged_all/KPI_MGD_{game_id}.csv")
        trackingdf = load_xml.load_tracking(f"/home/lz80/rdf/sp161/shared/soccer-decision-making/Bundesliga/zipped_tracking/zip_output/{game_id}.xml")
        tracking_groups = trackingdf.groupby('N')
        game_mask = skeleton.index.get_level_values(0) == game_id
        flips = getFlips(game_id)
        for game_id, action_id in skeleton.index[game_mask]:
            print(game_id, action_id)
            output.at[(game_id, action_id), "freeze_frame_360_a0"] = [getSpeedBuli(game_id, action_id, tracking_groups, eventcsv, id_to_event, flips = flips)]
addAllSpeedBuli(skeleton)


In [38]:
matches_map = {}
#creates map of hawkeye game ids and statsbomb game ids
from os import listdir
game_dir = "/home/lz80/rdf/sp161/shared/soccer-decision-making/allHawkEye/"
games = pd.read_json("/home/lz80/rdf/sp161/shared/soccer-decision-making/Hawkeye_AllGames/matches/53/106.json", convert_dates = False)#directory of statsbomb matches data
games['home_team'] = games.apply(lambda d: d['home_team']['home_team_name'], axis = 1).str.replace("Women's", "").str.replace("WNT", "").str.strip()
games['away_team'] = games.apply(lambda d: d['away_team']['away_team_name'], axis = 1).str.replace("Women's", "").str.replace("WNT", "").str.strip()
dirfiles = [f for f in listdir(game_dir) if not f.startswith('.')]
for game in dirfiles:
    home_team = game.split('_')[1]
    away_team = game.split('_')[2]
    #print(games['away_team'])
    game_id = games[(games['home_team'] == home_team) & (games['away_team'] == away_team)].reset_index().loc[0]['match_id']
    matches_map[game] = int(game_id)
import json 
outpath = "/home/lz80/rdf/sp161/shared/soccer-decision-making/hawkeye_to_sb.json"
with open(outpath, "w") as f:
    json.dump(matches_map, f)

In [3]:
def getGksTM(game_id):
        lineups = f"/home/lz80/rdf/sp161/shared/soccer-decision-making/womens_euro_receipts/lineups/{game_id}.json"
        lineup_df = pd.read_json(lineups, convert_dates = False)
        team_1 = lineup_df['team_id'].loc[0]
        team_2 = lineup_df['team_id'].loc[1]
        team_1_dict = lineup_df['lineup'].loc[0]
        team_2_dict = lineup_df['lineup'].loc[1]
        team_1_lineup = [player_dict['player_id'] for player_dict in team_1_dict]
        team_2_lineup = [player_dict['player_id'] for player_dict in team_2_dict]
        team_map = {team_1 : team_1_lineup, team_2 : team_2_lineup}#building a map of team id to player ids
        player_to_team = {player_id: team_id for team_id, players in team_map.items() for player_id in players} #mapping players to teams
        pos_dict = {player['player_id']: player['positions'][0]['position'] for player in team_1_dict if len(player['positions']) > 0}
        team_2_pos_dict = {player['player_id']: player['positions'][0]['position'] for player in team_2_dict if len(player['positions']) > 0}
        pos_dict.update(team_2_pos_dict)
        goalkeepers = [key for (key,value) in pos_dict.items() if value == "Goalkeeper"]
        return player_to_team, goalkeepers#gets set of goalkeepers too
def clean_dict(freeze_frame):
    output = {}
    for player in freeze_frame:
        player_dict = {}
        player_dict['teammate'] = player['teammate']
        player_dict['x'] = player['location'][0]
        player_dict['y'] = player['location'][1]
        output[player['player']] = player_dict
    return output
def getSpeedsHawkeye(init, event, timediff):
    output = []
    for player in event:
        player_dict = {}
        if player not in init:
            continue
        x_diff = event[player]['x'] - init[player]['x']
        y_diff = event[player]['y'] - init[player]['y']
        x_velo = x_diff / timediff
        y_velo = y_diff / timediff
        player_dict['player'] = player
        player_dict['teammate'] = event[player]['teammate']
        player_dict['x_velo'] = x_velo
        player_dict['y_velo'] = y_velo
        player_dict['location'] = [event[player]['x'], event[player]['y']]
        output.append(player_dict)
    return output

In [None]:
#TODO: do same for hawkeye
import json
import os
import conversions
import pandas as pd
import numpy as np
from unxpass.databases import SQLiteDatabase
#might have to lookout for added events
def getSpeedHawkeye(match_id, action_id, tracking, player_to_team, goalkeepers, id_to_event):
    full_id = id_to_event[(str(match_id), action_id)]
    if len(full_id.split("-")) != 6:
        return {"empty": "empty"}
    real_id = full_id.rsplit("-", 1)[0]
    frame_idx = int(full_id.rsplit("-",1)[1])
    pass_data = sequences[sequences['id'] == real_id].iloc[0]
    timeback = 0.4
    time = pass_data['BallReceipt']
    period = pass_data['period']
    minute = int((time + .04 * int(frame_idx)) / 60 + 1)
    second = int((time + .04 * int(frame_idx)) % 60)
    second_range = (second - timeback, second + .01)
    team = pass_data['team_id']
    actor =  pass_data['player_id']
    uefa_map = {}
    file_path_begin = os.listdir(tracking)[0].rsplit("_", 2)[0]
    #goalkeepers = []
    file_path = f"{tracking}/{file_path_begin}_{str(period)}_{str(minute)}.football.samples.centroids"
    all_locs = []
    loc1 = conversions.read_Hawkeye_player_loc(file_path, period, minute, second_range, team,actor, real_id, player_to_team, goalkeepers)
    all_locs.append(loc1)
    if(second - timeback < 0):
        #if the time is negative, we need to get the last frame of the previous minute
        #print("Getting last frame of previous minute")
        file_path = f"{tracking}/{file_path_begin}_{str(period)}_{str(minute - 1)}.football.samples.centroids"
        second_range = (59 + second - timeback, 60)
        loc2 = conversions.read_Hawkeye_player_loc(file_path, period, minute - 1, second_range, team,actor, real_id, player_to_team, goalkeepers)
        all_locs.append(loc2)
    addedtracking = pd.concat(all_locs)
    addedtracking['event_uuid'] = [f"{real_id}-b{i}" for i in range(len(addedtracking), 0, -1)]
    last = clean_dict(addedtracking.loc[0]["freeze_frame"])
    first = clean_dict(addedtracking.loc[len(addedtracking) - 1]["freeze_frame"])
    return getSpeedsHawkeye(first, last, timeback)
def getAllHawkeyeSpeeds(skeleton, hawkeye_db):
    with open("/home/lz80/rdf/sp161/shared/soccer-decision-making/hawkeye_to_sb.json", 'r') as file:
        hawkeye_to_sb = json.load(file)
    output = pd.DataFrame(index = skeleton.index)
    output["speed_frame_360_a0"] = np.nan
    output["speed_frame_360_a0"] = output["speed_frame_360_a0"].astype(object)
    sb_to_hawkeye = {v: k for k, v in hawkeye_to_sb.items()}
    for match_id in skeleton.index.get_level_values(0).unique():
        hawkeye_id = sb_to_hawkeye[int(match_id)]
        sequences = pd.read_csv("/home/lz80/un-xPass/unxpass/steffen/sequences_new.csv")
        tracking = f"/home/lz80/rdf/sp161/shared/soccer-decision-making/allHawkeye/{hawkeye_id}/scrubbed.samples.centroids"
        
        output["speed_frame_360_a0"] = {}
        id_to_event = hawkeye_db.actions(game_id = match_id)['original_event_id'].to_dict()
        player_to_team, goalkeepers = getGksTM(match_id)
        game_mask = skeleton.index.get_level_values(0) == match_id
        for game_id, action_id in skeleton.index[game_mask]:
            print(game_id, action_id)
            #get player to team mapping
            output.at[(game_id, action_id), "speed_frame_360_a0"] = [getSpeedHawkeye(game_id, action_id, tracking, player_to_team, goalkeepers, id_to_event)]
        return output
#getAllHawkeyeSpeeds(match_id, hawkeye_db)
sequences = pd.read_csv("/home/lz80/un-xPass/unxpass/steffen/sequences_new.csv")
skeleton = pd.read_parquet("/home/lz80/rdf/sp161/shared/soccer-decision-making/HawkEye_Features_2/x_endlocation.parquet")
dbpath = "/home/lz80/rdf/sp161/shared/soccer-decision-making/hawkeye_all.sql"
hawkeye_db = SQLiteDatabase(dbpath)
getAllHawkeyeSpeeds(skeleton, hawkeye_db)


  output["speed_frame_360_a0"] = {}


3835331 0
3835331 3
3835331 6
3835331 9
3835331 12
3835331 15
3835331 18
3835331 20
3835331 21
3835331 26
3835331 27
3835331 32
3835331 34
3835331 35
3835331 37
3835331 38
3835331 39
3835331 40
3835331 43
3835331 44
3835331 46
3835331 48
3835331 50
3835331 52
3835331 54
3835331 56
3835331 58
3835331 60
3835331 62
3835331 64
3835331 66
3835331 68
3835331 70
3835331 72
3835331 74
3835331 76
3835331 78
3835331 80
3835331 82


KeyboardInterrupt: 

In [None]:
#adding dummy events prior for speeds for hawkeye data
def hawkeyeGetBefore(row):
    #the very easy efficiency change is to compute by game so we don't have to load the jsons every time for every pass
    with open("/home/lz80/rdf/sp161/shared/soccer-decision-making/hawkeye_to_sb.json", 'r') as file:
        hawkeye_to_sb = json.load(file)
    sb_to_hawkeye = {v: k for k, v in hawkeye_to_sb.items()}
    match_id = row['match_id']
    hawkeye_id = sb_to_hawkeye[match_id]

    id = row['id']
    time = row['BallReceipt']
    sb_event = pd.read_json(f"/home/lz80/rdf/sp161/shared/soccer-decision-making/Hawkeye_AllGames/events/{match_id}.json")
    sb_tracking = pd.read_json(f"/home/lz80/rdf/sp161/shared/soccer-decision-making/Hawkeye_AllGames/three-sixty/{match_id}.json")
    sb_matches = pd.read_json("/home/lz80/rdf/sp161/shared/soccer-decision-making/Hawkeye_AllGames/matches/53/106.json")
    tracking = f"/home/lz80/rdf/sp161/shared/soccer-decision-making/allHawkeye/{hawkeye_id}/scrubbed.samples.centroids"
    out_sb_event = f"/home/lz80/rdf/sp161/shared/soccer-decision-making/Hawkeye_b/events/{match_id}.json"
    out_three_sixty = f"/home/lz80/rdf/sp161/shared/soccer-decision-making/Hawkeye_b/three-sixty/{match_id}.json"
    file_path_begin = "2021_18_2032206"#this should be different for each game
    import conversions 
    #)
    timeback = 0.1
    sequences.columns
    period = row['period']
    minute = int(time / 60) + 1
    second = int(time) % 60
    second_range = (second - timeback, second)
    team = row['team_id']
    actor = row['player_id']
    uefa_map = {}
    #goalkeepers = []
    file_path = f"{tracking}/{file_path_begin}_{str(period)}_{str(minute)}.football.samples.centroids"
    def getGksTM(game_id):
        lineups = f"/home/lz80/rdf/sp161/shared/soccer-decision-making/womens_euro_receipts/lineups/{game_id}.json"
        lineup_df = pd.read_json(lineups, convert_dates = False)
        team_1 = lineup_df['team_id'].loc[0]
        team_2 = lineup_df['team_id'].loc[1]
        team_1_dict = lineup_df['lineup'].loc[0]
        team_2_dict = lineup_df['lineup'].loc[1]
        team_1_lineup = [player_dict['player_id'] for player_dict in team_1_dict]
        team_2_lineup = [player_dict['player_id'] for player_dict in team_2_dict]
        team_map = {team_1 : team_1_lineup, team_2 : team_2_lineup}#building a map of team id to player ids
        player_to_team = {player_id: team_id for team_id, players in team_map.items() for player_id in players} #mapping players to teams
        pos_dict = {player['player_id']: player['positions'][0]['position'] for player in team_1_dict if len(player['positions']) > 0}
        team_2_pos_dict = {player['player_id']: player['positions'][0]['position'] for player in team_2_dict if len(player['positions']) > 0}
        pos_dict.update(team_2_pos_dict)
        goalkeepers = [key for (key,value) in pos_dict.items() if value == "Goalkeeper"]
        return player_to_team, goalkeepers#gets set of goalkeepers too
    player_to_team, goalkeepers = getGksTM(match_id)
    all_locs = []
    loc1 = conversions.read_Hawkeye_player_loc(file_path, period, minute, second_range, team,actor, id, player_to_team, goalkeepers)
    all_locs.append(loc1)
    if(second - timeback < 0):
        #if the time is negative, we need to get the last frame of the previous minute
        #print("Getting last frame of previous minute")
        file_path = f"{tracking}/{file_path_begin}_{str(period)}_{str(minute - 1)}.football.samples.centroids"
        second_range = (59 + second - timeback, 59)
        loc2 = conversions.read_Hawkeye_player_loc(file_path, period, minute - 1, second_range, team,actor, id, player_to_team, goalkeepers)
        all_locs.append(loc2)
    addedtracking = pd.concat(all_locs)
    addedtracking['event_uuid'] = [f"{id}-b{i}" for i in range(len(addedtracking), 0, -1)]
    framesback = addedtracking.shape[0]
    #conversions
    def add_events(id, framesback):
        #.04 seconds per frame
        sb_row = sb_event[sb_event['id'] == id].reset_index().loc[0]
        ogtime = sb_row['timestamp']
        framediff = pd.Timedelta(seconds = 0.04)
        added_events = pd.DataFrame([sb_row] * framesback)
        added_events['id'] = [f"{id}-b{i}" for i in range(framesback, 0, -1)]
        added_events['timestamp'] = [ogtime - i for i in [j * framediff for j in range(1, framesback + 1)]]
        added_events['index'] = [row['index'] - 1 + i * (1 / (framesback + 1)) for i in range(1, framesback + 1)]
        return added_events
    added_events = add_events(id, framesback)
    eventsadded = pd.concat([sb_event, added_events])
    threesixtyadded = pd.concat([sb_tracking, addedtracking])
    threesixtyadded.to_json(out_three_sixty, orient = "records", lines = True)
    eventsadded.to_json(out_sb_event, orient = "records", lines = True)
    return added_events, addedtracking
import json
sequences = pd.read_csv("/home/lz80/un-xPass/unxpass/steffen/sequences_new.csv")
row = sequences.loc[0]
added_events, added_tracking = hawkeyeGetBefore(row)

In [None]:
player_df_all = pd.read_json(file_path, lines = True, orient = 'columns')
player_dict = player_df_all['samples'].loc[0]['people']
player_df = pd.DataFrame(player_dict)
times = player_df['centroid'].apply(lambda x: x[0]['time'])
times.unique()[0] - times.unique()[1]#getting framerate

In [None]:
game_id = 'DFL-MAT-J03YDU'
labs = model_pass_value.initialize_dataset(dataset_test, model_name = "val").labels
fails = labs[labs['concedes_xg'] > 0].index
#db.actions(game_id = game_id).loc[fails]
gameidx = [idx for idx in fails if idx[0] == game_id]
eventids = db.actions(game_id = game_id).loc[gameidx]['original_event_id']


In [48]:
from unxpass.load_xml import load_tracking, load_csv_event
from unxpass.visualizers_made import get_animation_from_raw
def get_trackingevent(game_id):
    trackingpath = f"/home/lz80/rdf/sp161/shared/soccer-decision-making/Bundesliga/zipped_tracking/zip_output/{game_id}.xml"
    eventpath = f"/home/lz80/rdf/sp161/shared/soccer-decision-making/Bundesliga/KPI_Merged_all/KPI_MGD_{game_id}.csv"
    eventdf = load_csv_event(eventpath)
    trackingdf = load_tracking(trackingpath)
    return trackingdf, eventdf

trackingdf, eventdf = get_trackingevent(game_id)

In [None]:
for event_id in eventids:
    print(event_id)

In [None]:
itera = 1
framerate = 5
eventid = eventids[itera]
test = get_animation_from_raw(float(eventid), framerate, eventdf, trackingdf, frameskip = 20, show = False, add_frames = 600)
#eventdf[eventdf["EVENT_ID"] == float(eventid)]
#eventid


In [None]:
test.save("file.gif")