In [1]:
import sys
sys.path.append("..")

from libs.InteractivePitchUI import *
from libs.data_manipulation import *
from libs.dim_reduction import *
from libs.feature_generation import *
from libs.clustering import *
from libs.convex_hull import *
from libs.alpha_shape import *
from libs.weight_generator import *
from libs.similar_movement import *



In [2]:
# Build the labelled tracking data for the "Denmark" team from the data folder
# and the ground-truth label CSV.
matches = compile_team_tracking_data_with_labels(
    "../data",
    "Denmark",
    "../data/Labelled_ground_truths.csv",
)

# Narrow the compiled data down to one match (selector value 4).
# NOTE(review): presumably 4 is a match_id -- confirm against extract_one_match.
df = extract_one_match(
    matches,
    4,
)


In [3]:
# Inspect the column layout of the selected match frame (displayed as the cell output).
df.columns

Index(['match_id', 'Time [s]', 'Time [s]_team', 'half_team', 'minute_team',
       'Period_team', 'home_6_x', 'home_6_y', 'home_18_x', 'home_18_y',
       ...
       'away_25_y', 'home_26_x', 'home_26_y', 'away_3_x', 'away_3_y',
       'away_15_x', 'away_15_y', 'Label', 'Time[s]', 'Competition'],
      dtype='object', length=101)

In [4]:
def func_control(x):
    """Control weighting: always 1, whatever ``x`` is."""
    return 1


def func_inverse(x):
    """Inverse weighting: ``20 / x`` (undefined for ``x == 0``)."""
    return 20 / x


def func_linear(x):
    """Linear weighting: ``200 - x`` (turns negative once ``x`` exceeds 200)."""
    return 200 - x


def func_exp(x):
    """Exponential weighting: ``exp(-x / 20)``."""
    return np.exp(-x / 20)

In [None]:
import pandas as pd
import numpy as np
import datetime
import os

def _outside_index_window(indices, centre, window):
    """Return the entries of ``indices`` lying more than ``window`` index steps from ``centre``."""
    indices = np.array(indices)
    return indices[(indices > centre + window) | (indices < centre - window)]


def process_situations(df, random_state=None, skip_labels=()):
    """Export candidate "similar situation" lists for one sampled row per label.

    For every distinct non-null value of ``df['Label']`` (minus ``skip_labels``)
    one row is sampled. For each sampled row:

    1. ``find_similar_movement`` is evaluated against every 48th row of ``df``.
    2. The five results with the smallest first tuple element, plus the sampled
       row and the row labelled ``relevant_index + 1``, are handed to
       ``most_similar_with_wasserstein`` once per weighting function
       (``func_control``, ``func_linear``, ``func_inverse``, ``func_exp``,
       looked up as notebook-level names when this function is called).
    3. Returned index labels within 480 index steps of the sampled row are dropped.
    4. A folder ``"{label}_{match_name}_{time}"`` is created via ``generate_folder``
       and, per weighting function, a ground-truth CSV and a shuffled copy are
       written into it, together with ``situation_info.txt``.

    Args:
        df: Frame with at least the columns ``'Label'``, ``'match_name'``,
            ``'Time [s]'`` and ``'half'``, indexed by numeric labels.
        random_state: Forwarded to every ``DataFrame.sample`` call. ``None``
            (the default) keeps the previous unseeded behaviour; pass an int
            for reproducible sampling and shuffling.
        skip_labels: Collection of label values that must not be sampled,
            e.g. ``("Missing",)``. Empty by default, so all labels are used.

    Returns:
        None. All results are written to disk.
    """
    candidate_stride = 48   # every 48th row of df is offered as a candidate
    n_candidates = 5        # candidates kept after ranking
    # The exclusion window is measured in index labels, NOT seconds.
    # NOTE(review): consecutive rows in the printed output are 0.04 s apart, so
    # 480 rows would be roughly 19 s -- confirm the intended window size.
    exclusion_window = 480

    # Insertion order fixes the order of the most_similar_with_wasserstein calls.
    weight_functions = {
        'Control': func_control,
        'Linear': func_linear,
        'Inverse': func_inverse,
        'Exp': func_exp,
    }

    # Step 1: sample one row per label.
    # NOTE(review): unlabelled rows appear to carry the placeholder "Missing",
    # which dropna() does not remove; pass skip_labels=("Missing",) to exclude it.
    unique_labels = [
        label for label in df['Label'].dropna().unique() if label not in skip_labels
    ]
    relevant_indices = [
        df[df['Label'] == label].sample(n=1, random_state=random_state).index[0]
        for label in unique_labels
    ]

    # Loop-invariant: the candidate pool is the same for every situation.
    candidate_indices = df.index[::candidate_stride]

    # Step 2: process each sampled row.
    for relevant_index in relevant_indices:
        print(relevant_index)

        # Step 3: rank the candidates by the first element of each result tuple.
        similar_movement = find_similar_movement(df, relevant_index, candidate_indices)
        similar_movement_ranked = sorted(similar_movement, key=lambda x: x[0])

        # Step 4: keep the top candidates and add the query row and its successor.
        # NOTE(review): relevant_index + 1 must exist in df.index, otherwise
        # df.loc raises KeyError (e.g. when the last row is sampled).
        new_indices = [index for _, index in similar_movement_ranked][:n_candidates]
        new_indices.append(relevant_index)
        new_indices.append(relevant_index + 1)
        new_df = df.loc[new_indices]

        # Steps 5-6: run every weighting function, then drop results that lie
        # within the exclusion window around the query row.
        finished_by_function = {
            func_name: _outside_index_window(
                most_similar_with_wasserstein(relevant_index, new_df, weight_func, steps=1),
                relevant_index,
                exclusion_window,
            )
            for func_name, weight_func in weight_functions.items()
        }

        # Step 7: derive the output folder from label, match and timestamp.
        label = df.loc[relevant_index, 'Label']
        match_name = df.loc[relevant_index, 'match_name']
        # NOTE(review): str(timedelta) contains ':' characters, which are not
        # valid in Windows folder names.
        time_of_situation = str(datetime.timedelta(seconds=df.loc[relevant_index, 'Time [s]']))

        folder_name = f"{label}_{match_name}_{time_of_situation}"
        generate_folder(folder_name)

        with open(f"{folder_name}/situation_info.txt", "w") as f:
            f.write(f"Match Name: {match_name}\n")
            f.write(f"Time of Situation: {time_of_situation}\n")

        for func_name, finished_indices in finished_by_function.items():
            # Select rows and columns in one .loc call instead of chained indexing.
            # NOTE(review): the column listing printed earlier in the notebook shows
            # 'half_team'; confirm that a plain 'half' column exists.
            result_df = df.loc[finished_indices, ["match_name", "Time [s]", "half"]].copy()
            result_df["Time [s]"] = result_df["Time [s]"].apply(
                lambda x: str(datetime.timedelta(seconds=x))
            )

            # Ground truth keeps the order returned by the weighting function.
            result_df.to_csv(f"{folder_name}/{folder_name}_{func_name}_ground_truth.csv")

            # Shuffled copy of the same rows.
            result_df.sample(frac=1, random_state=random_state).to_csv(
                f"{folder_name}/{folder_name}_{func_name}_for_ordering_truth.csv"
            )
# Run the export on the match frame loaded above; this creates folders and CSV files as a side effect.
process_situations(df)


501019
        match_id  Time [s]  Time [s]_team half_team  minute_team  Period_team  \
501019         4   2819.48        2819.48        2H           46            2   
501020         4   2819.52        2819.52        2H           46            2   

         home_6_x   home_6_y  home_18_x  home_18_y  ...  away_25_y  home_26_x  \
501019  29.656094 -29.117348   7.647442  14.492158  ...        NaN        NaN   
501020  29.617172 -29.096554   7.615572  14.482178  ...        NaN        NaN   

        home_26_y   away_3_x  away_3_y  away_15_x  away_15_y    Label  \
501019        NaN -15.053794  0.679422 -15.824062 -15.706592  Missing   
501020        NaN -15.068914  0.691763 -15.814822 -15.692156  Missing   

        Time[s]  Competition  
501019      NaN          NaN  
501020      NaN          NaN  

[2 rows x 101 columns]
7
7
[[(29.65609359741211, -29.11734771728516), (7.647441864013672, 14.492157936096191), (44.3969612121582, -5.331360340118408), (-7.6875386238098145, -30.94013595581054

In [50]:
# Index labels of every row whose Label is "cross-from-cb".
relevant_indices = df.index[df["Label"].eq("cross-from-cb").to_numpy()].to_numpy()

# Position inside relevant_indices of the situation to analyse
# (a list position, not a df index label).
relevant_index = 0




In [51]:
# All matching index labels, followed by match name and time of the chosen one.
print(relevant_indices)
print(df.loc[[relevant_indices[relevant_index]], ["match_name", "Time [s]"]])

[347164 499657]
            match_name  Time [s]
347164  Denmark_Serbia    2500.0


In [52]:
# Compare the chosen situation with every 48th row of df.
similar_movement = find_similar_movement(
    df,
    relevant_indices[relevant_index],
    df.index[::48],
)

In [53]:
# Order the (score, index) pairs by ascending score and keep the index labels
# of the first 20.
similar_movement_ranked = sorted(similar_movement, key=lambda pair: pair[0])
new_indices = [frame_index for _, frame_index in similar_movement_ranked[:20]]


In [54]:
# Add the query row and the row right after it to the candidate labels, then
# pull all of those rows out of df.
# Note: new_indices is mutated in place, so re-running this cell appends the
# two labels a second time.
new_indices.extend(
    (relevant_indices[relevant_index], relevant_indices[relevant_index] + 1)
)
new_df = df.loc[new_indices]

In [55]:
# Run the Wasserstein ranking once per weighting function; the calls happen in
# the order control, linear, inverse, exp.
(
    finished_indices_control,
    finished_indices_linear,
    finished_indices_inverse,
    finished_indices_exp,
) = [
    most_similar_with_wasserstein(
        relevant_indices[relevant_index], new_df, weight_func, steps=1
    )
    for weight_func in (func_control, func_linear, func_inverse, func_exp)
]

        match_id  Time [s]  Time [s]_team half_team  minute_team  Period_team  \
347164         3   2500.00        2500.00        1H           41            1   
347165         3   2500.04        2500.04        1H           41            1   

         home_6_x  home_6_y  home_18_x  home_18_y  ...  away_25_y  home_26_x  \
347164 -17.472734 -2.775934 -39.604385  20.131933  ...        NaN        NaN   
347165 -17.472685 -2.732125 -39.561913  20.309237  ...        NaN        NaN   

        home_26_y  away_3_x  away_3_y  away_15_x  away_15_y          Label  \
347164        NaN       NaN       NaN        NaN        NaN  cross-from-cb   
347165        NaN       NaN       NaN        NaN        NaN        Missing   

        Time[s]  Competition  
347164   2500.0   "EURO2024"  
347165      NaN          NaN  

[2 rows x 101 columns]
22
22
[[(-17.472734451293945, -2.775934219360352), (-39.60438537597656, 20.131933212280277), (30.747602462768555, -0.2054070234298706), (-27.998794555664062, 14.30

In [56]:
def keep_outside_window(candidates, centre, window=480):
    """Return ``candidates`` as an array without entries within ``window`` index steps of ``centre``."""
    candidates = np.array(candidates)
    return candidates[(candidates > centre + window) | (candidates < centre - window)]


# Drop every result whose index label lies within 480 steps of the query row,
# so the query situation cannot be matched with its own neighbourhood.
finished_indices_control = keep_outside_window(
    finished_indices_control, relevant_indices[relevant_index]
)
finished_indices_exp = keep_outside_window(
    finished_indices_exp, relevant_indices[relevant_index]
)
finished_indices_inverse = keep_outside_window(
    finished_indices_inverse, relevant_indices[relevant_index]
)
finished_indices_linear = keep_outside_window(
    finished_indices_linear, relevant_indices[relevant_index]
)



In [57]:
# Show the query situation itself in the interactive pitch widget.
PitchDisplay(df, [relevant_indices[relevant_index]])

VBox(children=(Dropdown(description='Select Index:', options=(347164,), value=347164), Output()))

<libs.InteractivePitchUI.PitchDisplay at 0x7f80d59496f0>

In [64]:
# Browse the rows kept for the exponential weighting in the interactive pitch widget.
# NOTE(review): this cell carries execution count 64 while its neighbours are 57
# and 59, so it was last run out of order -- re-check after Restart & Run All.
PitchDisplay(df,finished_indices_exp)



VBox(children=(Dropdown(description='Select Index:', options=(291408, 291360, 399984, 286896, 462048, 479376, …

<libs.InteractivePitchUI.PitchDisplay at 0x7f80d53a7e50>

In [59]:
import datetime

# Rows selected with the control weighting, reduced to match name and time.
# Rows and columns are picked in a single .loc call instead of chained indexing.
result_df = df.loc[finished_indices_control, ["match_name", "Time [s]"]].copy()
# Render the time in seconds as a timedelta string.
result_df["Time [s]"] = result_df["Time [s]"].apply(lambda x: str(datetime.timedelta(seconds=x)))

# Match name and time of the query situation, both as strings.
title_seperate = [
    str(value)
    for value in df.loc[relevant_indices[relevant_index]][["match_name", "Time [s]"]].to_numpy()
]

# Note: there is no separator between the prefix and the match name
# (e.g. 'Control_functionDenmark_Serbia_2500.0'); kept so existing paths stay valid.
folder_name = f"Control_function{title_seperate[0]}_{title_seperate[1]}"

generate_folder(folder_name)

# DataFrame.to_csv returns None when it is given a path, so its result is not
# bound to a name.
result_df.to_csv(f"{folder_name}/{folder_name}_ground_truth.csv")
# Same rows in random order.
result_df.sample(frac=1).to_csv(f"{folder_name}/{folder_name}_for_ordering_truth.csv")

Folder 'Control_functionDenmark_Serbia_2500.0' does not exist.


In [60]:
# Rows selected with the linear weighting, reduced to match name and time.
result_df = df.loc[finished_indices_linear, ["match_name", "Time [s]"]]
result_df["Time [s]"] = result_df["Time [s]"].map(
    lambda seconds: str(datetime.timedelta(seconds=seconds))
)

# Match name and time of the query situation, both as strings.
title_seperate = [
    str(value)
    for value in df.loc[relevant_indices[relevant_index]][["match_name", "Time [s]"]].to_numpy()
]

folder_name = f"Linear_function{title_seperate[0]}_{title_seperate[1]}"
generate_folder(folder_name)

# Ordered ground truth first, then the same rows in random order.
result_df.to_csv(f"{folder_name}/{folder_name}_ground_truth.csv")
result_df.sample(frac=1).to_csv(f"{folder_name}/{folder_name}_for_ordering_truth.csv")

Folder 'Linear_functionDenmark_Serbia_2500.0' does not exist.


In [61]:
# Rows selected with the inverse weighting, reduced to match name and time.
# Rows and columns are picked in a single .loc call instead of chained indexing.
result_df = df.loc[finished_indices_inverse, ["match_name", "Time [s]"]].copy()
# Render the time in seconds as a timedelta string.
result_df["Time [s]"] = result_df["Time [s]"].apply(lambda x: str(datetime.timedelta(seconds=x)))

# Match name and time of the query situation, both as strings.
title_seperate = [
    str(value)
    for value in df.loc[relevant_indices[relevant_index]][["match_name", "Time [s]"]].to_numpy()
]

# Note: there is no separator between the prefix and the match name; kept so
# existing paths stay valid.
folder_name = f"Inverse_function{title_seperate[0]}_{title_seperate[1]}"

generate_folder(folder_name)

# DataFrame.to_csv returns None when it is given a path, so its result is not
# bound to a name.
result_df.to_csv(f"{folder_name}/{folder_name}_ground_truth.csv")

# Same rows in random order.
result_df.sample(frac=1).to_csv(f"{folder_name}/{folder_name}_for_ordering_truth.csv")

Folder 'Inverse_functionDenmark_Serbia_2500.0' does not exist.


In [62]:
# Rows selected with the exponential weighting, reduced to match name and time.
# Rows and columns are picked in a single .loc call instead of chained indexing.
result_df = df.loc[finished_indices_exp, ["match_name", "Time [s]"]].copy()
# Render the time in seconds as a timedelta string.
result_df["Time [s]"] = result_df["Time [s]"].apply(lambda x: str(datetime.timedelta(seconds=x)))

# Match name and time of the query situation, both as strings.
title_seperate = [
    str(value)
    for value in df.loc[relevant_indices[relevant_index]][["match_name", "Time [s]"]].to_numpy()
]

# Note: there is no separator between the prefix and the match name; kept so
# existing paths stay valid.
folder_name = f"Exp_function{title_seperate[0]}_{title_seperate[1]}"
generate_folder(folder_name)

# DataFrame.to_csv returns None when it is given a path, so its result is not
# bound to a name.
result_df.to_csv(f"{folder_name}/{folder_name}_ground_truth.csv")
# Same rows in random order.
result_df.sample(frac=1).to_csv(f"{folder_name}/{folder_name}_for_ordering_truth.csv")

Folder 'Exp_functionDenmark_Serbia_2500.0' does not exist.
