In [1]:
import sys
sys.path.append("..")

from libs.InteractivePitchUI import *
from libs.data_manipulation import *
from libs.dim_reduction import *
from libs.feature_generation import *
from libs.clustering import *
from libs.convex_hull import *
from libs.alpha_shape import *
from libs.weight_generator import *
from libs.similar_movement import *

In [2]:
# Build the team tracking data for "Denmark" from the EURO 2024 data folder and the
# labelled ground-truth CSV, then keep one match as the working frame `df`.
# NOTE(review): what the literal 4 selects (position vs. match id) is defined in
# libs.data_manipulation — confirm before changing it.
matches = compile_team_tracking_data_with_labels("../data/H_EURO2024GERMANY", "Denmark", "../data/Labelled_ground_truths.csv")
df = extract_one_match(matches, 4)


In [3]:
# Display the column labels of the working frame (cell output).
df.columns

Index(['match_id', 'Time [s]', 'Time [s]_team', 'half_team', 'minute_team',
       'Period_team', 'home_6_x', 'home_6_y', 'home_18_x', 'home_18_y',
       ...
       'away_25_y', 'home_26_x', 'home_26_y', 'away_3_x', 'away_3_y',
       'away_15_x', 'away_15_y', 'Label', 'Time[s]', 'Competition'],
      dtype='object', length=101)

In [4]:
def func_control(x):
    """Constant weighting: return 1 regardless of ``x``."""
    return 1


def func_inverse(x):
    """Inverse weighting: return ``20 / x``.

    The caller must avoid ``x == 0``; plain Python numbers raise
    ``ZeroDivisionError`` there.
    """
    return 20 / x


def func_linear(x):
    """Linearly decreasing weighting: return ``200 - x``."""
    return 200 - x


def func_exp(x):
    """Exponentially decaying weighting: return ``exp(-x / 20)`` via numpy."""
    return np.exp(-x / 20)

In [5]:
# Inspect a single row by its index label (hard-coded); the row is shown as the cell output.
df.loc[414096]

match_id                         3
Time [s]                    5119.0
Time [s]_team               5119.0
half_team                       2H
minute_team                     85
                       ...        
away_15_x                      NaN
away_15_y                      NaN
Label            breakthrough-even
Time[s]                     5119.0
Competition               EURO2024
Name: 414096, Length: 101, dtype: object

In [6]:
import pandas as pd
import numpy as np
import datetime
import os

def _outside_window(indices, center, window):
    """Return the entries of ``indices`` that differ from ``center`` by more than ``window``."""
    indices = np.asarray(indices)
    return indices[(indices > center + window) | (indices < center - window)]


def _format_clock(seconds):
    """Return ``str(datetime.timedelta(seconds=seconds))``, e.g. ``'0:07:22'``."""
    return str(datetime.timedelta(seconds=seconds))


def process_situations(df, sample_step=48, exclusion_window=480, top_k=5, random_state=None):
    """Export ranked look-alike frames for every labelled situation in ``df``.

    For each row whose ``Label`` differs from ``"Missing"``:

    1. Every ``sample_step``-th row of ``df`` is scored against the situation with
       ``find_similar_movement`` and sorted ascending by the first element of each
       returned pair.
    2. The first ``top_k`` candidates whose index label is more than
       ``exclusion_window`` away from the situation's label are kept, and the
       situation row plus the row labelled ``relevant_index + 1`` are added.
    3. ``most_similar_with_wasserstein`` is run once per weighting function
       (``func_control``, ``func_linear``, ``func_inverse``, ``func_exp``) and the
       same exclusion window is applied to each result.
    4. A folder ``"{label}_{match_name}_{time}"`` is prepared with
       ``generate_folder`` and receives ``situation_info.txt`` plus, per weighting
       function, a ranked ``*_ground_truth.csv`` and a shuffled
       ``*_for_ordering_truth.csv``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``Label``, ``match_name``, ``Time [s]`` and ``half``.
    sample_step : int, default 48
        Stride used to pick candidate rows (``df.iloc[::sample_step]``).
    exclusion_window : int, default 480
        Minimum distance from the situation, in index-label units (not seconds),
        that a candidate must exceed.
        NOTE(review): if the index counts consecutive frames this is a frame
        count — confirm the frame rate before reading it as a duration.
    top_k : int, default 5
        Number of candidates passed on to the Wasserstein ranking.
    random_state : int or numpy.random.Generator-like, optional
        Forwarded to ``DataFrame.sample`` for the shuffled CSV; ``None`` keeps the
        original unseeded shuffle.

    Raises
    ------
    KeyError
        If ``relevant_index + 1`` (or any shortlisted label) is not in ``df.index``.
    """
    # Resolved at call time; dict order is the order in which rankings are computed.
    weightings = {
        "Control": func_control,
        "Linear": func_linear,
        "Inverse": func_inverse,
        "Exp": func_exp,
    }
    export_columns = ["match_name", "Time [s]", "half"]

    labelled_indices = df.index[(df["Label"] != "Missing").to_numpy()].tolist()
    # Loop-invariant: the candidate pool is the same for every situation.
    candidate_pool = df.iloc[::sample_step].index

    for relevant_index in labelled_indices:
        # Rank candidates ascending by the first element of each (score, index) pair.
        scored = find_similar_movement(df, relevant_index, candidate_pool)
        ranked = [index for _, index in sorted(scored, key=lambda pair: pair[0])]

        # Drop candidates too close to the situation itself, then keep the best few.
        shortlist = _outside_window(ranked, relevant_index, exclusion_window)[:top_k].tolist()

        # The situation row and its successor are included alongside the shortlist.
        # NOTE(review): presumably required by steps=1 below — confirm in libs.
        new_df = df.loc[shortlist + [relevant_index, relevant_index + 1]]

        # All rankings are computed before anything is written to disk.
        rankings = {
            name: _outside_window(
                most_similar_with_wasserstein(relevant_index, new_df, weight, steps=1),
                relevant_index,
                exclusion_window,
            )
            for name, weight in weightings.items()
        }

        label = df.loc[relevant_index, "Label"]
        match_name = df.loc[relevant_index, "match_name"]
        time_of_situation = _format_clock(df.loc[relevant_index, "Time [s]"])

        folder_name = f"{label}_{match_name}_{time_of_situation}"
        # NOTE(review): the captured output suggests generate_folder removes an
        # existing folder of the same name first — confirm in libs.
        generate_folder(folder_name)

        with open(f"{folder_name}/situation_info.txt", "w") as f:
            f.write(f"Match Name: {match_name}\n")
            f.write(f"Time of Situation: {time_of_situation}\n")

        for func_name, finished_indices in rankings.items():
            # .copy() makes the column assignment below act on an independent frame
            # instead of a chained-indexing slice.
            result_df = df.loc[finished_indices, export_columns].copy()
            result_df["Time [s]"] = result_df["Time [s]"].apply(_format_clock)

            # Ranked order as returned by the Wasserstein step.
            result_df.to_csv(f"{folder_name}/{folder_name}_{func_name}_ground_truth.csv")

            # Same rows in shuffled order.
            result_df.sample(frac=1, random_state=random_state).to_csv(
                f"{folder_name}/{folder_name}_{func_name}_for_ordering_truth.csv"
            )
# Run the export for every labelled row of the working frame; this writes folders
# and CSV files relative to the current working directory.
process_situations(df)


0
1
2
3
4
5
6
198.91180935544085
191.05190318442956
198.94104546675064
190.9981650598253
197.66806737516288
192.0336204664956
199.4025268750435
193.82031103790914
198.3252875289431
199.73807013362637
198.91180935544085
191.05190318442956
198.94104546675064
190.9981650598253
0
1
2
3
4
5
6
0
1
2
3
4
5
6
0
1
2
3
4
5
6
Folder 'run-into-space_Denmark_England_0:00:10' has been removed successfully.
0
1
2
3
4
5
6
190.63563720578438
199.42710904991856
190.57231025669194
199.41752275966877
194.24558228342212
190.78574082926227
198.74398795047404
196.6328075002372
199.71092355462144
192.41091008153705
192.04427736495518
191.16626738896218
197.81719286232766
195.6839404311801
198.77128949274507
190.63563720578438
199.42710904991856
190.57231025669194
199.41752275966877
0
1
2
3
4
5
6
0
1
2
3
4
5
6
0
1
2
3
4
5
6
Folder 'breakthrough-wb_Denmark_England_0:07:22' has been removed successfully.
0
1
2
3
4
5
6
199.22990794496175
194.0117330013975
199.2475565565892
194.02514239755433
195.58363668120535
19

In [None]:
# Index labels of every row labelled "cross-from-cb".
relevant_indices = df.index[(df["Label"] == "cross-from-cb").to_numpy()].to_numpy()
# Position inside relevant_indices (not a DataFrame index label) of the situation to study.
relevant_index = 0




In [None]:
# Print all matching index labels, then the match name and timestamp of the chosen one.
print(relevant_indices)
print(df.loc[[relevant_indices[relevant_index]], ["match_name", "Time [s]"]])

[347164]
            match_name  Time [s]
347164  Denmark_Serbia    2500.0


In [None]:
# Score every 48th row of df (df.iloc[::48]) against the chosen situation.
# NOTE(review): the result is later sorted by element 0 and element 1 is used as an
# index label, so it looks like (score, index) pairs — confirm in libs.similar_movement.
similar_movement = find_similar_movement(df,relevant_indices[relevant_index],df.iloc[::48].index)

In [None]:
# Order the candidate pairs ascending by their first element.
similar_movement_ranked = list(similar_movement)
similar_movement_ranked.sort(key=lambda scored: scored[0])
# Keep the index labels (second element) of the 20 best-ranked candidates.
new_indices = [candidate for _score, candidate in similar_movement_ranked[:20]]


In [None]:
# Select the shortlisted rows plus the chosen situation and the row labelled one
# above it. The label list is built without mutating new_indices, so re-running
# this cell does not keep appending duplicate rows.
# NOTE(review): the "+ 1" row is presumably needed by steps=1 in the Wasserstein
# ranking — confirm in libs.
new_df = df.loc[
    [
        *new_indices,
        relevant_indices[relevant_index],
        relevant_indices[relevant_index] + 1,
    ]
]

In [None]:
# Re-rank the candidate frame once per weighting function. The generator is
# consumed left to right, so the calls run in the order control, linear,
# inverse, exp.
(
    finished_indices_control,
    finished_indices_linear,
    finished_indices_inverse,
    finished_indices_exp,
) = (
    most_similar_with_wasserstein(relevant_indices[relevant_index], new_df, weight_fn, steps=1)
    for weight_fn in (func_control, func_linear, func_inverse, func_exp)
)

        match_id  Time [s]  Time [s]_team half_team  minute_team  Period_team  \
347164         3   2500.00        2500.00        1H           41            1   
347165         3   2500.04        2500.04        1H           41            1   

         home_6_x  home_6_y  home_18_x  home_18_y  ...  away_25_y  home_26_x  \
347164 -17.472734 -2.775934 -39.604385  20.131933  ...        NaN        NaN   
347165 -17.472685 -2.732125 -39.561913  20.309237  ...        NaN        NaN   

        home_26_y  away_3_x  away_3_y  away_15_x  away_15_y          Label  \
347164        NaN       NaN       NaN        NaN        NaN  cross-from-cb   
347165        NaN       NaN       NaN        NaN        NaN        Missing   

        Time[s]  Competition  
347164   2500.0   "EURO2024"  
347165      NaN          NaN  

[2 rows x 101 columns]
22
22
[[(-17.472734451293945, -2.775934219360352), (-39.60438537597656, 20.131933212280277), (30.747602462768555, -0.2054070234298706), (-27.998794555664062, 14.30

In [None]:
def _outside_window(indices, center, window=480):
    """Return the entries of ``indices`` that differ from ``center`` by more than ``window``.

    The comparison is made on index labels, so ``window`` is expressed in
    index-label units.
    """
    indices = np.asarray(indices)
    return indices[(indices > center + window) | (indices < center - window)]


# Apply the same exclusion window around the chosen situation to every ranking,
# so results too close to the situation itself are dropped.
finished_indices_control = _outside_window(finished_indices_control, relevant_indices[relevant_index])
finished_indices_exp = _outside_window(finished_indices_exp, relevant_indices[relevant_index])
finished_indices_inverse = _outside_window(finished_indices_inverse, relevant_indices[relevant_index])
finished_indices_linear = _outside_window(finished_indices_linear, relevant_indices[relevant_index])



In [None]:
# Interactive pitch widget offering only the chosen situation's index label.
PitchDisplay(df, [relevant_indices[relevant_index]])

VBox(children=(Dropdown(description='Select Index:', options=(347164,), value=347164), Output()))

<libs.InteractivePitchUI.PitchDisplay at 0x7f13389e37c0>

In [None]:
# Interactive pitch widget over the index labels kept for the exponential weighting.
PitchDisplay(df,finished_indices_exp)



VBox(children=(Dropdown(description='Select Index:', options=(291408, 291360, 399984, 286896, 462048, 479376, …

<libs.InteractivePitchUI.PitchDisplay at 0x7f141442e140>

In [None]:
import datetime

# Rows ranked with the control weighting. Rows and columns are selected in one
# .loc call and copied, so the column assignment below acts on an independent
# frame rather than a chained-indexing slice.
result_df = df.loc[finished_indices_control, ["match_name", "Time [s]"]].copy()
result_df["Time [s]"] = result_df["Time [s]"].apply(
    lambda seconds: str(datetime.timedelta(seconds=seconds))
)

# Match name and raw timestamp of the chosen situation, as strings.
title_seperate = [
    str(value)
    for value in df.loc[relevant_indices[relevant_index]][["match_name", "Time [s]"]].to_numpy()
]

folder_name = "Control_function" + title_seperate[0] + "_" + title_seperate[1]

generate_folder(folder_name)

# to_csv returns None when given a path, so its result is not bound to a name.
result_df.to_csv(folder_name + "/" + folder_name + "_ground_truth.csv")
# Same rows in shuffled order.
result_df.sample(frac=1).to_csv(folder_name + "/" + folder_name + "_for_ordering_truth.csv")

Folder 'Control_functionDenmark_Serbia_2500.0' does not exist.


In [None]:
# Rows ranked with the linear weighting. Rows and columns are selected in one
# .loc call and copied, so the column assignment below acts on an independent
# frame rather than a chained-indexing slice.
result_df = df.loc[finished_indices_linear, ["match_name", "Time [s]"]].copy()
result_df["Time [s]"] = result_df["Time [s]"].apply(
    lambda seconds: str(datetime.timedelta(seconds=seconds))
)

# Match name and raw timestamp of the chosen situation, as strings.
title_seperate = [
    str(value)
    for value in df.loc[relevant_indices[relevant_index]][["match_name", "Time [s]"]].to_numpy()
]

folder_name = "Linear_function" + title_seperate[0] + "_" + title_seperate[1]
generate_folder(folder_name)

# Ranked order first, then the same rows in shuffled order.
result_df.to_csv(folder_name + "/" + folder_name + "_ground_truth.csv")
result_df.sample(frac=1).to_csv(folder_name + "/" + folder_name + "_for_ordering_truth.csv")

Folder 'Linear_functionDenmark_Serbia_2500.0' does not exist.


In [None]:
# Rows ranked with the inverse weighting. Rows and columns are selected in one
# .loc call and copied, so the column assignment below acts on an independent
# frame rather than a chained-indexing slice.
result_df = df.loc[finished_indices_inverse, ["match_name", "Time [s]"]].copy()
result_df["Time [s]"] = result_df["Time [s]"].apply(
    lambda seconds: str(datetime.timedelta(seconds=seconds))
)

# Match name and raw timestamp of the chosen situation, as strings.
title_seperate = [
    str(value)
    for value in df.loc[relevant_indices[relevant_index]][["match_name", "Time [s]"]].to_numpy()
]

folder_name = "Inverse_function" + title_seperate[0] + "_" + title_seperate[1]

generate_folder(folder_name)

# to_csv returns None when given a path, so its result is not bound to a name.
result_df.to_csv(folder_name + "/" + folder_name + "_ground_truth.csv")

# Same rows in shuffled order.
result_df.sample(frac=1).to_csv(folder_name + "/" + folder_name + "_for_ordering_truth.csv")

Folder 'Inverse_functionDenmark_Serbia_2500.0' does not exist.


In [None]:
# Rows ranked with the exponential weighting. Rows and columns are selected in
# one .loc call and copied, so the column assignment below acts on an
# independent frame rather than a chained-indexing slice.
result_df = df.loc[finished_indices_exp, ["match_name", "Time [s]"]].copy()
result_df["Time [s]"] = result_df["Time [s]"].apply(
    lambda seconds: str(datetime.timedelta(seconds=seconds))
)

# Match name and raw timestamp of the chosen situation, as strings.
title_seperate = [
    str(value)
    for value in df.loc[relevant_indices[relevant_index]][["match_name", "Time [s]"]].to_numpy()
]

folder_name = "Exp_function" + title_seperate[0] + "_" + title_seperate[1]
generate_folder(folder_name)

# to_csv returns None when given a path, so its result is not bound to a name.
result_df.to_csv(folder_name + "/" + folder_name + "_ground_truth.csv")
# Same rows in shuffled order.
result_df.sample(frac=1).to_csv(folder_name + "/" + folder_name + "_for_ordering_truth.csv")

Folder 'Exp_functionDenmark_Serbia_2500.0' does not exist.
