In [7]:
import sys
sys.path.append("..")

from libs.InteractivePitchUI import *
from libs.data_manipulation import *
from libs.dim_reduction import *
from libs.feature_generation import *
from libs.clustering import *
from libs.convex_hull import *
from libs.alpha_shape import *
from libs.weight_generator import *
from libs.similar_movement import *

In [8]:
# Load tracking data together with the hand-labelled situations.
# NOTE(review): the arguments look like (data directory, team name, labels CSV) — confirm
# against libs.data_manipulation.
matches = compile_team_tracking_data_with_labels("../data", "Denmark", "../data/Labelled_ground_truths.csv")
# NOTE(review): the meaning of the literal 4 (position vs. match id) is not visible here — confirm.
df = extract_one_match(matches, 4)


KeyboardInterrupt: 

In [3]:
# List the column labels of df (per-player x/y columns plus match metadata, per the output below).
df.columns

Index(['match_id', 'Time [s]', 'Time [s]_team', 'half_team', 'minute_team',
       'Period_team', 'home_6_x', 'home_6_y', 'home_18_x', 'home_18_y',
       'home_1_x', 'home_1_y', 'home_10_x', 'home_10_y', 'home_3_x',
       'home_3_y', 'home_2_x', 'home_2_y', 'home_19_x', 'home_19_y',
       'home_21_x', 'home_21_y', 'home_9_x', 'home_9_y', 'home_17_x',
       'home_17_y', 'home_23_x', 'home_23_y', 'home_20_x', 'home_20_y',
       'home_15_x', 'home_15_y', 'home_8_x', 'home_8_y', 'home_5_x',
       'home_5_y', 'home_12_x', 'home_12_y', 'ball_z_team', 'half', 'minute',
       'Period', 'away_10_x', 'away_10_y', 'away_2_x', 'away_2_y', 'away_17_x',
       'away_17_y', 'away_22_x', 'away_22_y', 'away_9_x', 'away_9_y',
       'away_1_x', 'away_1_y', 'away_20_x', 'away_20_y', 'away_21_x',
       'away_21_y', 'away_11_x', 'away_11_y', 'away_13_x', 'away_13_y',
       'away_6_x', 'away_6_y', 'away_7_x', 'away_7_y', 'away_5_x', 'away_5_y',
       'away_14_x', 'away_14_y', 'away_23_x', 'away_23

In [4]:
# Weighting functions passed to most_similar_with_wasserstein further down.
# They are written as named functions rather than lambdas bound to names, so
# each one has a real __name__ in tracebacks and can carry a docstring.
def func_control(x):
    """Constant weight: return the scalar 1 for any `x` (the shape of `x` is not preserved)."""
    return 1


def func_inverse(x):
    """Inverse weight 20 / x.

    `x` must be non-zero: a Python 0 raises ZeroDivisionError, and a NumPy 0
    yields inf.
    """
    return 20/x


def func_linear(x):
    """Linearly decreasing weight 200 - x; the result is negative once x exceeds 200."""
    return 200-x


def func_exp(x):
    """Exponentially decaying weight exp(-x / 20), evaluated with NumPy."""
    return np.exp(-x/20)

In [6]:
import pandas as pd
import numpy as np
import datetime
import os

def _exclude_nearby_frames(indices, center, half_window):
    """Return `indices` as an array without entries within `half_window` index units of `center`."""
    frame_ids = np.array(indices)
    return frame_ids[(frame_ids > center + half_window) | (frame_ids < center - half_window)]


def process_situations(df, top_k=5, frame_stride=48, exclusion_frames=480, random_state=None):
    """Export, for every labelled frame in `df`, the frames judged most similar to it.

    For each row whose 'Label' is not "Missing" the function:

    1. scores the row against every `frame_stride`-th row of `df` with
       `find_similar_movement` and keeps the `top_k` best-scoring index labels
       (ascending sort on the first tuple element);
    2. runs `most_similar_with_wasserstein` on those rows plus the labelled row
       and its successor, once per weighting function (control, exp, linear,
       inverse);
    3. discards results whose index label lies within `exclusion_frames` of
       the labelled row;
    4. writes a folder ``{label}_{match_name}_{time}`` containing
       ``situation_info.txt`` and, per weighting function, a
       ``*_ground_truth.csv`` and a shuffled ``*_for_ordering_truth.csv``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'Label', 'match_name', 'Time [s]' and 'half'
        and an integer index in which `i + 1` is the row following `i`.
    top_k : int, default 5
        Number of pre-selected candidate rows handed to the Wasserstein step.
    frame_stride : int, default 48
        Only every `frame_stride`-th row of `df` is considered as a candidate.
    exclusion_frames : int, default 480
        Half-width of the excluded neighbourhood, measured in index labels
        (rows), not seconds. With the 0.04 s row spacing visible in this
        notebook's output, 480 rows would be roughly 19 s — confirm for other
        data.
    random_state : int or None, default None
        Forwarded to `DataFrame.sample` for the shuffled CSV; None keeps the
        original unseeded shuffle.

    Returns
    -------
    None
        All results are written to disk.

    Notes
    -----
    Relies on `func_control`, `func_exp`, `func_linear`, `func_inverse`,
    `find_similar_movement`, `most_similar_with_wasserstein` and
    `generate_folder` being defined in the notebook namespace. A labelled row
    whose successor `index + 1` is absent from `df` is skipped with a message
    instead of failing with a KeyError.
    """
    # Output-name -> weighting function; insertion order is the order files are written in.
    weight_functions = {
        'Control': func_control,
        'Exp': func_exp,
        'Linear': func_linear,
        'Inverse': func_inverse,
    }

    # Step 1: index labels of every row carrying a real label.
    has_label = (df['Label'] != "Missing").to_numpy()
    relevant_indices = df.index[has_label].tolist()

    # The candidate pool does not depend on the situation, so build it once.
    candidate_index = df.iloc[::frame_stride].index

    # Step 2: process each labelled row.
    for relevant_index in relevant_indices:
        next_index = relevant_index + 1
        if next_index not in df.index:
            print(f"Skipping index {relevant_index}: following frame {next_index} is not in the DataFrame.")
            continue

        # Step 3: find similar movements and order them by their first element.
        similar_movement = find_similar_movement(df, relevant_index, candidate_index)
        similar_movement_ranked = sorted(similar_movement, key=lambda pair: pair[0])

        # Step 4: keep the best `top_k` candidates plus the query row and its successor.
        new_indices = [index for _, index in similar_movement_ranked][:top_k]
        new_indices.extend([relevant_index, next_index])
        new_df = df.loc[new_indices]

        # Step 5: Wasserstein comparison per weighting function. Every variant is
        # computed before anything is written, so a failure leaves no partial folder.
        # Step 6: drop results within `exclusion_frames` index units of the query row.
        finished_by_function = {
            func_name: _exclude_nearby_frames(
                most_similar_with_wasserstein(relevant_index, new_df, weight_function, steps=1),
                relevant_index,
                exclusion_frames,
            )
            for func_name, weight_function in weight_functions.items()
        }

        # Step 7: describe the situation and create its output folder.
        label = df.loc[relevant_index, 'Label']
        match_name = df.loc[relevant_index, 'match_name']
        time_of_situation = str(datetime.timedelta(seconds=df.loc[relevant_index, 'Time [s]']))

        folder_name = f"{label}_{match_name}_{time_of_situation}"
        generate_folder(folder_name)

        with open(f"{folder_name}/situation_info.txt", "w") as f:
            f.write(f"Match Name: {match_name}\n")
            f.write(f"Time of Situation: {time_of_situation}\n")

        # Step 8: one pair of CSVs per weighting function.
        for func_name, finished_indices in finished_by_function.items():
            # Single .loc call plus explicit copy: the column rewrite below acts on
            # an independent frame rather than on the result of chained indexing.
            result_df = df.loc[finished_indices, ["match_name", "Time [s]", "half"]].copy()
            result_df["Time [s]"] = result_df["Time [s]"].apply(
                lambda seconds: str(datetime.timedelta(seconds=seconds))
            )

            result_df.to_csv(f"{folder_name}/{folder_name}_{func_name}_ground_truth.csv")

            # Shuffled copy of the same rows.
            result_df.sample(frac=1, random_state=random_state).to_csv(
                f"{folder_name}/{folder_name}_{func_name}_for_ordering_truth.csv"
            )
# Run the export for every labelled row of df; results are written to disk, nothing is returned.
process_situations(df)


        match_id  Time [s]  Time [s]_team half_team  minute_team  Period_team  \
143546         2      9.96           9.96        1H            0            1   
143547         2     10.00          10.00        1H            0            1   

         home_6_x  home_6_y  home_18_x  home_18_y  ...  away_16_y  away_25_x  \
143546  35.942333 -4.630569        NaN        NaN  ...        NaN        NaN   
143547  35.846664 -4.708794        NaN        NaN  ...        NaN        NaN   

        away_25_y  home_26_x  home_26_y  away_3_x  away_3_y  away_15_x  \
143546        NaN        NaN        NaN       NaN       NaN        NaN   
143547        NaN        NaN        NaN       NaN       NaN        NaN   

        away_15_y           Label  
143546        NaN  run-into-space  
143547        NaN  run-into-space  

[2 rows x 99 columns]
7
7
[[(35.94233322143555, -4.6305694580078125), (48.8492431640625, -0.7967327833175659), (25.410385131835938, -6.222737789154053), (37.49870681762695, -0.2113593

ValueError: Value and weight array-likes for the same empirical distribution must be of the same size.

In [6]:
# Index labels of all rows labelled "cross-from-cb".
relevant_indices = df[df["Label"] == "cross-from-cb"].index.to_numpy()
# Position within relevant_indices (not a df index label) of the situation examined below.
relevant_index = 0




In [7]:
# Show the matching index labels, then the match name and timestamp of the selected one.
print(relevant_indices)
print(df.loc[[relevant_indices[relevant_index]]][["match_name","Time [s]"]])

[347164]
            match_name  Time [s]
347164  Denmark_Serbia    2500.0


In [8]:
# Compare the query row against every 48th row of df.
# NOTE(review): the result is consumed below as (score, index) pairs — confirm in libs.similar_movement.
similar_movement = find_similar_movement(df,relevant_indices[relevant_index],df.iloc[::48].index)

In [9]:
# Sort ascending by the first tuple element (presumably a distance, smaller = more similar — confirm).
similar_movement_ranked = sorted(similar_movement, key=lambda x:x[0])
# Keep the index labels of the first 20 entries.
new_indices = [index for _,index in similar_movement_ranked][:20]


In [10]:
# Add the query row and the row after it to the candidate list.
# NOTE: these appends mutate new_indices in place, so re-running this cell without
# re-running the previous one adds the pair again.
new_indices.append(relevant_indices[relevant_index])
new_indices.append(relevant_indices[relevant_index]+1)
# Rows of df for the candidates plus the query pair, in list order.
new_df=df.loc[new_indices]

In [11]:
# Run most_similar_with_wasserstein on the candidate rows once per weighting
# function, always against the same query row and with steps=1.
query_index = relevant_indices[relevant_index]
finished_indices_control = most_similar_with_wasserstein(query_index, new_df, func_control, steps=1)
finished_indices_linear = most_similar_with_wasserstein(query_index, new_df, func_linear, steps=1)
finished_indices_inverse = most_similar_with_wasserstein(query_index, new_df, func_inverse, steps=1)
finished_indices_exp = most_similar_with_wasserstein(query_index, new_df, func_exp, steps=1)

        match_id  Time [s]  Time [s]_team half_team  minute_team  Period_team  \
347164         3   2500.00        2500.00        1H           41            1   
347165         3   2500.04        2500.04        1H           41            1   

         home_6_x  home_6_y  home_18_x  home_18_y  ...  away_25_y  home_26_x  \
347164 -17.472734 -2.775934 -39.604385  20.131933  ...        NaN        NaN   
347165 -17.472685 -2.732125 -39.561913  20.309237  ...        NaN        NaN   

        home_26_y  away_3_x  away_3_y  away_15_x  away_15_y          Label  \
347164        NaN       NaN       NaN        NaN        NaN  cross-from-cb   
347165        NaN       NaN       NaN        NaN        NaN        Missing   

        Time[s]  Competition  
347164   2500.0   "EURO2024"  
347165      NaN          NaN  

[2 rows x 101 columns]
22
22
[[(-17.472734451293945, -2.775934219360352), (-39.60438537597656, 20.131933212280277), (30.747602462768555, -0.2054070234298706), (-27.998794555664062, 14.30

In [12]:
def _outside_exclusion_window(indices, center, half_width=480):
    """Return `indices` as a NumPy array, keeping only entries more than `half_width` away from `center`."""
    frame_ids = np.array(indices)
    return frame_ids[
        (frame_ids > center + half_width) |
        (frame_ids < center - half_width)
    ]


# Drop every result whose index label lies within 480 of the query row, so the
# situation is not matched against its own neighbourhood.
finished_indices_control = _outside_exclusion_window(finished_indices_control, relevant_indices[relevant_index])
finished_indices_exp = _outside_exclusion_window(finished_indices_exp, relevant_indices[relevant_index])
finished_indices_inverse = _outside_exclusion_window(finished_indices_inverse, relevant_indices[relevant_index])
finished_indices_linear = _outside_exclusion_window(finished_indices_linear, relevant_indices[relevant_index])



In [13]:
# Open the pitch widget for the query situation only (single-element index list).
PitchDisplay(df, [relevant_indices[relevant_index]])

VBox(children=(Dropdown(description='Select Index:', options=(347164,), value=347164), Output()))

<libs.InteractivePitchUI.PitchDisplay at 0x7f13389e37c0>

In [14]:
# Open the pitch widget for the rows kept under the exponential weighting.
PitchDisplay(df,finished_indices_exp)



VBox(children=(Dropdown(description='Select Index:', options=(291408, 291360, 399984, 286896, 462048, 479376, …

<libs.InteractivePitchUI.PitchDisplay at 0x7f141442e140>

In [15]:
import datetime



# Export the rows kept under the control weighting for the selected situation.
# One .loc call plus an explicit copy, so the column rewrite below acts on an
# independent frame instead of the result of chained indexing.
result_df = df.loc[finished_indices_control, ["match_name", "Time [s]"]].copy()
# Render the timestamp as an H:MM:SS string.
result_df["Time [s]"] = result_df["Time [s]"].apply(lambda x: str(datetime.timedelta(seconds=x)))

# [match_name, time] of the query situation as strings, used in the folder name.
title_seperate = [
    str(value)
    for value in df.loc[relevant_indices[relevant_index], ["match_name", "Time [s]"]].to_numpy()
]

folder_name = "Control_function" + title_seperate[0] + "_" + title_seperate[1]

generate_folder(folder_name)

# to_csv returns None when it is given a path, so its result is not stored.
result_df.to_csv(f"{folder_name}/{folder_name}_ground_truth.csv")
# Same rows in an unseeded random order (differs between runs).
result_df.sample(frac=1).to_csv(f"{folder_name}/{folder_name}_for_ordering_truth.csv")

Folder 'Control_functionDenmark_Serbia_2500.0' does not exist.


In [16]:
# Export the rows kept under the linear weighting for the selected situation.
# One .loc call plus an explicit copy, so the column rewrite below acts on an
# independent frame instead of the result of chained indexing.
result_df = df.loc[finished_indices_linear, ["match_name", "Time [s]"]].copy()
# Render the timestamp as an H:MM:SS string.
result_df["Time [s]"] = result_df["Time [s]"].apply(lambda x: str(datetime.timedelta(seconds=x)))

# [match_name, time] of the query situation as strings, used in the folder name.
title_seperate = [
    str(value)
    for value in df.loc[relevant_indices[relevant_index], ["match_name", "Time [s]"]].to_numpy()
]

folder_name = "Linear_function" + title_seperate[0] + "_" + title_seperate[1]
generate_folder(folder_name)

result_df.to_csv(f"{folder_name}/{folder_name}_ground_truth.csv")
# Same rows in an unseeded random order (differs between runs).
result_df.sample(frac=1).to_csv(f"{folder_name}/{folder_name}_for_ordering_truth.csv")

Folder 'Linear_functionDenmark_Serbia_2500.0' does not exist.


In [17]:
# Export the rows kept under the inverse weighting for the selected situation.
# One .loc call plus an explicit copy, so the column rewrite below acts on an
# independent frame instead of the result of chained indexing.
result_df = df.loc[finished_indices_inverse, ["match_name", "Time [s]"]].copy()
# Render the timestamp as an H:MM:SS string.
result_df["Time [s]"] = result_df["Time [s]"].apply(lambda x: str(datetime.timedelta(seconds=x)))

# [match_name, time] of the query situation as strings, used in the folder name.
title_seperate = [
    str(value)
    for value in df.loc[relevant_indices[relevant_index], ["match_name", "Time [s]"]].to_numpy()
]

folder_name = "Inverse_function" + title_seperate[0] + "_" + title_seperate[1]

generate_folder(folder_name)

# to_csv returns None when it is given a path, so its result is not stored.
result_df.to_csv(f"{folder_name}/{folder_name}_ground_truth.csv")

# Same rows in an unseeded random order (differs between runs).
result_df.sample(frac=1).to_csv(f"{folder_name}/{folder_name}_for_ordering_truth.csv")

Folder 'Inverse_functionDenmark_Serbia_2500.0' does not exist.


In [18]:
# Export the rows kept under the exponential weighting for the selected situation.
# One .loc call plus an explicit copy, so the column rewrite below acts on an
# independent frame instead of the result of chained indexing.
result_df = df.loc[finished_indices_exp, ["match_name", "Time [s]"]].copy()
# Render the timestamp as an H:MM:SS string.
result_df["Time [s]"] = result_df["Time [s]"].apply(lambda x: str(datetime.timedelta(seconds=x)))

# [match_name, time] of the query situation as strings, used in the folder name.
title_seperate = [
    str(value)
    for value in df.loc[relevant_indices[relevant_index], ["match_name", "Time [s]"]].to_numpy()
]

folder_name = "Exp_function" + title_seperate[0] + "_" + title_seperate[1]
generate_folder(folder_name)

# to_csv returns None when it is given a path, so its result is not stored.
result_df.to_csv(f"{folder_name}/{folder_name}_ground_truth.csv")
# Same rows in an unseeded random order (differs between runs).
result_df.sample(frac=1).to_csv(f"{folder_name}/{folder_name}_for_ordering_truth.csv")

Folder 'Exp_functionDenmark_Serbia_2500.0' does not exist.
