In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Read data from csv. in folder data/H_EURO2024GERMANY
import os
import shutil

def save_team_matches(data_folder, team_name, output_folder):
    # Create the output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Iterate through all folders in the data directory
    for match_folder in os.listdir(data_folder):
        # Check if the item is a folder and if it contains the team name
        if os.path.isdir(os.path.join(data_folder, match_folder)) and team_name in match_folder:
            # Extract home and away team from folder name (assumes format "ID_Home_Away")
            parts = match_folder.split('_')
            if len(parts) >= 3:
                home_team = parts[1]
                away_team = parts[2]
                
                # Determine which file to save based on team position
                match_path = os.path.join(data_folder, match_folder)
                if home_team == team_name:
                    file_to_save = os.path.join(match_path, 'tracking_home.csv')
                    save_as = f"{match_folder}.csv"
                elif away_team == team_name:
                    file_to_save = os.path.join(match_path, 'tracking_away.csv')
                    save_as = f"{match_folder}.csv"
                else:
                    continue

                # Copy the file to the output directory
                if os.path.exists(file_to_save):
                    shutil.copy(file_to_save, os.path.join(output_folder, save_as))
                    print(f"Saved {file_to_save} as {save_as} in {output_folder}")
                else:
                    print(f"File not found: {file_to_save}")

# Example usage
data_folder = "data"
output_folder = "data/Gennembrud/Danish_matches"
team_name = "Denmark"
save_team_matches(data_folder, team_name, output_folder)



Saved data/2036190_Denmark_Serbia/tracking_home.csv as 2036190_Denmark_Serbia.csv in data/Gennembrud/Danish_matches
Saved data/2036197_Germany_Denmark/tracking_away.csv as 2036197_Germany_Denmark.csv in data/Gennembrud/Danish_matches
Saved data/2036178_Denmark_England/tracking_home.csv as 2036178_Denmark_England.csv in data/Gennembrud/Danish_matches
Saved data/2036165_Slovenia_Denmark/tracking_away.csv as 2036165_Slovenia_Denmark.csv in data/Gennembrud/Danish_matches


In [2]:
import pandas as pd
import os
import re
from collections import defaultdict

def label_match_data(labelled_data_file, data_folder, output_folder):
    # Load the labelled data
    labelled_data = pd.read_csv(labelled_data_file)

    # Convert Time[s] from hh:mm:ss or mm:ss format to seconds
    def time_to_seconds(time_str):
        parts = time_str.split(":")
        if len(parts) == 3:
            h, m, s = map(float, parts)
        elif len(parts) == 2:
            h = 0.0
            m, s = map(float, parts)
        else:
            raise ValueError(f"Unexpected time format: {time_str}")
        return h * 3600 + m * 60 + s  # Convert to total seconds
    

    labelled_data['Time_in_seconds'] = labelled_data['Time[s]'].apply(time_to_seconds)
    
    # Ensure the output folder exists
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    # Dictionary to store labels for each match file
    labels_to_apply = defaultdict(list)

    # Accumulate labels for each match file
    for index, row in labelled_data.iterrows():
        label = row['Label']
        time_in_seconds = row['Time_in_seconds']
        match_name = row['Match']
        
        # Extract home and away teams from match_name
        teams = match_name.split('_')
        if len(teams) < 2:
            print(f"Invalid match format: {match_name}")
            continue
        team1, team2 = teams[0].strip(" \""), teams[1].rstrip("\"")
        
        # Find the matching file in Danish_matches
        match_file = None
        print(team1)
        print(teams)
        for file in os.listdir(data_folder):

            if re.search(rf"^\d*_{team1}.*{team2}\.csv|^\d*_{team2}.*{team1}\.csv", file):
                match_file = os.path.join(data_folder, file)
                print(match_file)
                break

        # If a matching file was found, add the label and time to the list for this match file
        if match_file:
            labels_to_apply[match_file].append((time_in_seconds, label))
        else:
            print(f"No matching file found for match: {match_name}")
    
    # Apply all labels for each match file in one pass and save the result
    for match_file, labels in labels_to_apply.items():
        match_data = pd.read_csv(match_file)
        
        # Check if the 'Time [s]' column exists
        if "Time [s]" not in match_data.columns:
            print(f"'Time [s]' column not found in {match_file}. Available columns: {match_data.columns}")
            continue
        
        # Check if the 'Label' column exists; if not, create it
        if 'Label' not in match_data.columns:
            match_data['Label'] = ""

        # Apply each label for this match file and capture minutes
        labeled_minutes = []  # List to store minutes for each labeled row
        for time_in_seconds, label in labels:
            tolerance = 0.0001  # Adjust tolerance if needed
            matched_rows = match_data[(match_data['Time [s]'] - time_in_seconds).abs() <= tolerance]

            if not matched_rows.empty:
                row_index = matched_rows.index[0]  # Use the first match if multiple rows match
                # Apply the label if there is no existing label or append if already labeled
                if pd.isna(match_data.at[row_index, 'Label']) or match_data.at[row_index, 'Label'] == "":
                    match_data.at[row_index, 'Label'] = label
                else:
                    # Append the new label to the existing one (to avoid overwriting)
                    match_data.at[row_index, 'Label'] += f", {label}"
                
                # Capture the minute for the labeled row
                labeled_minute = match_data.at[row_index, 'minute']
                labeled_minutes.append((label, labeled_minute))
                print(f"Label '{label}' applied to row {row_index} in {os.path.basename(match_file)} at minute {labeled_minute}")

        # Print out all labeled minutes for the current match file
        print(f"Labeled minutes in {os.path.basename(match_file)}: {labeled_minutes}")

        # Save the updated match data with all labels applied
        output_file = os.path.join(output_folder, os.path.basename(match_file))
        match_data.to_csv(output_file, index=False)
        print(f"Saved updated file {output_file}")

# Example usage
labelled_data_file = "data/Labelled_ground_truths.csv"  # path to Labelled_ground_truths.csv
data_folder = "data/Gennembrud/Danish_matches"      # folder with saved Danish matches
output_folder = "data/Gennembrud/Labelled_Matches"  # folder to save the labeled output
label_match_data(labelled_data_file, data_folder, output_folder)


Denmark
[' "Denmark', 'Germany"']
data/Gennembrud/Danish_matches/2036197_Germany_Denmark.csv
Denmark
[' "Denmark', 'Serbia"']
data/Gennembrud/Danish_matches/2036190_Denmark_Serbia.csv
Denmark
[' "Denmark', 'Slovenia"']
data/Gennembrud/Danish_matches/2036165_Slovenia_Denmark.csv
Denmark
[' "Denmark', 'Slovenia"']
data/Gennembrud/Danish_matches/2036165_Slovenia_Denmark.csv
Denmark
[' "Denmark', 'Slovenia"']
data/Gennembrud/Danish_matches/2036165_Slovenia_Denmark.csv
Denmark
[' "Denmark', 'Slovenia"']
data/Gennembrud/Danish_matches/2036165_Slovenia_Denmark.csv
Denmark
[' "Denmark', 'England"']
data/Gennembrud/Danish_matches/2036178_Denmark_England.csv
Denmark
[' "Denmark', 'Germany"']
data/Gennembrud/Danish_matches/2036197_Germany_Denmark.csv
Denmark
[' "Denmark', 'Serbia"']
data/Gennembrud/Danish_matches/2036190_Denmark_Serbia.csv
Denmark
[' "Denmark', 'Serbia"']
data/Gennembrud/Danish_matches/2036190_Denmark_Serbia.csv
Denmark
[' "Denmark', 'Serbia"']
data/Gennembrud/Danish_matches/2036