This is a script to label data using specific label files obtained from the tool Label Studio. It is necessary to specify the path to the label files and the path to the data files.
The script will save the labels in a CSV file.

In [1]:
import sys
import os 
import numpy as np
import pandas as pd
import json 
from pathlib import Path

# Add the parent directory to sys.path
# (Path() is the current working directory, so this is the directory above it;
# presumably this is where the `constants` module imported below lives - confirm.)
parent_dir = Path().resolve().parent
sys.path.append(str(parent_dir))

from constants import LABEL_NAMES
# Reverse lookup of LABEL_NAMES: maps every value of LABEL_NAMES back to its key.
INVERTED_LABELS = {value: key for key, value in LABEL_NAMES.items()}

In [2]:
def extract_files_from_labels(labels_data: list) -> dict:
    """Map every data file referenced in the label export to its task index.

    Args:
        labels_data (list): The tasks loaded from the label file. Every task
            must provide ``task["data"]["csv_url"]``, e.g.
            ``'/data/upload/6/52fb5fa0-Breaststroke1718901708253.csv'``.

    Returns:
        dict: ``{file_name: index}`` where ``index`` is the position of the
        task inside ``labels_data``. When several tasks resolve to the same
        file name, the last one wins.
    """
    files = {}
    for idx, data in enumerate(labels_data):
        # Last path component, e.g. '52fb5fa0-Breaststroke1718901708253.csv'
        uploaded_name = data["data"]["csv_url"].split("/")[-1]

        # Only the part up to the FIRST hyphen is the upload prefix. Splitting
        # on every hyphen would truncate file names that contain a hyphen
        # themselves ('52fb5fa0-free-style.csv' -> 'style.csv').
        _prefix, separator, original_name = uploaded_name.partition("-")
        file_name = original_name if separator else uploaded_name

        files[file_name] = idx
    return files
        

In [None]:
def write_labels(csv_file:str, label_data):
    """Apply the annotated time ranges of one task to a csv file and save it in place.

    Every row whose ``timestamp`` lies inside an annotated range (both bounds
    included) gets the decoded label of that range written to its ``label``
    column. The csv file is then overwritten with the result.

    Args:
        csv_file (str): The csv file to be labeled
        label_data (dict): The label data from the annotation
    """
    print(f"\n============== Labeling file {os.path.basename(csv_file)} ==============")
    frame = pd.read_csv(csv_file)

    # Files without a label column get one, initialised to 0
    if 'label' not in frame.columns:
        print("Adding label column to the file")
        frame['label'] = 0

    # Flatten annotations -> results -> value into one lazy stream of regions
    regions = (
        result['value']
        for annotation in label_data['annotations']
        for result in annotation['result']
    )
    for region in regions:
        start = region['start']
        end = region['end']
        # Only the first label of a region is used
        decoded_label = INVERTED_LABELS[region["timeserieslabels"][0]]

        not_before_start = frame['timestamp'] >= start
        not_after_end = frame['timestamp'] <= end
        frame.loc[not_before_start & not_after_end, 'label'] = decoded_label

        print(f"Labeling from {start} to {end} with label {decoded_label}")

    # Overwrite the original file with the labeled data
    frame.to_csv(csv_file, index=False)
    print(f"File saved into {os.path.dirname(csv_file)}")
    
        

In [None]:
def start_labeling(main_path:str, labels_data:list, available_files:dict):
    """
    Walk through main_path recursively and label every csv file that has annotations.

    Args:
    - main_path: str, the main path where the csv files are located.
    - labels_data: list, the labels data that contains the annotations.
    - available_files: dict, the available files that are in the labels data. {file_name: index}
    """
    for folder, _, names in os.walk(main_path):
        for name in names:
            # Only csv files are candidates
            if not name.endswith('.csv'):
                continue
            # Skip csv files the labels data knows nothing about
            if name not in available_files:
                continue
            target = os.path.join(folder, name)
            task = labels_data[available_files[name]]
            write_labels(target, task)


In [None]:
# Path where the data to be labeled is stored
data_path = "data_processed"
# Folder that is searched recursively for the label files (*.json)
labels_folder = "labels"

print("Starting labeling ...")
for root, _, files in os.walk(labels_folder):
    for file in files:
        if not file.endswith('.json'):
            continue
        label_file = os.path.join(root, file)
        print(f"\nLabel file found: {label_file}")
        # Read the JSON as UTF-8 explicitly: the platform default encoding
        # (e.g. cp1252 on Windows) can fail or mis-decode non-ASCII characters.
        with open(label_file, encoding="utf-8") as f:
            label_data = json.load(f)
        # The label file is closed again before the (slow) labeling starts
        available_files = extract_files_from_labels(label_data)
        start_labeling(data_path, label_data, available_files)

Starting labeling ...

Label file found: C:\Users\omare\OneDrive\Documentos\TU Berlin\Master\Codigo\Master\data\labels\project-6-at-2024-09-12-21-01-56564302.json

Labeling from 178355532502839.34 to 178440343194360.16 with label 2
File saved into C:\Users\omare\OneDrive\Documentos\TU Berlin\Master\Codigo\Master\data\data_omar_processed\pixel\12_09_24

Labeling from 178453841633433.1 to 178559210327532.34 with label 2
File saved into C:\Users\omare\OneDrive\Documentos\TU Berlin\Master\Codigo\Master\data\data_omar_processed\pixel\12_09_24

Labeling from 178608446942020.44 to 178711701728303.16 with label 2
File saved into C:\Users\omare\OneDrive\Documentos\TU Berlin\Master\Codigo\Master\data\data_omar_processed\pixel\12_09_24

Labeling from 178731272071484.72 to 178843470532144.34 with label 2
File saved into C:\Users\omare\OneDrive\Documentos\TU Berlin\Master\Codigo\Master\data\data_omar_processed\pixel\12_09_24

Labeling from 178865087024311 to 178974909395799.62 with label 2
File sav