In [2]:
import json
import os
import pandas as pd


def process_json_labels(
    json_file_path,
    chunk_size=100,
    columns=("Timestamp", "ACCX", "ACCY", "ACCZ"),
):
    """Export labelled time-series regions as per-label CSV chunks.

    The JSON file is expected to hold a list of tasks. Each task has an
    ``id``, a ``data.ts_file`` reference to a CSV file, and optionally a
    list of ``annotations`` whose ``result`` entries carry
    ``value.start``, ``value.end`` and ``value.timeserieslabels``.

    For every labelled region, the rows of the task's CSV whose
    ``Timestamp`` lies in ``[start, end)`` are extracted, reduced to
    ``columns``, re-numbered from 0 in the ``Timestamp`` column, and written
    in chunks of at most ``chunk_size`` rows to
    ``<json dir>/<label>/task<ID>_segment<I>_part<P>.csv``.

    ``<I>`` is the position of the result within the task, counted across
    *all* of the task's annotations, so two annotations on the same task
    never overwrite each other's files. For a task with a single annotation
    the file names are the same as with per-annotation numbering.

    Args:
        json_file_path: Path to the annotation export (JSON). CSV files are
            looked up next to it, by base name only.
        chunk_size: Maximum number of rows per output file.
        columns: Columns kept in the output files, in this order.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    columns = list(columns)

    # Load the JSON annotations.
    with open(json_file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    base_path = os.path.dirname(json_file_path)

    for task in data:
        task_id = task["id"]

        # Only the base name of the referenced file is used: the CSV is
        # expected to sit in the same directory as the JSON file.
        csv_ref = task["data"]["ts_file"].split("?d=")[-1]
        csv_path = os.path.join(base_path, os.path.basename(csv_ref))

        if not os.path.exists(csv_path):
            print(f"CSV file not found: {csv_path}")
            continue

        # Load the CSV file.
        df = pd.read_csv(csv_path)
        if "Timestamp" not in df.columns:
            print(f"'Timestamp' column not found in {csv_path}")
            continue

        # Validate the remaining columns up front instead of failing with a
        # KeyError half-way through the export.
        missing = [name for name in columns if name not in df.columns]
        if missing:
            print(f"Missing columns {missing} in {csv_path}")
            continue

        # Flatten results across annotations so the segment index is unique
        # per task (a per-annotation index would make file names collide).
        results = [
            result
            for annotation in task.get("annotations", [])
            for result in annotation.get("result", []) or []
        ]

        for i, result in enumerate(results):
            value = result.get("value", {})
            labels = value.get("timeserieslabels")
            if not labels or "start" not in value or "end" not in value:
                # Not a time-series label region; nothing to export.
                continue
            label = labels[0]

            # Select the rows in the half-open range [start, end).
            in_range = (df["Timestamp"] >= value["start"]) & (
                df["Timestamp"] < value["end"]
            )
            segment = df.loc[in_range, columns].reset_index(drop=True)
            if segment.empty:
                continue

            # Re-number Timestamp from 0 to n-1 within the segment.
            if "Timestamp" in segment.columns:
                segment["Timestamp"] = range(len(segment))

            # Create the per-label output directory if needed.
            output_dir = os.path.join(base_path, label)
            os.makedirs(output_dir, exist_ok=True)

            # Split the segment into files of at most chunk_size rows.
            for j in range(0, len(segment), chunk_size):
                sub_segment = segment.iloc[j:j + chunk_size].reset_index(drop=True)

                # File name: task<ID>_segment<index>_part<index>.csv
                output_file = (
                    f"task{task_id}_segment{i}_part{j // chunk_size}.csv"
                )
                output_path = os.path.join(output_dir, output_file)

                sub_segment.to_csv(output_path, index=False)
                print(f"Saved: {output_path}")


if __name__ == "__main__":
    # Script entry point: export the labelled segments described by the
    # annotation file located in the current working directory.
    process_json_labels(json_file_path="_annotations.json")


Saved: Normal/task648_segment0_part0.csv
Saved: Normal/task648_segment0_part1.csv
Saved: Normal/task648_segment0_part2.csv
Saved: Normal/task648_segment0_part3.csv
Saved: Normal/task648_segment0_part4.csv
Saved: Normal/task648_segment0_part5.csv
Saved: Normal/task648_segment0_part6.csv
Saved: Normal/task648_segment0_part7.csv
Saved: Normal/task648_segment0_part8.csv
Saved: Normal/task649_segment0_part0.csv
Saved: Normal/task649_segment0_part1.csv
Saved: Normal/task649_segment0_part2.csv
Saved: Normal/task649_segment0_part3.csv
Saved: Normal/task649_segment0_part4.csv
Saved: Normal/task649_segment0_part5.csv
Saved: Normal/task649_segment0_part6.csv
Saved: Normal/task649_segment0_part7.csv
Saved: Normal/task649_segment0_part8.csv
Saved: Normal/task650_segment0_part0.csv
Saved: Normal/task650_segment0_part1.csv
Saved: Normal/task650_segment0_part2.csv
Saved: Normal/task651_segment0_part0.csv
Saved: Normal/task651_segment0_part1.csv
Saved: Normal/task651_segment0_part2.csv
Saved: Normal/ta