In [1]:
"""
import pandas as pd
from pathlib import Path
import openpyxl
import numpy as np

def spread_duplicate_timestamps(df, timestamp_col="Timestamp GPS", interval_ms=100):
    new_rows = []

    for ts, group in df.groupby(timestamp_col):
        n = len(group)
        if n == 1:
            new_rows.append(group)
        else:
            # Créer des timestamps répartis dans l’intervalle (ex : 100 ms)
            deltas = pd.to_timedelta(
                np.linspace(0, interval_ms, n, endpoint=False), unit="ms"
            )
            new_ts = ts + deltas
            group = group.copy()
            group[timestamp_col] = new_ts
            new_rows.append(group)

    return pd.concat(new_rows).sort_values(timestamp_col).reset_index(drop=True)


# 1. Charger all_data.csv
all_data = pd.read_csv("all_data.csv")
all_data['ISODateTimeUTC'] = pd.to_datetime(all_data['ISODateTimeUTC'], errors='coerce')

# 2. Séparer les lignes à enrichir (SenseBoard) et les autres
senseboard_rows = all_data[all_data["boat_name"] == "SenseBoard"].copy()
print(senseboard_rows["ISODateTimeUTC"][0:10])
other_rows = all_data[all_data["boat_name"] != "SenseBoard"].copy()

# 3. Préparation
root_dir = Path("../Data_Sailnjord/Straight_lines/")
merged_parts = []

# 4. Traitement des sous-dossiers
for subfolder in root_dir.iterdir():
    if not subfolder.is_dir():
        continue

    print(f"Traitement du dossier : {subfolder.name}")
    senseboard_file = next(subfolder.glob("SenseBoard_log_modified*.xlsx"), None)
    
    if senseboard_file is None:
        print(f"Aucun fichier SenseBoard trouvé dans {subfolder.name}")
        continue

    # Charger le fichier SenseBoard
    sb_data = pd.read_excel(senseboard_file, dtype=str, engine='openpyxl')
    sb_data.columns = sb_data.columns.str.strip()

    if "Timestamp GPS" not in sb_data.columns:
        raise ValueError("Colonne 'Timestamp GPS' non trouvée")

    # Utiliser le bon format européen avec virgule
    sb_data["Timestamp GPS"] = pd.to_datetime(
        sb_data["Timestamp GPS"],
        format="%Y-%m-%d %H:%M:%S.%f",
        errors='coerce'
    )
    sb_data = sb_data.dropna(subset=["Timestamp GPS"])
    sb_data["Timestamp GPS"] = sb_data["Timestamp GPS"].dt.tz_localize("UTC")
    # Convertir les colonnes numériques
    for col in sb_data.columns:
        if col != "Timestamp GPS":
            sb_data[col] = pd.to_numeric(sb_data[col], errors='coerce')

    # Moyenne par timestamp
    # sb_data = sb_data.groupby("Timestamp GPS", as_index=False).mean(numeric_only=True)
    sb_data = spread_duplicate_timestamps(sb_data, interval_ms=100)
    print(sb_data["Timestamp GPS"][0:10])
    # Garder uniquement les colonnes utiles
    cols_to_keep = [
        "Timestamp GPS",
        "LoadCell_1", "LoadCell_2", "LoadCell_3", "LoadCell_4", "LoadCell_5", "LoadCell_6",
        "F_front", "F_back",
        "M_tot_X", "M_tot_Y",
        "M_front_X", "M_front_Y",
        "M_back_X", "M_back_Y",
        "P_front_X", "P_front_Y",
        "P_back_X", "P_back_Y"
    ]

    sb_data = sb_data[[col for col in cols_to_keep if col in sb_data.columns]]
    # Fusion avec les lignes SenseBoard uniquement
    merged = pd.merge_asof(
        senseboard_rows.sort_values("ISODateTimeUTC"),
        sb_data.sort_values("Timestamp GPS"),
        left_on="ISODateTimeUTC",
        right_on="Timestamp GPS",
        direction='nearest',
        tolerance=pd.Timedelta("100ms")
    )

    merged_parts.append(merged)


# 5. Fusion finale
if merged_parts:
    full_merged = pd.concat(merged_parts, ignore_index=True)
else:
    full_merged = senseboard_rows.copy()

# 6. Concaténer avec les autres lignes
final = pd.concat([full_merged, other_rows], ignore_index=True).sort_values("ISODateTimeUTC")

# 7. Sauvegarde unique
final.to_csv("all_data_enriched.csv", index=False)
print("Fusion complète sauvegardée dans all_data_enriched.csv")
"""

'\nimport pandas as pd\nfrom pathlib import Path\nimport openpyxl\nimport numpy as np\n\ndef spread_duplicate_timestamps(df, timestamp_col="Timestamp GPS", interval_ms=100):\n    new_rows = []\n\n    for ts, group in df.groupby(timestamp_col):\n        n = len(group)\n        if n == 1:\n            new_rows.append(group)\n        else:\n            # Créer des timestamps répartis dans l’intervalle (ex : 100 ms)\n            deltas = pd.to_timedelta(\n                np.linspace(0, interval_ms, n, endpoint=False), unit="ms"\n            )\n            new_ts = ts + deltas\n            group = group.copy()\n            group[timestamp_col] = new_ts\n            new_rows.append(group)\n\n    return pd.concat(new_rows).sort_values(timestamp_col).reset_index(drop=True)\n\n\n# 1. Charger all_data.csv\nall_data = pd.read_csv("all_data.csv")\nall_data[\'ISODateTimeUTC\'] = pd.to_datetime(all_data[\'ISODateTimeUTC\'], errors=\'coerce\')\n\n# 2. Séparer les lignes à enrichir (SenseBoard) et les

In [2]:
import pandas as pd
from pathlib import Path
import openpyxl
import numpy as np

def spread_duplicate_timestamps(df, timestamp_col="Timestamp GPS", interval_ms=100):
    """Make repeated timestamps unique by spacing them evenly inside an interval.

    Rows that share the same value in ``timestamp_col`` are shifted so that the
    k-th of n duplicates (in their original row order) lands at
    ``timestamp + k * interval_ms / n`` milliseconds. Rows whose timestamp is
    unique are left unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    timestamp_col : str
        Name of the datetime column to de-duplicate.
    interval_ms : int or float
        Width, in milliseconds, of the window the duplicates are spread over.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted by ``timestamp_col`` with a fresh 0..n-1 index.
        Rows with a missing timestamp are not returned. An input without any
        usable timestamp yields an empty frame with the same columns.
    """
    # Rows without a timestamp cannot be placed on the time axis; drop them
    # explicitly (the group-by based approach skipped them implicitly).
    valid = df.dropna(subset=[timestamp_col])
    if valid.empty:
        # Nothing to spread: return the empty frame instead of failing on an
        # empty concatenation.
        return valid.reset_index(drop=True)

    per_timestamp = valid.groupby(timestamp_col)[timestamp_col]
    position = per_timestamp.cumcount()        # 0, 1, 2, ... inside each group
    count = per_timestamp.transform("size")    # group size repeated on each row

    # Same values as np.linspace(0, interval_ms, n, endpoint=False) per group,
    # computed for every row at once instead of one Python iteration per group.
    offsets = pd.to_timedelta(position * (interval_ms / count), unit="ms")

    spread = valid.copy()
    spread[timestamp_col] = valid[timestamp_col] + offsets

    # A stable sort keeps the original relative order of rows whose shifted
    # timestamps happen to coincide, so the result is deterministic.
    return spread.sort_values(timestamp_col, kind="mergesort").reset_index(drop=True)


# 1. Load the combined dataset and parse its timestamp column.
all_data = pd.read_csv("all_data.csv")
# utc=True always yields a timezone-aware UTC column, whether or not the text
# carries an offset. The SenseBoard log timestamps are localised to UTC further
# down, and comparing or merging naive with aware datetimes raises a TypeError.
all_data['ISODateTimeUTC'] = pd.to_datetime(
    all_data['ISODateTimeUTC'], errors='coerce', utc=True
)

# 2. Split the rows to enrich (boat_name == "SenseBoard") from all the others.
senseboard_rows = all_data[all_data["boat_name"] == "SenseBoard"].copy()
other_rows = all_data[all_data["boat_name"] != "SenseBoard"].copy()

# 3. Set-up: folder holding one sub-folder per session, and the list that
#    collects the enriched blocks.
root_dir = Path("../Data_Sailnjord/Straight_lines/")
merged_parts = []

# 4. Columns of the SenseBoard log files that are carried into the output.
cols_to_keep = [
    "Timestamp GPS",
    "LoadCell_1", "LoadCell_2", "LoadCell_3", "LoadCell_4", "LoadCell_5", "LoadCell_6",
    "F_front", "F_back",
    "M_tot_X", "M_tot_Y",
    "M_front_X", "M_front_Y",
    "M_back_X", "M_back_Y",
    "P_front_X", "P_front_Y",
    "P_back_X", "P_back_Y"
]

# 5. Process every sub-folder. Sorting makes the processing (and log) order
#    reproducible; Path.iterdir() yields entries in arbitrary order.
matched_index = []  # index labels of the SenseBoard rows covered by a log file

for subfolder in sorted(root_dir.iterdir()):
    if not subfolder.is_dir():
        continue

    print(f"Traitement du dossier : {subfolder.name}")
    # Only one log file per folder is used; sorting makes the choice
    # deterministic when several files match the pattern.
    senseboard_file = next(
        iter(sorted(subfolder.glob("SenseBoard_log_modified*.xlsx"))), None
    )

    if senseboard_file is None:
        print(f"Aucun fichier SenseBoard trouvé dans {subfolder.name}")
        continue

    # Load the SenseBoard log as text so that parsing is fully explicit below.
    sb_data = pd.read_excel(senseboard_file, dtype=str, engine='openpyxl')
    sb_data.columns = sb_data.columns.str.strip()

    if "Timestamp GPS" not in sb_data.columns:
        raise ValueError("Colonne 'Timestamp GPS' non trouvée")

    # Keep only the needed columns up front so the conversions below do not
    # spend time on columns that would be discarded anyway.
    sb_data = sb_data[[col for col in cols_to_keep if col in sb_data.columns]].copy()

    # Parse the timestamps; values that do not match the format become NaT.
    # NOTE(review): text without a fractional-second part may not match this
    # format and would then be dropped below - confirm against the log files.
    sb_data["Timestamp GPS"] = pd.to_datetime(
        sb_data["Timestamp GPS"],
        format="%Y-%m-%d %H:%M:%S.%f",
        errors='coerce'
    )
    sb_data = sb_data.dropna(subset=["Timestamp GPS"])
    if sb_data.empty:
        # Without any valid timestamp there is nothing to merge, and the
        # following steps cannot work on an empty log.
        print(f"Aucun horodatage valide dans {senseboard_file.name}")
        continue
    sb_data["Timestamp GPS"] = sb_data["Timestamp GPS"].dt.tz_localize("UTC")

    # Convert the measurement columns to numbers (unparseable text -> NaN).
    for col in sb_data.columns:
        if col != "Timestamp GPS":
            sb_data[col] = pd.to_numeric(sb_data[col], errors='coerce')

    # Spread rows that share a timestamp over a 100 ms window.
    sb_data = spread_duplicate_timestamps(sb_data, interval_ms=100)

    # Time span covered by this log file.
    start_time = sb_data["Timestamp GPS"].min()
    end_time = sb_data["Timestamp GPS"].max()

    # SenseBoard rows of all_data that fall inside that span (bounds included).
    in_range = senseboard_rows["ISODateTimeUTC"].between(start_time, end_time)
    sb_rows_sub = senseboard_rows[in_range].copy()

    if sb_rows_sub.empty:
        print(f"Aucune ligne SenseBoard à enrichir pour {subfolder.name}")
        continue

    # merge_asof returns a fresh index, so remember the original labels now.
    matched_index.extend(sb_rows_sub.index)

    # Attach the nearest log sample within 50 ms to each row.
    merged = pd.merge_asof(
        sb_rows_sub.sort_values("ISODateTimeUTC"),
        sb_data.sort_values("Timestamp GPS"),
        left_on="ISODateTimeUTC",
        right_on="Timestamp GPS",
        direction='nearest',
        tolerance=pd.Timedelta("50ms")
    )

    merged_parts.append(merged)

# 6. Combine the enriched blocks. SenseBoard rows outside every log file's time
#    span are appended unenriched, so no row of all_data is lost in the output.
unmatched_rows = senseboard_rows.loc[~senseboard_rows.index.isin(matched_index)]
enriched_parts = list(merged_parts)
if not unmatched_rows.empty:
    enriched_parts.append(unmatched_rows)

if enriched_parts:
    full_merged = pd.concat(enriched_parts, ignore_index=True)
else:
    full_merged = senseboard_rows.copy()

# 7. Add the rows of the other boats back; the stable sort keeps the output
#    order deterministic for rows that share a timestamp.
final = pd.concat([full_merged, other_rows], ignore_index=True).sort_values(
    "ISODateTimeUTC", kind="mergesort"
)

# 8. Write the result once.
final.to_csv("all_data_enriched.csv", index=False)
print("Fusion complète sauvegardée dans all_data_enriched.csv")


Traitement du dossier : 06_06
Traitement du dossier : 07_06
Traitement du dossier : 09_06
Traitement du dossier : 10_06
Fusion complète sauvegardée dans all_data_enriched.csv
