In [None]:
"""
Notebook Matrix Profile – ampiimts Python 3.11 package

Goal:
Starting from the pre-processed signals (original values, normalized values, timestamps),
we identify discords and motifs with a fixed window using the matrix profile method (stumpy.maamp)
"""

# %matplotlib widget

import pandas as pd
from collections import Counter
import os
from typing import List
import ampiimts

def merge_dataframes(dfs: List[pd.DataFrame]) -> pd.DataFrame:
    """Outer-merge several frames on their ``timestamp`` column.

    Frames without a ``timestamp`` column are skipped.  In the others the
    ``timestamp`` column is converted with ``pd.to_datetime`` (unparseable
    values become ``NaT``) and every other column is given a unique name:
    the first occurrence of a name is kept as is, later occurrences get a
    ``_<n>`` suffix (``x``, ``x_2``, ``x_3``, ...).  The input frames are
    not modified.

    Args:
        dfs: Frames to merge.

    Returns:
        A single frame with ``timestamp`` as first column, sorted by
        ``timestamp`` when that column has a datetime dtype.  An empty frame
        is returned when ``dfs`` is empty or no frame has a ``timestamp``
        column.
    """
    import warnings

    if not dfs:
        return pd.DataFrame()

    renamed_dfs = []
    col_counter = Counter()
    # Names already handed out; guards against a generated suffix such as
    # "x_2" clashing with a column literally called "x_2" in another frame,
    # which would make pd.merge invent "_x"/"_y" suffixes.
    used_names = {'timestamp'}

    for df in dfs:
        if 'timestamp' not in df.columns:
            continue

        timestamp = pd.to_datetime(df['timestamp'], errors='coerce')
        data = df.drop(columns=['timestamp'])

        new_cols = []
        for col in data.columns:
            col_counter[col] += 1
            count = col_counter[col]
            candidate = col if count == 1 else f"{col}_{count}"
            while candidate in used_names:
                col_counter[col] += 1
                candidate = f"{col}_{col_counter[col]}"
            used_names.add(candidate)
            new_cols.append(candidate)
        data.columns = new_cols

        renamed_dfs.append(pd.concat([timestamp, data], axis=1))

    if not renamed_dfs:
        return pd.DataFrame()

    merged = renamed_dfs[0]
    for position, df in enumerate(renamed_dfs[1:], start=1):
        try:
            merged = pd.merge(merged, df, on='timestamp', how='outer')
        except Exception as exc:
            # Best effort: keep what has been merged so far, but make the
            # dropped frame visible instead of discarding it silently.
            warnings.warn(
                f"merge_dataframes: frame #{position} skipped ({exc})",
                RuntimeWarning,
                stacklevel=2,
            )

    if 'timestamp' in merged.columns and pd.api.types.is_datetime64_any_dtype(merged['timestamp']):
        merged = merged.sort_values(by='timestamp')

    return merged

# --- Safe loading of the CSV files ---
# Every regular ``*.csv`` file of `folder` is read with its ``timestamp``
# column parsed as dates; files that cannot be read are reported and skipped.
folder = '../../dataset/human'
pds = []

with os.scandir(folder) as entries:
    # os.scandir yields entries in arbitrary order; sorting by name makes the
    # content of `pds` (and anything derived from its order) reproducible.
    for entry in sorted(entries, key=lambda item: item.name):
        if not (entry.is_file() and entry.name.endswith('.csv')):
            continue
        try:
            pds.append(pd.read_csv(entry.path, parse_dates=['timestamp']))
        except Exception as exc:
            # Best effort: one unreadable file must not abort the notebook,
            # but it should not vanish silently either.
            print(f"[WARN] Skipping {entry.name}: {exc}")


def load_and_add_datetime_column(filepath):
    """Load a CSV file and add a ``datetime`` column built from date parts.

    The ``datetime`` column is assembled with ``pd.to_datetime`` from the
    ``year``, ``month``, ``day`` and ``hour`` columns.

    Args:
        filepath: Path or buffer accepted by ``pd.read_csv``.  An already
            loaded ``pd.DataFrame`` is accepted as well; it is copied, so the
            caller's frame is left untouched.

    Returns:
        The frame with the added ``datetime`` column.  ``timestamp`` is moved
        to the first position when the frame has such a column; otherwise the
        new ``datetime`` column comes first.

    Raises:
        KeyError: If one of ``year``, ``month``, ``day``, ``hour`` is missing.
    """
    if isinstance(filepath, pd.DataFrame):
        df = filepath.copy()
    else:
        df = pd.read_csv(filepath)

    date_parts = ['year', 'month', 'day', 'hour']
    missing = [part for part in date_parts if part not in df.columns]
    if missing:
        raise KeyError(f"cannot build 'datetime': missing column(s) {missing}")

    df['datetime'] = pd.to_datetime(df[date_parts])

    # Put the time column first. 'timestamp' keeps priority so frames that
    # have one are ordered exactly as before; frames without it no longer
    # fail on the reordering step.
    leading = 'timestamp' if 'timestamp' in df.columns else 'datetime'
    ordered = [leading] + [col for col in df.columns if col != leading]
    return df[ordered]

# Keep at most `rows_remaining` rows in total, taking the frames of `pds`
# in order and truncating the one that crosses the budget.
pds_trimmed = []
rows_remaining = 30_000

for df in pds:
    # `pds` already holds DataFrames read with a parsed 'timestamp' column,
    # so they are used as is: load_and_add_datetime_column() starts with
    # pd.read_csv and cannot take an already loaded DataFrame.
    if rows_remaining <= 0:
        break
    if len(df) <= rows_remaining:
        pds_trimmed.append(df)
        rows_remaining -= len(df)
    else:
        pds_trimmed.append(df.iloc[:rows_remaining])
        # Budget exhausted: without this the following frames would still be
        # appended and the total would exceed the limit.
        rows_remaining = 0

# --- Merge of files -> one dataframe (currently disabled) ---
# pds = merge_dataframes(pds)

# --- Pre-processing ---
# The call returns two objects, unpacked here as `pds_interpolate` and
# `pds_normalized`.
pds_interpolate, pds_normalized = ampiimts.pre_processed(
    pds_trimmed,
    sort_by_variables=False,
    cluster=True,
)

# --- Matrix profile ---
# Computed from the second object returned by the pre-processing step.
mx_profile = ampiimts.matrix_profile(
    pds_normalized,
    cluster=True,
)

# --- Plots ---
# Both plots receive the first pre-processing output and the matrix profile.
ampiimts.plot_all_patterns_and_discords(pds_interpolate, mx_profile)
ampiimts.plot_all_motif_overlays(pds_interpolate, mx_profile)

[INFO] 1 DataFrame(s) ignoré(s) pour fréquence trop éloignée.
[INFO] No aligned ranges → Using fresh index from 1970 with length 30044 and frequency 0 days 00:00:00.020000

[Corrélation croisée entre clusters et autres colonnes :]
  ↪ Cluster 01 (17 variables) ↔ autres : corr moyenne = 0.182
  ↪ Cluster 02 (2 variables) ↔ autres : corr moyenne = 0.145
  ↪ Cluster 03 (2 variables) ↔ autres : corr moyenne = 0.085
  ↪ Cluster 04 (4 variables) ↔ autres : corr moyenne = 0.248

[Corrélation entre familles de capteurs :]
            Unnamed: 0  back_x  back_y  back_z  thigh_x  thigh_y  thigh_z
Unnamed: 0        1.00    0.29    0.32    0.36     0.45     0.30     0.54
back_x            0.29    0.21    0.11    0.16     0.16     0.11     0.16
back_y            0.32    0.11    0.22    0.15     0.17     0.14     0.19
back_z            0.36    0.16    0.15    0.29     0.21     0.15     0.23
thigh_x           0.45    0.16    0.17    0.21     0.34     0.21     0.31
thigh_y           0.30    0.11    0.