# In [1]:
"""
Notebook Matrix Profile – ampiimts Python 3.11 package

Goal:
Starting from a pre-processed signal (original values, normalized values, timestamps),
identify discords and motifs with a fixed window using the matrix profile method (stumpy.maamp).
"""

# %matplotlib widget

import pandas as pd
from collections import Counter
import os
from typing import List
import ampiimts

def merge_dataframes(dfs: List[pd.DataFrame]) -> pd.DataFrame:
    """Outer-merge several frames on their ``timestamp`` column.

    Each frame's ``timestamp`` column is converted with ``pd.to_datetime``
    (unparseable values become ``NaT``).  The other columns are renamed so
    that every output column name is unique: the first occurrence of a name
    is kept as is, later occurrences get a ``_2``, ``_3``, ... suffix, and a
    suffix is bumped further if the resulting name is already taken by
    another column.  The input frames are not modified.

    Frames without a ``timestamp`` column, and frames whose merge fails, are
    skipped (best effort) and a warning is emitted for each of them.

    Args:
        dfs: Frames to merge; each is expected to have a ``timestamp`` column.

    Returns:
        One frame with ``timestamp`` first, followed by the renamed columns of
        every usable input frame, sorted by ``timestamp`` when that column has
        a datetime dtype.  An empty frame when ``dfs`` is empty or contains no
        usable frame.
    """
    import warnings  # local import so the function does not rely on file-level changes

    if not dfs:
        return pd.DataFrame()

    prepared = []  # (position in ``dfs``, renamed frame)
    col_counter = Counter()
    # Names already present in the output; 'timestamp' is reserved for the key.
    used_names = {'timestamp'}

    for position, df in enumerate(dfs):
        if 'timestamp' not in df.columns:
            warnings.warn(
                f"merge_dataframes: frame #{position} has no 'timestamp' "
                "column and was skipped",
                stacklevel=2,
            )
            continue

        timestamp = pd.to_datetime(df['timestamp'], errors='coerce')
        # ``drop`` returns a new frame, so the caller's frame stays untouched.
        body = df.drop(columns=['timestamp'])

        new_cols = []
        for col in body.columns:
            col_counter[col] += 1
            count = col_counter[col]
            name = col if count == 1 else f"{col}_{count}"
            # A suffixed name may already exist as a genuine column name
            # (e.g. an input column literally called "a_2"); keep bumping the
            # counter until the name is free so pd.merge never has to add
            # its own _x/_y suffixes.
            while name in used_names:
                col_counter[col] += 1
                name = f"{col}_{col_counter[col]}"
            used_names.add(name)
            new_cols.append(name)
        body.columns = new_cols

        # Put the parsed key back as the first column.
        body.insert(0, 'timestamp', timestamp)
        prepared.append((position, body))

    if not prepared:
        return pd.DataFrame()

    merged = prepared[0][1]
    for position, frame in prepared[1:]:
        try:
            merged = pd.merge(merged, frame, on='timestamp', how='outer')
        except Exception as exc:
            # Best effort: keep what has been merged so far, but say so.
            warnings.warn(
                f"merge_dataframes: frame #{position} could not be merged "
                f"and was skipped ({exc!r})",
                stacklevel=2,
            )

    if 'timestamp' in merged.columns and pd.api.types.is_datetime64_any_dtype(merged['timestamp']):
        merged = merged.sort_values(by='timestamp')

    return merged

# --- Safe loading of the CSV files ---
folder = '../../dataset/pollution'
pds = []

with os.scandir(folder) as entries:
    for entry in entries:
        # Only regular files with a .csv extension are candidates.
        if not (entry.is_file() and entry.name.endswith('.csv')):
            continue
        try:
            df = pd.read_csv(f"{folder}/{entry.name}", parse_dates=['timestamp'])
        except Exception:
            # Best effort: a file that cannot be read is skipped.
            continue
        pds.append(df)

# --- Merge of files -> one dataframe ---
# pds = merge_dataframes(pds)
# NOTE: the list collected above is replaced here by the electricity dataset.
pds = pd.read_csv("../../dataset/electricity/elecdata.csv")

# --- Pre-processing ---
# The call returns two objects; presumably the interpolated and the normalized
# data, judging by the variable names -- confirm against the ampiimts docs.
pds_interpolate, pds_normalized = ampiimts.pre_processed(
    pds,
    sort_by_variables=False,
    cluster=True,
)

# # --- Matrix_profile ---
# mx_profile = ampiimts.matrix_profile(pds_normalized, cluster=True)

# # ---plot ---
# ampiimts.plot_all_patterns_and_discords(pds_interpolate, mx_profile)
# ampiimts.plot_all_motif_overlays(pds_interpolate, mx_profile)

