In [9]:
from io import StringIO

from tensorflow import keras
import matplotlib.pyplot as plt
from scipy.io import arff
import seaborn as sns
from loguru import logger
import yaml

from datetime import datetime
import polars as pl
import pandas as pd
import numpy as np
import os
import joblib

import sys
from pathlib import Path
sys.path.append(str(Path.cwd().parent))

# Metrics
from sklearn.metrics import (
    classification_report, 
    confusion_matrix, 
    accuracy_score,
    precision_recall_fscore_support,
    balanced_accuracy_score,
    roc_auc_score,
    roc_curve
)

# PERSONAL FUNCTIONS
from plots import *
from utils import *
from functions.windows import create_feature_windows # creación de ventanas e ingenieria de características

In [10]:
def compute_sampling_rate_stats(timestamps: pd.Series) -> dict:
    """
    Calcula métricas de la frecuencia de muestreo a partir de timestamps.
    
    Parameters
    ----------
    timestamps : pd.Series
        Serie de timestamps (dtype datetime64).
    
    Returns
    -------
    dict con:
        - mean_hz: frecuencia promedio
        - median_hz: frecuencia mediana (más robusta a outliers)
        - std_hz: variación de la frecuencia
        - min_hz, max_hz: valores extremos
    """
    # diferencias entre timestamps en segundos
    diffs = np.diff(timestamps.view(np.int64) / 1e9)
    
    # proteger en caso de valores repetidos
    diffs = diffs[diffs > 0]
    if len(diffs) == 0:
        return {"mean_hz": 0, "median_hz": 0, "std_hz": 0, "min_hz": 0, "max_hz": 0}
    
    hz = 1.0 / diffs
    return {
        "mean_hz": float(np.mean(hz)),
        "median_hz": float(np.median(hz)),
        "std_hz": float(np.std(hz)),
        "min_hz": float(np.min(hz)),
        "max_hz": float(np.max(hz)),
    }


In [11]:
import json
with open(r'F:\UPC\Tesis\HARbit-Model\src\data\real-data\sit-left-01.json', 'rb') as file:
    data = json.load(file)

gyro_df = data['gyro']
accel_df = data['accel']

In [12]:
target = "Sit"

In [13]:
accel_temp = pl.DataFrame(accel_df)
gyro_temp = pl.DataFrame(gyro_df)

In [14]:
accel_temp = accel_temp.with_columns(pl.lit('A').alias('Usuario'))
gyro_temp  = gyro_temp.with_columns(pl.lit('A').alias('Usuario'))

In [15]:
accel_temp   = accel_temp.with_columns(pl.lit(target).alias('gt'))
gyro_temp    = gyro_temp.with_columns(pl.lit(target).alias('gt'))

In [16]:
df_accel = normalize_columns(accel_temp,
                            user_col_name  = "Usuario", 
                            timestamp_col_name = "timestamp", 
                            label_col_name = "gt", 
                            x_col_name = "x", 
                            y_col_name = "y", 
                            z_col_name = "z")

df_gyro = normalize_columns(gyro_temp, 
                            user_col_name  = "Usuario", 
                            timestamp_col_name = "timestamp", 
                            label_col_name = "gt", 
                            x_col_name = "x", 
                            y_col_name = "y", 
                            z_col_name = "z")

In [17]:
df_accel = convert_timestamp(df_accel)
df_gyro = convert_timestamp(df_gyro)

In [19]:
stats = compute_sampling_rate_stats(df_accel.to_pandas()["Timestamp"])
stats

  diffs = np.diff(timestamps.view(np.int64) / 1e9)


{'mean_hz': 26.901419406346573,
 'median_hz': 25.12874966176535,
 'std_hz': 6.446095185238278,
 'min_hz': 11.149806390298968,
 'max_hz': 50.45579748923072}

In [20]:
df_uniform = (
    df_accel.to_pandas()
    .set_index("Timestamp")
    .resample("50ms")       # fuerza 20 Hz exactos
    .interpolate()
    .reset_index()
)


  .interpolate()
