
# Sensor Fusion – App CSVs (Accelerometer.csv, Gyroscope.csv, Location.csv, Annotation.csv)

Coloque estes arquivos dentro de `sensor_fusion_lab/data_collection/`:
- `Accelerometer.csv`
- `AccelerometerUncalibrated.csv` (opcional)
- `Gyroscope.csv`
- `GyroscopeUncalibrated.csv` (opcional)
- `Location.csv`
- `Annotation.csv` (opcional, para marcar atividades)
- `Metadata.csv` (opcional)

Este notebook tenta **detectar nomes de colunas automaticamente** e fazer a fusão simples para classificar Stationary / Walking / Driving.


In [1]:

# Imports e utilidades
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os, re, math
from datetime import datetime

DATA_DIR = "data_collection"
ACC_FILE = os.path.join(DATA_DIR, "Accelerometer.csv")
GYR_FILE = os.path.join(DATA_DIR, "Gyroscope.csv")
GPS_FILE = os.path.join(DATA_DIR, "Location.csv")
ANN_FILE = os.path.join(DATA_DIR, "Annotation.csv")

print("Paths:")
print(ACC_FILE)
print(GYR_FILE)
print(GPS_FILE)
print(ANN_FILE)


Matplotlib is building the font cache; this may take a moment.


Paths:
data_collection/Accelerometer.csv
data_collection/Gyroscope.csv
data_collection/Location.csv
data_collection/Annotation.csv


In [2]:

# Funções de detecção de colunas
import pandas as pd
import numpy as np
import re

def find_timestamp_col(df):
    candidates = [c for c in df.columns if re.search(r'time|timestamp|epoch', str(c), re.I)]
    return candidates[0] if candidates else None

def find_xyz_cols(df):
    # tenta conjuntos comuns
    common_sets = [('x','y','z'), ('accel_x','accel_y','accel_z'), ('ax','ay','az'),
                   ('gyro_x','gyro_y','gyro_z'), ('gx','gy','gz')]
    for a,b,c in common_sets:
        if a in df.columns and b in df.columns and c in df.columns:
            return a,b,c
    # heurística: pega as 3 primeiras colunas numéricas (ignorando timestamp)
    numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
    if len(numeric_cols) >= 3:
        return numeric_cols[0], numeric_cols[1], numeric_cols[2]
    return None, None, None

def find_speed_col(df):
    for pat in [r'speed.*m/?s', r'speed', r'velocit', r'km/?h', r'kph']:
        for c in df.columns:
            if re.search(pat, str(c), re.I):
                return c
    return None

def parse_time_series(df):
    ts_col = find_timestamp_col(df)
    if ts_col is None:
        raise ValueError("Não encontrei coluna de timestamp neste CSV.")
    s = df[ts_col]
    if pd.api.types.is_numeric_dtype(s):
        m = s.median()
        if m > 1e14:
            dt = pd.to_datetime(s, unit="ns", utc=True).dt.tz_localize(None)
        elif m > 1e11:
            dt = pd.to_datetime(s, unit="ms", utc=True).dt.tz_localize(None)
        else:
            dt = pd.to_datetime(s, unit="s", utc=True).dt.tz_localize(None)
    else:
        dt = pd.to_datetime(s, errors="coerce")
    out = df.copy()
    out["timestamp"] = dt
    out = out.dropna(subset=["timestamp"]).sort_values("timestamp")
    return out

def ensure_mps(speed_series):
    s = pd.to_numeric(speed_series, errors="coerce")
    med = np.nanmedian(s)
    if med > 30:  # provável km/h
        s = s / 3.6
    return s

print("Helpers ready.")


Helpers ready.


In [3]:

# Carrega Accelerometer.csv
accel = pd.read_csv(ACC_FILE)
accel = parse_time_series(accel)
ax, ay, az = find_xyz_cols(accel)
if not ax:
    raise RuntimeError("Não encontrei colunas x,y,z no Accelerometer.csv")
accel.rename(columns={ax:'accel_x', ay:'accel_y', az:'accel_z'}, inplace=True)

accel['accel_magnitude'] = np.sqrt(accel['accel_x']**2 + accel['accel_y']**2 + accel['accel_z']**2)
accel['dynamic_accel'] = (accel['accel_magnitude'] - 9.80665).abs()
accel = accel[['timestamp','accel_x','accel_y','accel_z','accel_magnitude','dynamic_accel']]
accel.head(3)


Unnamed: 0,timestamp,accel_x,accel_y,accel_z,accel_magnitude,dynamic_accel
0,2025-10-14 19:48:53.570624500,-0.020419,-0.035778,0.10938,0.116881,9.689769
1,2025-10-14 19:48:53.580587500,0.067299,0.005502,0.189656,0.201317,9.605333
2,2025-10-14 19:48:53.590549500,0.014623,0.033355,0.111038,0.116858,9.689792


In [4]:

# Carrega Gyroscope.csv
gyro = pd.read_csv(GYR_FILE)
gyro = parse_time_series(gyro)
gx, gy, gz = find_xyz_cols(gyro)
if not gx:
    raise RuntimeError("Não encontrei colunas x,y,z no Gyroscope.csv")
gyro.rename(columns={gx:'gyro_x', gy:'gyro_y', gz:'gyro_z'}, inplace=True)
gyro['gyro_magnitude'] = np.sqrt(gyro['gyro_x']**2 + gyro['gyro_y']**2 + gyro['gyro_z']**2)
gyro = gyro[['timestamp','gyro_x','gyro_y','gyro_z','gyro_magnitude']]
gyro.head(3)


Unnamed: 0,timestamp,gyro_x,gyro_y,gyro_z,gyro_magnitude
0,2025-10-14 19:48:53.570624500,-0.063017,-0.137206,-0.006513,0.151126
1,2025-10-14 19:48:53.580587500,-0.1015,-0.110322,0.005074,0.149996
2,2025-10-14 19:48:53.590549500,-0.097608,-0.108192,0.00546,0.145818


In [5]:

# Carrega Location.csv
gps = pd.read_csv(GPS_FILE)
gps = parse_time_series(gps)

sc = find_speed_col(gps)
if sc is None:
    lat_col = next((c for c in gps.columns if re.search(r'lat', str(c), re.I)), None)
    lon_col = next((c for c in gps.columns if re.search(r'lon|lng', str(c), re.I)), None)
    if lat_col and lon_col:
        R = 6371000.0
        lat = np.radians(gps[lat_col].astype(float))
        lon = np.radians(gps[lon_col].astype(float))
        dlat = lat.diff()
        dlon = lon.diff()
        a = np.sin(dlat/2)**2 + np.cos(lat).shift(1)*np.cos(lat)*np.sin(dlon/2)**2
        c = 2*np.arctan2(np.sqrt(a), np.sqrt(1-a))
        dist = R * c
        dt = gps['timestamp'].diff().dt.total_seconds()
        speed = dist / dt
    else:
        speed = pd.Series(np.nan, index=gps.index)
else:
    speed = ensure_mps(gps[sc])

gps = pd.DataFrame({'timestamp': gps['timestamp'], 'speed': speed}).dropna()
gps.head(3)


Unnamed: 0,timestamp,speed
0,2025-10-14 19:48:51.427367000,-1.0
1,2025-10-14 19:48:53.559500000,-1.0
2,2025-10-14 19:48:53.559903200,-1.0


In [6]:

# (Opcional) Annotation.csv
if os.path.exists(ANN_FILE):
    ann = pd.read_csv(ANN_FILE)
    try:
        ann = parse_time_series(ann)
    except Exception:
        ann['timestamp'] = pd.to_datetime(ann.iloc[:,0], errors='coerce')
        ann = ann.dropna(subset=['timestamp'])
    label_col = next((c for c in ann.columns if re.search(r'label|note|annot|activity|state|mode', str(c), re.I)), None)
    if label_col is None:
        label_col = ann.columns[-1]
    annotations = ann[['timestamp', label_col]].rename(columns={label_col:'label'})
else:
    annotations = pd.DataFrame(columns=['timestamp','label'])
annotations.head(5)


EmptyDataError: No columns to parse from file

In [None]:

# Extrai features por janela e classifica
def extract_features_window(a_slice, g_slice, v_slice):
    avg_speed = v_slice['speed'].mean() if len(v_slice) else 0.0
    accel_var = a_slice['dynamic_accel'].var() if len(a_slice) else 0.0
    gyro_var  = g_slice['gyro_magnitude'].var() if len(g_slice) else 0.0
    accel_max = a_slice['dynamic_accel'].max() if len(a_slice) else 0.0
    return {'avg_speed': float(avg_speed or 0.0),
            'accel_variance': float(accel_var or 0.0),
            'gyro_variance': float(gyro_var or 0.0),
            'accel_max': float(accel_max or 0.0)}

def classify(features):
    v = features['avg_speed']; a = features['accel_variance']
    if v < 0.5: return "Stationary"
    elif v < 2.5: return "Walking" if a > 0.3 else "Unknown"
    else: return "Driving" if a < 1.5 else "Unknown"

def run_sliding_analysis(window_seconds=30, step_seconds=10):
    tmin = max(accel['timestamp'].min(), gyro['timestamp'].min())
    tmax = min(accel['timestamp'].max(), gyro['timestamp'].max())
    results = []
    t = tmin
    while t < tmax:
        t2 = t + pd.Timedelta(seconds=window_seconds)
        a_slice = accel[(accel['timestamp'] >= t) & (accel['timestamp'] < t2)]
        g_slice = gyro[(gyro['timestamp'] >= t) & (gyro['timestamp'] < t2)]
        v_slice = gps[(gps['timestamp'] >= t) & (gps['timestamp'] < t2)]
        feats = extract_features_window(a_slice, g_slice, v_slice)
        pred = classify(feats)
        results.append({'start': t, 'end': t2, 'predicted_mode': pred, **feats})
        t += pd.Timedelta(seconds=step_seconds)
    return pd.DataFrame(results)

results_df = run_sliding_analysis(30, 10)
results_df.head()


In [None]:

# Visualizações e export
os.makedirs('results', exist_ok=True)

if len(results_df):
    plt.figure(figsize=(10,4))
    plt.plot(results_df['start'], results_df['avg_speed'])
    plt.title('Average speed per window (m/s)'); plt.xlabel('Time'); plt.ylabel('m/s')
    plt.tight_layout(); plt.savefig('results/speed_windows.png', dpi=200, bbox_inches='tight'); plt.show()

    plt.figure(figsize=(10,4))
    plt.plot(results_df['start'], results_df['accel_variance'])
    plt.title('Accel variance per window'); plt.xlabel('Time'); plt.ylabel('variance')
    plt.tight_layout(); plt.savefig('results/accel_var_windows.png', dpi=200, bbox_inches='tight'); plt.show()

    mode_counts = results_df['predicted_mode'].value_counts()
    mode_counts.to_csv('results/mode_counts.csv')

if len(results_df):
    results_df.to_csv('results/analysis_results_windows.csv', index=False)
    print("Saved results to results/analysis_results_windows.csv")

# se houver labels, junta
if 'annotations' in globals() and len(annotations):
    results_labeled = results_df.copy()
    ann_series = annotations.set_index('timestamp').sort_index()['label']
    results_labeled['label'] = ann_series.reindex(results_labeled['start'], method='ffill').values
    results_labeled.to_csv('results/analysis_with_labels.csv', index=False)
    print("Saved labeled results to results/analysis_with_labels.csv")
