In [None]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.signal import butter, filtfilt
from os.path import isfile, join
import zipfile
import os
import json
from pathlib import Path


In [None]:
import analyze

## Load the data

In [None]:
# Directory holding the cleaned recordings for one participant.
path = Path('../data_cleaned/Timo')

# Keep only session archives: skip '.Identifier' entries (presumably
# 'Zone.Identifier' sidecar files - confirm) and anything that is not a .zip.
# Sorting makes the result independent of the arbitrary order in which
# os.listdir() returns directory entries.
onlyfiles = sorted(
    f for f in os.listdir(path)
    if '.Identifier' not in f and f.endswith('.zip')
)

# Use all zips
# Archive names look like '<label> <YYYY-MM-DD> <HH-MM-SS>.zip'; the dataset
# key is '<label>_<YYYY-MM-DD>'. Names with fewer than three
# whitespace-separated parts are ignored.
zip_files = {}
for filename in onlyfiles:
    parts = filename.removesuffix('.zip').split()
    if len(parts) < 3:
        continue
    key = f"{parts[0]}_{parts[1]}"
    if key in zip_files:
        # Two recordings with the same label on the same day share a key;
        # the later filename (in sorted order) wins, so make that visible.
        print(f"Warning: '{filename}' replaces '{zip_files[key]}' for key '{key}'")
    zip_files[key] = filename

print(zip_files)

In [None]:
# To analyse only a subset of sessions, override `zip_files` with an explicit
# {dataset_key: archive_filename} mapping before building the analyzer, e.g.:
# zip_files = {
#     'base_2025-06-06': 'base 2025-06-06 15-45-11.zip',
#     'base_2025-06-07': 'base 2025-06-07 12-17-44.zip',
#     '200ml_2025-06-07': '200ml 2025-06-07 12-52-34.zip',
#     '230ml_2025-06-06': '230ml 2025-06-06 16-22-45.zip'
# }
# NOTE: a machine-specific absolute `data_path` used to be noted here; the
# relative `path` defined earlier in the notebook is what is actually passed.

# Build the analyzer for the selected archives. The two calls below are
# order-dependent: presumably extract_and_load_data() populates
# `analyzer.all_data`, which explore_data_structure() then summarises -
# confirm against analyze.py.
analyzer = analyze.CaffeineAnalyzer(data_path=path, zip_files=zip_files)
analyzer.extract_and_load_data()
analyzer.explore_data_structure()

In [None]:
# Show everything the analyzer loaded. The plotting functions in this notebook
# treat it as a mapping of dataset name -> {file name -> DataFrame}.
analyzer.all_data

In [None]:
def print_keys_recursively(data, indent: int = 0):
    """
    Print the key/index hierarchy of a nested dict/list structure.

    Each dict key is printed as-is and each list position as ``[i]``, indented
    by four spaces per nesting level. Values that are neither dicts nor lists
    are leaves and produce no output.

    Parameters
    ----------
    data : any
        The structure to walk.
    indent : int
        Current nesting depth (number of four-space indents to prefix).
    """
    # Normalise both container kinds into (label, child) pairs; anything else
    # is a leaf, so there is nothing to print.
    if isinstance(data, dict):
        labelled_children = ((f"{key}", child) for key, child in data.items())
    elif isinstance(data, list):
        labelled_children = ((f"[{position}]", child) for position, child in enumerate(data))
    else:
        return

    prefix = "    " * indent
    for label, child in labelled_children:
        print(f"{prefix}{label}")
        print_keys_recursively(child, indent + 1)

# Print the key/index hierarchy of everything the analyzer loaded.
print_keys_recursively(analyzer.all_data)

### Accelerometer data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
from datetime import datetime

def extract_date(key):
    """
    Return the first YYYY-MM-DD date embedded in `key` as a datetime.

    Keys without such a date map to `datetime.min`, so they sort before every
    dated key.
    """
    match = re.search(r'(\d{4}-\d{2}-\d{2})', key)
    if match is None:
        return datetime.min
    return datetime.strptime(match.group(1), '%Y-%m-%d')

def plot_combined_accelerations(all_data):
    """
    Plot three figures (Acceleration X, Y, Z) for all 'Accelerometer.csv'
    datasets in all_data, in chronological legend order.

    Parameters
    ----------
    all_data : dict
        Mapping of dataset name -> mapping of file name -> DataFrame. Datasets
        without an 'Accelerometer.csv' entry are skipped.

    Returns
    -------
    None
        The figures are shown with plt.show(). If no dataset contains
        accelerometer data, a message is printed and nothing is plotted.
    """
    # NOTE(review): the gyroscope and linear-acceleration plots in this
    # notebook read 'Time_s' / '<Sensor>_x_<unit>' style columns; confirm that
    # 'Accelerometer.csv' really exposes 'time', 'x', 'y', 'z'.
    time_col = 'time'
    axis_cols = ['x', 'y', 'z']

    # 1. Build combined DataFrame
    records = []
    for name, data_dict in all_data.items():
        df = data_dict.get('Accelerometer.csv')
        if df is None:
            continue
        tmp = df[[time_col, *axis_cols]].copy()
        tmp['dataset'] = name
        records.append(tmp)
    if not records:
        print("No accelerometer data found.")
        return
    combined = pd.concat(records, ignore_index=True)

    # 2. Build a hue_order sorted purely by the embedded date
    hue_order = sorted(combined['dataset'].unique(), key=extract_date)

    # 3. Plot settings
    sns.set_theme(style="whitegrid")

    # 4. One figure per axis (replaces three copy-pasted plotting stanzas)
    for axis_label in axis_cols:
        plt.figure(figsize=(10, 5))
        sns.lineplot(
            data=combined,
            x=time_col, y=axis_label,
            hue='dataset', hue_order=hue_order,
            linewidth=1
        )
        plt.xlabel('Time (s)')
        plt.ylabel(f'Acceleration {axis_label.upper()} (m/s²)')
        plt.title(f'Combined Acceleration {axis_label.upper()} (chronological order)')
        plt.legend(title='Dataset', loc='best')
        plt.tight_layout()
        plt.show()

# Example usage (all_data maps dataset name -> {file name -> DataFrame}):
# all_data = {
#     '2021-01-01': {'Accelerometer.csv': pd.read_csv('path_to_accel_2021-01-01.csv')},
#     '2021-02-01': {'Accelerometer.csv': pd.read_csv('path_to_accel_2021-02-01.csv')},
#     ...
# }
# plot_combined_accelerations(all_data)

# Print the function's docstring; this prints `None` when the function
# defines no docstring.
print(plot_combined_accelerations.__doc__)


In [None]:
# Draw the X, Y and Z accelerometer figures for every loaded dataset.
plot_combined_accelerations(analyzer.all_data)


In [None]:
from AccelInteractivePlot import AccelInteractivePlot
# Interactive accelerometer view from the project-local AccelInteractivePlot
# module, fed the same all_data mapping as the static plots.
# NOTE(review): widget behaviour is defined in that module - not visible here.
plotter = AccelInteractivePlot(analyzer.all_data)
plotter.display()

### Gyroscope data



In [None]:
import re
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def extract_date(key):
    """
    Parse the first YYYY-MM-DD date found in a dataset name.

    Returns the date as a datetime, or `datetime.min` when the name contains
    no such date (so undated names sort first).
    """
    date_match = re.search(r'(\d{4}-\d{2}-\d{2})', key)
    if date_match:
        return datetime.strptime(date_match.group(1), '%Y-%m-%d')
    return datetime.min

def plot_combined_gyroscope(all_data):
    """
    Draw one line plot per gyroscope axis (X, Y, Z), overlaying every dataset
    in `all_data` that has a 'Gyroscope.csv' entry.

    Datasets are coloured by name and listed in the legend in the order given
    by `extract_date` (the date embedded in the dataset name). If no dataset
    has gyroscope data, a message is printed and nothing is plotted.
    """
    axes = ('x', 'y', 'z')
    wanted_columns = ['Time_s'] + [f'Gyroscope_{axis}_rad/s' for axis in axes]

    # Collect one labelled copy of the gyroscope columns per dataset.
    frames = []
    for dataset_name, files in all_data.items():
        gyro = files.get('Gyroscope.csv')
        if gyro is not None:
            frames.append(gyro[wanted_columns].assign(dataset=dataset_name))

    if len(frames) == 0:
        print("No gyroscope data found.")
        return

    combined = pd.concat(frames, ignore_index=True)

    # Legend order follows the date embedded in each dataset name.
    dataset_names = combined['dataset'].unique()
    chronological = sorted(dataset_names, key=extract_date)

    sns.set_theme(style="whitegrid")

    for axis in axes:
        axis_name = axis.upper()
        plt.figure(figsize=(10, 5))
        sns.lineplot(
            data=combined,
            x='Time_s',
            y=f'Gyroscope_{axis}_rad/s',
            hue='dataset',
            hue_order=chronological,
            linewidth=1,
        )
        plt.xlabel('Time (s)')
        plt.ylabel(f'Gyroscope {axis_name} (rad/s)')
        plt.title(f'Combined Gyroscope {axis_name} (chronological order)')
        plt.legend(title='Dataset', loc='best')
        plt.tight_layout()
        plt.show()


In [None]:
# Draw the X, Y and Z gyroscope figures for every loaded dataset.
plot_combined_gyroscope(analyzer.all_data)

In [None]:
from GyroInteractivePlot import GyroInteractivePlot
# Interactive gyroscope view from the project-local GyroInteractivePlot module,
# fed the same all_data mapping as the static plots.
# NOTE(review): widget behaviour is defined in that module - not visible here.
gyro_plotter = GyroInteractivePlot(analyzer.all_data)
gyro_plotter.display()

### Linear acceleration data



In [None]:


import re
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def extract_date(key):
    """
    Convert the first YYYY-MM-DD substring of `key` into a datetime.

    When `key` holds no date in that format, `datetime.min` is returned so the
    key sorts ahead of all dated keys.
    """
    hit = re.search(r'(\d{4}-\d{2}-\d{2})', key)
    if not hit:
        return datetime.min
    (date_text,) = hit.groups()
    return datetime.strptime(date_text, '%Y-%m-%d')

def plot_combined_linear_accelerations(all_data):
    """
    Plot the X, Y and Z linear acceleration of every dataset on one figure.

    Parameters
    ----------
    all_data : dict
        Mapping of dataset name -> mapping of file name -> DataFrame. Datasets
        without a 'Linear Acceleration.csv' entry are skipped. Each such frame
        must provide 'Time_s' and the three 'Linear_Acceleration_<axis>_m/s^2'
        columns.

    Returns
    -------
    None
        The figure is shown with plt.show(). If no dataset contains linear
        acceleration data, a message is printed and nothing is plotted.
    """
    time_col = 'Time_s'
    axis_cols = [
        'Linear_Acceleration_x_m/s^2',
        'Linear_Acceleration_y_m/s^2',
        'Linear_Acceleration_z_m/s^2',
    ]

    # 1. Gather all records
    records = []
    for name, data_dict in all_data.items():
        df = data_dict.get('Linear Acceleration.csv')
        if df is None:
            continue

        # Melt to long format so x, y, z share one value column and can be
        # told apart by line style. The id/value columns must be the frame's
        # real column names: the previous 'time' and 'Linear_y' do not exist
        # in the selected columns and made melt() raise a KeyError.
        tmp = df.melt(
            id_vars=time_col,
            value_vars=axis_cols,
            var_name='axis',
            value_name='linear_acceleration'
        )
        tmp['dataset'] = name
        records.append(tmp)

    if not records:
        print("No linear acceleration data found.")
        return

    combined = pd.concat(records, ignore_index=True)

    # 2. Order datasets by date embedded in the name
    hue_order = sorted(combined['dataset'].unique(), key=extract_date)

    # 3. Plot: colour encodes the dataset, line style encodes the axis
    sns.set_theme(style="whitegrid")
    plt.figure(figsize=(12, 6))
    sns.lineplot(
        data=combined,
        x=time_col,
        y='linear_acceleration',
        hue='dataset',
        style='axis',
        hue_order=hue_order,
        linewidth=1
    )
    plt.xlabel('Time (s)')
    plt.ylabel('Linear acceleration (m/s²)')
    plt.title('Combined linear acceleration (X, Y, Z) in chronological order')
    plt.legend(title='Dataset / axis', loc='best')
    plt.tight_layout()
    plt.show()

In [None]:
# Draw the combined linear-acceleration figure for every loaded dataset.
# This section must call the linear-acceleration plotter, not the raw
# accelerometer one.
plot_combined_linear_accelerations(analyzer.all_data)

In [None]:
from LinearAccInteractivePlot import LinearAccInteractivePlot
# Interactive linear-acceleration view from the project-local
# LinearAccInteractivePlot module, fed the same all_data mapping.
# NOTE(review): widget behaviour is defined in that module - not visible here.
linearAcc_plotter = LinearAccInteractivePlot(analyzer.all_data)
linearAcc_plotter.display()