In [1]:
# theme = 'plotly_dark'
theme = 'seaborn'
#theme = 'plotly'

In [None]:
import sys
import os
import json
import pandas as pd
import torch
import numpy as np
import plotly.graph_objs as go
import json
title = 'Bouts v. Model Pred'

path_to_model_def = '/home/kuba/Projects/MedicationTakingData/resmodel' #this is were the .py file is 
path_to_dir_with_model_pt_file = '/home/kuba/Projects/MedicationTakingData/resmodel/res_search_00/res_search_00_7'

#the watch and recoding we willbe evaling
WATCH_DIR = '/home/kuba/Documents/Data/Raw/Listerine/3_final/16'
recording = '2024-03-24_13_10_54'

HERTZ = 100
ACTIVITY_NAME_TO_CLASS_INDEX_MAPPING = {
    'water':0,
    'listerine':1,
}

In [3]:
def preprocess_window(df, start, window_size):
    """
    Prepare a window of accelerometer and gyroscope data for the model.
    """
    window = df.iloc[start:start + window_size]
    X_acc = torch.tensor([window[col].values for col in ['x_acc', 'y_acc', 'z_acc']], dtype=torch.float32)
    X_gyro = torch.tensor([window[col].values for col in ['x_gyro', 'y_gyro', 'z_gyro']], dtype=torch.float32)
    X_combined = torch.cat([X_acc, X_gyro]).unsqueeze(0)
    # Combine [1, 6, window_size]
    return torch.cat((X_acc, X_gyro), dim=0).unsqueeze(0)

def smooth_predictions(prediction_sum, counts):
    """
    Smooth predictions by averaging, handling divisions by zero.
    """
    mask = counts > 0
    averaged_predictions = np.zeros_like(prediction_sum)
    averaged_predictions[mask] = prediction_sum[mask] / counts[mask]
    return averaged_predictions


In [4]:
def read_txt(dir):
    txt_path = os.path.join(dir, f'desc.txt')
    with open(txt_path, 'r') as f:
        content = f.read()
    dic = eval(content)
    return dic['window_size'], dic['stride'] 

In [5]:
#takes in label a to tensor for ML
def json_to_tensor(labels_x, acc_len_x, acc_x):
    y_new = torch.zeros(acc_len_x)-1

    bouts = []
    for hand in labels_x:
        for action in labels_x[hand]:
            for bout in labels_x[hand][action]:
                y_new[(acc_x.timestamp > bout['start']) & (acc_x.timestamp < bout['end'])] = (ACTIVITY_NAME_TO_CLASS_INDEX_MAPPING[action] * 20 + 15)
    return y_new



In [6]:
def load_and_preprocess_data(recording_dir):
    acc = pd.read_csv(f'{recording_dir}/acceleration.csv', skiprows=1)
    acc['timestamp'] = (acc['timestamp'] - acc['timestamp'].iloc[0]) * 1e-9
    
    gyro = pd.read_csv(f'{recording_dir}/gyroscope.csv', skiprows=1)
    gyro['timestamp'] = (gyro['timestamp'] - gyro['timestamp'].iloc[0]) * 1e-9
    
    # interpolate gyro data to match acc timestamps
    gyro_interp = pd.DataFrame()
    for axis in ['x', 'y', 'z']:
        gyro_interp[axis] = np.interp(acc['timestamp'], gyro['timestamp'], gyro[axis])
    
    # combine acc and gyro data
    sensor_data = pd.DataFrame()
    sensor_data['timestamp'] = acc['timestamp']
    sensor_data['x_acc'] = acc['x']
    sensor_data['y_acc'] = acc['y']
    sensor_data['z_acc'] = acc['z']
    sensor_data['x_gyro'] = gyro_interp['x']
    sensor_data['y_gyro'] = gyro_interp['y']
    sensor_data['z_gyro'] = gyro_interp['z']
    
    return sensor_data

In [7]:
def get_bouts(recording_dir):
    all_data = []
    all_labels = []
    all_starts = []
    all_ends = []
    
    label_mapping = [
        ('left', 'water', 0.0),
        ('left', 'listerine', 1.0),
        ('right', 'water', 0.0),
        ('right', 'listerine', 1.0)
    ]
    
    with open(os.path.join(recording_dir, 'labels.json')) as f:
        labels = json.load(f)
        
    acc = pd.read_csv(os.path.join(recording_dir, 'acceleration.csv'), skiprows=1)
    gyro = pd.read_csv(os.path.join(recording_dir, 'gyroscope.csv'), skiprows=1)
    
    #convert the timestap to sec
    acc['timestamp'] = (acc['timestamp'] - acc['timestamp'].iloc[0]) * 1e-9
    gyro['timestamp'] = (gyro['timestamp'] - gyro['timestamp'].iloc[0]) * 1e-9
    
    merged_data = pd.merge_asof(acc, gyro, on='timestamp', 
                                suffixes=('_acc', '_gyro'))
    
    for side, liquid, label_value in label_mapping:
        if side in labels and liquid in labels[side]:
            for bout in labels[side][liquid]:
                start_time = bout['start'] 
                end_time = bout['end']
                # Extract data for this bout
                bout_data = merged_data[(merged_data['timestamp'] >= start_time) & (merged_data['timestamp'] <= end_time)].copy()
                
                start_index = (merged_data["timestamp"] < start_time).sum()
                end_index = start_index + len(bout_data) 

                if len(bout_data) > 0:
                    all_data.append(bout_data)
                    all_labels.append(label_value)
                    all_starts.append(start_index)
                    all_ends.append(end_index)

    return all_data, all_labels, all_starts, all_ends

In [8]:
def get_preds(model, device, bout, label, window_size, stride):
    assert 'timestamp' in bout.columns, "Sensor data must include 'timestamp' column."
    # y_df = pd.DataFrame(label, columns=['labels'])
    # df = pd.concat([bout, y_df], axis=1)
    
    prediction_sum = np.zeros(len(bout))
    counts = np.zeros(len(bout))
    
    for i in range(0, len(bout) - window_size + 1, stride):
        X_combined = preprocess_window(bout, i, window_size).to(device)
        
        with torch.no_grad():
            logits = torch.sigmoid(model(X_combined)).cpu().numpy()[0]
            prediction_sum[i:i + window_size] += logits
            counts[i:i + window_size] += 1
    
    return prediction_sum, counts
    

In [9]:
sys.path.append(path_to_model_def)
head_tail = os.path.split(path_to_dir_with_model_pt_file)
model_path = os.path.join(path_to_dir_with_model_pt_file, f'{head_tail[1]}_bestF1.pth')


#get model and meta data
window_size, stride = read_txt(path_to_dir_with_model_pt_file)
print(model_path)

# load model
model = torch.load(model_path)
model.eval()
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model = model.to(device)


# for each recording do the viz recording
if recording == '*':
    recordings = sorted(os.listdir(WATCH_DIR))
else:
    recordings = [recording]

for rec in recordings:
    if rec == '.DS_Store':
        continue
        
    print(f"Processing recording: {rec}")
    recording_dir = f'{WATCH_DIR}/{rec}'
    #get bouts
    sensor_data = load_and_preprocess_data(recording_dir)
    all_bouts, all_labels, all_starts, all_ends = get_bouts(recording_dir)

    yhat_full_len = np.zeros(len(sensor_data)) #will ve all zeros then will add in the smoothed preds at the right indexes
    y_full_len = np.zeros(len(sensor_data)) #will ve all zeros then will add in the true y at the right indexes

    for bout, label, start, end in zip(all_bouts, all_labels, all_starts, all_ends):
        pred_sum, pred_count = get_preds(model, device, bout, label, window_size, stride) #partition inot widows and combine with the true value
        avg_preds = smooth_predictions(pred_sum, pred_count)

        yhat_full_len[start:end] = avg_preds  * 20 + 10
        y_full_len[start:end] = label  * 20 + 10

    
    fig = go.Figure()
    sensor_cols = ['x_acc', 'y_acc', 'z_acc']
    for col in sensor_cols:
        fig.add_trace(go.Scatter(
            x=sensor_data['timestamp'], y=sensor_data[col],
            name=f'{col}',
            mode='lines', opacity=0.7
        ))
    
    fig.add_trace(go.Scatter(
        x=sensor_data['timestamp'], y=y_full_len,
        name='True Labels', mode='lines',
        line=dict(color='black', width=2)
    ))
    fig.add_trace(go.Scatter(
        x=sensor_data['timestamp'], y=yhat_full_len,
        name='Predictions', mode='lines',
        line=dict(color='red', width=3, dash='dash')
    ))
    
    fig.update_layout(
        title=title, xaxis_title='Time (s)',
        yaxis_title='Value', template='plotly',
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01)
    )
    fig.show(renderer='browser')







/home/kuba/Projects/MedicationTakingData/resmodel/res_search_00/res_search_00_7/res_search_00_7_bestF1.pth
Using device: cuda:1
Processing recording: 2024-03-24_13_10_54


  X_acc = torch.tensor([window[col].values for col in ['x_acc', 'y_acc', 'z_acc']], dtype=torch.float32)
