In [59]:
import torch
import os 
import pandas as pd
import sys

In [None]:
# Directory holding one sub-directory per recording session; each session is
# read as <session>/acceleration.csv and <session>/gyroscope.csv by
# sample_all_possible_windows_from_file.
# NOTE(review): absolute, machine-specific path - consider making it configurable.
DAILY_DIR_PATH = "/home/kuba/Documents/data/raw/kuba_watch_data"

WINDOW_SIZE = 400  # number of samples per window
STRIDE = WINDOW_SIZE  # step between window starts; equal to WINDOW_SIZE, so windows do not overlap
FLATTEN = True  # If True, each window is a single list of the x, then y, then z samples. If False, each window is a 3 x WINDOW_SIZE array with separate rows for x, y and z.
EXCLUDE_NO_MOVMENT_WINDOWS = True  # when True, windows flagged as "no movement" are dropped after windowing

MEDICINE_LABEL = 1  # not actually used in this notebook, but will be in the real dataloader
DAILY_LABEL = 0

In [61]:
def get_first_line(path):
    """Return the start time stored on the first line of a sensor file.

    The first line is expected to look like ``File Start Time: 1720608166722``;
    the integer that follows the first colon is returned.

    Args:
        path: Path of the file to read.

    Returns:
        int: The value following the first ``:`` on the first line.

    Raises:
        IndexError: If the first line contains no ``:``.
        ValueError: If the text after the colon is not an integer.
    """
    # Use a context manager so the handle is closed even when parsing fails;
    # the previous version opened the file and never closed it.
    with open(path) as f:
        header = f.readline()
    return int(header.strip().split(':')[1])

In [63]:
def window_maker(data):
    """Cut the x/y/z columns of ``data`` into fixed-length windows.

    Window starts are ``0, STRIDE, 2*STRIDE, ...`` and only full windows of
    ``WINDOW_SIZE`` samples are produced. The module-level ``FLATTEN`` flag
    selects the layout of each window:

    * ``FLATTEN`` true  -> one list: all x values, then all y, then all z.
    * ``FLATTEN`` false -> a list of three lists ``[x_values, y_values, z_values]``.

    Args:
        data: Mapping-like object (e.g. a DataFrame) whose ``'x'``, ``'y'`` and
            ``'z'`` entries support ``len``, slicing and ``.tolist()``.

    Returns:
        list: One entry per window, in start order.
    """
    join_axes = FLATTEN
    last_start_exclusive = len(data['x']) - WINDOW_SIZE + 1

    windows = []
    for start in range(0, last_start_exclusive, STRIDE):
        stop = start + WINDOW_SIZE
        per_axis = [data[axis][start:stop].tolist() for axis in ('x', 'y', 'z')]
        if join_axes:
            # Concatenate the three axis lists into one flat list.
            windows.append([value for axis_values in per_axis for value in axis_values])
        else:
            windows.append(per_axis)
    return windows

In [64]:
def calculate_magnitude(acc, gyro, flatten=None):
    """Compute the per-sample vector magnitude sqrt(x^2 + y^2 + z^2) of each window.

    Args:
        acc: Accelerometer windows.
        gyro: Gyroscope windows.
        flatten: Layout of the inputs. ``None`` (default) uses the module-level
            ``FLATTEN`` flag.
            * falsy -> inputs are ``(num_windows, 3, window_size)``.
            * truthy -> inputs are ``(num_windows, 3 * window_size)`` laid out
              as all x samples, then all y, then all z (the layout produced by
              ``window_maker`` when flattening).

    Returns:
        tuple: ``(acc_magnitudes, gyro_magnitudes)``, each of shape
        ``(num_windows, window_size)``.
    """
    if flatten is None:
        flatten = FLATTEN

    def _magnitude(windows):
        if flatten:
            # Recover the three axes from the flat [x..., y..., z...] row.
            # The old code used sqrt(v**2) here, which is just |v| per element
            # and not a magnitude across axes: a motionless sensor with gravity
            # on one axis then looked like it had a large spread.
            windows = windows.reshape(windows.shape[0], 3, windows.shape[1] // 3)
        return torch.sqrt(windows[:, 0] ** 2 + windows[:, 1] ** 2 + windows[:, 2] ** 2)

    return _magnitude(acc), _magnitude(gyro)

In [65]:
def is_movement_window(acc, gyro, acc_threshold=0.05, gyro_threshold=0.02):
    """Flag the windows in which either sensor shows enough variation.

    The magnitudes returned by ``calculate_magnitude(acc, gyro)`` are reduced
    with a standard deviation along dim 1; a window counts as movement when
    its accelerometer std exceeds ``acc_threshold`` or its gyroscope std
    exceeds ``gyro_threshold``.

    Args:
        acc: Accelerometer windows tensor.
        gyro: Gyroscope windows tensor.
        acc_threshold: Std above which the accelerometer signal counts as movement.
        gyro_threshold: Std above which the gyroscope signal counts as movement.

    Returns:
        Boolean tensor with one entry per window (True = movement).
    """
    acc_mag, gyro_mag = calculate_magnitude(acc, gyro)

    # A small spread inside a window means the signal barely changed.
    acc_is_moving = acc_mag.std(dim=1) > acc_threshold
    gyro_is_moving = gyro_mag.std(dim=1) > gyro_threshold

    return torch.logical_or(acc_is_moving, gyro_is_moving)

In [66]:
def remove_no_movment_windows(acc, gyro, min_acc_std=0.05, min_gyro_std=0.02):
    """Keep only the windows that ``is_movement_window`` flags as movement.

    The same boolean mask is applied to both tensors, so window ``i`` of the
    filtered accelerometer data still pairs with window ``i`` of the filtered
    gyroscope data.

    Args:
        acc: Accelerometer windows tensor.
        gyro: Gyroscope windows tensor.
        min_acc_std: Forwarded to ``is_movement_window`` as ``acc_threshold``.
        min_gyro_std: Forwarded to ``is_movement_window`` as ``gyro_threshold``.

    Returns:
        tuple: ``(filtered_acc, filtered_gyro)``.
    """
    keep_mask = is_movement_window(
        acc,
        gyro,
        acc_threshold=min_acc_std,
        gyro_threshold=min_gyro_std,
    )

    # Index both tensors with the one mask.
    moving_acc, moving_gyro = acc[keep_mask], gyro[keep_mask]

    shapes_agree = moving_acc.shape == moving_gyro.shape
    if not shapes_agree:
        # Only reported on stderr; the (mismatched) tensors are still returned.
        sys.stderr.write("Error in removing the no movment winodws, gyro and acc not longet have same shape")

    return moving_acc, moving_gyro

In [67]:
def sample_all_possible_windows_from_file(file_name):
    """Load one recording session and return all of its windows as one tensor.

    Reads ``acceleration.csv`` and ``gyroscope.csv`` from
    ``DAILY_DIR_PATH/file_name``, windows each with ``window_maker`` and, when
    ``EXCLUDE_NO_MOVMENT_WINDOWS`` is set, drops the windows rejected by
    ``remove_no_movment_windows``.

    Args:
        file_name: Name of the session directory inside ``DAILY_DIR_PATH``.

    Returns:
        torch.Tensor:
            * ``FLATTEN`` true  -> ``(num_windows, 2, 3 * WINDOW_SIZE)``;
              index 0 on dim 1 is the accelerometer, index 1 the gyroscope.
            * ``FLATTEN`` false -> ``(num_windows, 6, WINDOW_SIZE)``;
              the three accelerometer rows are followed by the three gyroscope rows.
    """
    session_dir = os.path.join(DAILY_DIR_PATH, file_name)

    # skiprows=1 skips the first line of each file before the CSV header
    # (presumably the "File Start Time: ..." line - confirm against the data).
    # Only the x/y/z columns are consumed by window_maker, so the timestamp
    # column is left as-is.
    acc = pd.read_csv(os.path.join(session_dir, "acceleration.csv"), skiprows=1)
    gyro = pd.read_csv(os.path.join(session_dir, "gyroscope.csv"), skiprows=1)

    acc_windows = window_maker(acc)
    gyro_windows = window_maker(gyro)

    # The two files may hold a different number of rows, and therefore of
    # windows. Everything below pairs window i of one sensor with window i of
    # the other, so keep only the windows both sensors have.
    n_common = min(len(acc_windows), len(gyro_windows))

    if n_common == 0:
        # Recording shorter than one window: return an empty tensor with the
        # documented trailing dimensions instead of failing on a 1-D tensor.
        if FLATTEN:
            return torch.empty((0, 2, 3 * WINDOW_SIZE))
        return torch.empty((0, 6, WINDOW_SIZE))

    acc_windows_tensor = torch.tensor(acc_windows[:n_common])
    gyro_windows_tensor = torch.tensor(gyro_windows[:n_common])

    if EXCLUDE_NO_MOVMENT_WINDOWS:
        acc_windows_tensor, gyro_windows_tensor = remove_no_movment_windows(acc_windows_tensor, gyro_windows_tensor)

    if FLATTEN:
        # (num_windows, 3*W) + (num_windows, 3*W) -> (num_windows, 2, 3*W)
        return torch.stack((acc_windows_tensor, gyro_windows_tensor), dim=1)
    # (num_windows, 3, W) + (num_windows, 3, W) -> (num_windows, 6, W)
    return torch.concat((acc_windows_tensor, gyro_windows_tensor), dim=1)

In [68]:
# Session directory names under DAILY_DIR_PATH, in sorted order.
available_files = sorted(os.listdir(DAILY_DIR_PATH))

In [69]:
# Display the sorted session directory names found under DAILY_DIR_PATH.
available_files

['2024-07-10_06_42_46',
 '2024-07-12_06_44_51',
 '2024-07-15_06_54_24',
 '2024-07-20_08_05_50',
 '2024-07-25_08_15_51',
 '2024-07-27_09_42_08',
 '2024-08-01_13_00_51',
 '2024-08-02_13_34_07',
 '2024-08-16_11_10_13']

In [70]:
def get_num_windows(num_windows_needed, availabe_files):
    """Collect up to ``num_windows_needed`` windows from the given session files.

    Files are consumed from the front of ``availabe_files``. Every file that is
    read is removed from the list (in place), so a later call with the returned
    list never samples the same file again; this is what prevents leakage
    between splits.

    Args:
        num_windows_needed: Number of windows to collect.
        availabe_files: List of session directory names still available.
            Mutated in place.

    Returns:
        tuple: ``(windows, remaining_files)``. ``windows`` holds at most
        ``num_windows_needed`` windows along dim 0; fewer (possibly an empty
        tensor) if the files run out, in which case a message is written to
        stderr. ``remaining_files`` is the same list object that was passed in.
    """
    collected = []
    windows_still_needed = num_windows_needed

    # Loop on the number of windows still missing. The old loop compared the
    # number of collected *tensors* with the number of windows needed.
    while windows_still_needed > 0:
        # Check the list that was passed in, not the notebook-level global.
        if len(availabe_files) == 0:
            sys.stderr.write("No more files to get daily living windows from")
            break  # return what we have instead of crashing on pop()

        file_name = availabe_files.pop(0)  # never sample from this file again
        all_windows = sample_all_possible_windows_from_file(file_name)

        # Take only what is still missing. Slicing by the original total
        # over-collected once an earlier file had already contributed.
        taken = all_windows[:windows_still_needed]
        if len(taken) == 0:
            continue
        collected.append(taken)
        windows_still_needed -= len(taken)

    if not collected:
        # torch.cat rejects an empty list.
        return torch.empty(0), availabe_files
    return torch.cat(collected, dim=0), availabe_files

In [72]:
train_windows, available_files = get_num_windows(17000, available_files)

____

In [None]:
#accl
# Scratch cell: load one session by hand for inspection.
# accelerometer file
accl_path = os.path.join(
    "/home/kuba/Documents/data/raw/kuba_watch_data/2024-07-10_06_42_46",
    "acceleration.csv",
)
# gyroscope file
gyro_path = os.path.join(
    "/home/kuba/Documents/data/raw/kuba_watch_data/2024-07-10_06_42_46",
    "gyroscope.csv",
)

# Shift each timestamp column so it starts at zero, then scale by 1e-9
# (nanoseconds -> seconds).
acc = pd.read_csv(accl_path, skiprows=1)
acc['timestamp'] = acc['timestamp'].sub(acc['timestamp'].iloc[0]).mul(1e-9)

gyro = pd.read_csv(gyro_path, skiprows=1)
gyro['timestamp'] = gyro['timestamp'].sub(gyro['timestamp'].iloc[0]).mul(1e-9)

# NOTE(review): the get_first_line(...) calls that used to be commented out here
# referenced `full_path`, which is not defined in the visible cells.

In [None]:
# import plotly.express as px

# px.line(x=gyro['timestamp'][::5], y=gyro['x'][::5])