https://www.science.org/doi/10.1126/science.1127647

https://github.com/L1aoXingyu/pytorch-beginner/blob/9c86be785c7c318a09cf29112dd1f1a58613239b/08-AutoEncoder/conv_autoencoder.py

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch import nn
from pathlib import Path
from torch.utils.data import TensorDataset, DataLoader, Dataset
from lib.modules import (
    evaluate_loop, 
    pad_for_windowing,
    window_session,
    optimization_loop,
    optimization_loop_xonly
)
from lib.models import LinearAutoencoder, ConvAutoencoder, ConvAutoencoderImproved
from tqdm import tqdm
import plotly.express as px
from datetime import timedelta
from sklearn.model_selection import train_test_split
from lib.datasets import AccRawDataset

# Data Processing
1. Get list of N raw recording directories (from delta app, no labels)
2. Read all raw data into N DataFrames of lengths l. Reset their timestamp to be seconds from the start. Print the length of each session
3. Read and pad N recordings into N tensors of sizes (l+100 x 3). Then concatenate all recordings into a single tensor of size (L x 3)
4. Cut off end of tensor so that C|L. Split the single tensor into chunks of C=1.8e6 samples (5 hours) to get a tensor of size (L/C x C x 3).
5. Pad each chunk to get a tensor of size (L/C x C+100 x 3). This is so that no windows overlap two chunks
6. Take 75% of chunks to be training chunks, and take the other 25% to be test chunks
7. Separately flatten the first two dims of train and test chunks to get tensors of size (L1 x 3) and (L2 x 3)
8. Create an AccRawDataset for train and test chunks. This dataset will create windows dynamically to return tensors of size (303). It will have size L-101.

In [3]:
# Notebook-wide settings.
WINSIZE = 101  # window length; passed to pad_for_windowing, AccRawDataset and the model
DEVICE = 'cuda:1'  # torch device string that the model and batches are moved to
RAW_DIR = Path('/home/musa/datasets/eating_raw/')  # folder whose entries are the raw recording directories

In [None]:
# Collect the raw recording directories found directly under RAW_DIR.
# Path.iterdir() yields entries in arbitrary order, so sort them: the session
# indices printed later and the seeded train/test split both depend on this order.
# Non-directory entries are skipped because every session is read as a folder
# that contains an acceleration CSV.
recordings = sorted(entry for entry in RAW_DIR.iterdir() if entry.is_dir())
for rec_dir in recordings:
    print(rec_dir.name)

In [None]:
# Read every recording into a DataFrame with columns renamed to x_acc/y_acc/z_acc
# and a `timestamp` column expressed in seconds.
accelerations = []
for session_dir in recordings:
    # Prefer the session-specific file name and fall back to the generic one.
    accel_file = session_dir / f'acceleration-{session_dir.name}.csv'
    if not accel_file.is_file():
        accel_file = session_dir / 'acceleration.csv'

    # The first line of the file is skipped here and parsed separately below.
    acceleration = pd.read_csv(accel_file, skiprows=1).rename({'x': 'x_acc', 'y': 'y_acc', 'z': 'z_acc'}, axis=1)
    acceleration = acceleration.dropna()

    # The last whitespace-separated token of the first line is read as the start
    # time and divided by 1000 (presumably milliseconds -> seconds; confirm against
    # the recording app). accel_file already contains session_dir, so it is used
    # directly instead of being joined onto session_dir a second time.
    acceleration_start_time_seconds = float(pd.read_csv(accel_file, nrows=1, header=None).iloc[0, 0].split()[-1]) / 1000

    # Use positional .iloc[0]: after dropna() the row labelled 0 may no longer exist,
    # and label-based `timestamp[0]` would then raise KeyError.
    first_timestamp = acceleration['timestamp'].iloc[0]
    acceleration['timestamp'] = ((acceleration['timestamp'] - first_timestamp) * 1e-9) + acceleration_start_time_seconds  # get timestamp in seconds

    accelerations.append(acceleration)

    print(f'Index: {len(accelerations)-1}, Date: {session_dir.name}, nSamples: {len(acceleration)}, Time Elapsed: {timedelta(seconds=acceleration.timestamp.iloc[-1] - acceleration.timestamp.iloc[0])}')


In [None]:
# Turn each recording into a padded (n x 3) tensor of the three acceleration axes.
accs = [
    pad_for_windowing(torch.Tensor(acc[['x_acc','y_acc','z_acc']].values), WINSIZE)
    for acc in accelerations
]

chunk_len = 5 * 60 * 60 * 100 # = 1,800,000 samples ie. 5 hours of recording
all_acc = torch.cat(accs, axis=0)
# Drop the tail so the total length is an exact multiple of chunk_len,
# then fold the samples into (n_chunks, chunk_len, 3).
usable_len = len(all_acc) - len(all_acc) % chunk_len
all_acc = all_acc[:usable_len].view(-1, chunk_len, 3)

# Seed the global NumPy RNG right before the split so the 75/25 chunk split is repeatable.
np.random.seed(10)
def proc(x):
    """Pad each chunk for windowing, then merge the chunk and sample dims into one."""
    padded = pad_for_windowing(x, WINSIZE) # pad second dimension
    return padded.flatten(end_dim=1)
acctr, accte = (proc(part) for part in train_test_split(all_acc, test_size=0.25))

# Datasets built on the flattened train / test tensors with window size WINSIZE.
Xtr = AccRawDataset(acctr, WINSIZE)
Xte = AccRawDataset(accte, WINSIZE)

In [None]:
# Print the length of the full, train and test tensors converted from samples
# to hours (divide by 100, 60, 60 -- i.e. assuming 100 samples per second).
for samples in (all_acc.flatten(end_dim=1), acctr, accte):
    print(len(samples) / 100 / 60 / 60)

In [None]:
# Wrap the windowed datasets in loaders; only the training loader is shuffled.
trainloader = DataLoader(Xtr, batch_size=64, shuffle=True)
testloader = DataLoader(Xte, batch_size=64)

# torch.save does not create missing parent directories, so make sure the
# output folder exists before writing.
Path('pytorch_datasets').mkdir(parents=True, exist_ok=True)

# NOTE(review): this pickles the whole DataLoader objects (their datasets included),
# so loading them later needs the same project classes to be importable.
torch.save(trainloader, 'pytorch_datasets/trainloader_11-16-23.pt')
torch.save(testloader, 'pytorch_datasets/testloader_11-16-23.pt')

In [None]:
# Plot one raw session in the browser, keeping only every dim_amt-th row
# to lighten the figure.
dim_amt = 20
i = 5
session = accelerations[i]
decimated = session[::dim_amt]
fig = px.line(decimated, x=decimated.index, y=['x_acc','y_acc','z_acc'])
fig.show(renderer='browser')

In [None]:
# Restore the previously saved train / test DataLoaders from disk.
trainloader, testloader = map(
    torch.load,
    ('pytorch_datasets/trainloader_11-16-23.pt', 'pytorch_datasets/testloader_11-16-23.pt'),
)

In [None]:
# Training setup: a new ConvAutoencoderImproved moved to DEVICE, an Adam optimizer
# over its parameters (lr=3e-4), and a mean-squared-error criterion.
model = ConvAutoencoderImproved(winsize=WINSIZE).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
criterion = nn.MSELoss()

In [None]:
# Run the project's training loop on the loaders above.
# NOTE(review): presumably 5 is the number of epochs and Path('dev/test') is the
# output directory for logs/checkpoints -- confirm against lib.modules.optimization_loop_xonly.
optimization_loop_xonly(model, trainloader, testloader, criterion, optimizer, 5, DEVICE, Path('dev/test'))

In [None]:
# Persist the model weights (state_dict only).
# NOTE(review): the file name is spelled 'autoencorder' -- probably meant 'autoencoder';
# left unchanged in case other code loads this exact path.
torch.save(model.state_dict(), 'dev/autoencorder.pt')

# Evaluate

In [4]:
# Restore the saved train / test DataLoaders for the evaluation section.
trainloader, testloader = map(
    torch.load,
    ('pytorch_datasets/trainloader_11-16-23.pt', 'pytorch_datasets/testloader_11-16-23.pt'),
)

In [5]:
# Build a new ConvAutoencoderImproved on DEVICE; the saved weights are loaded into it
# in the following cell. The optimizer and criterion are created the same way as in
# the training section.
model = ConvAutoencoderImproved(winsize=WINSIZE).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
criterion = nn.MSELoss()

In [6]:
# Load the saved best-model weights into the model. map_location remaps the stored
# tensors onto DEVICE, so the load also works when the checkpoint was written from
# a different device than the one used here.
model.load_state_dict(torch.load('dev/autoencoder4_conv-impr/best_model.pt', map_location=DEVICE))

<All keys matched successfully>

In [12]:
# Number of batches the training loader yields per pass.
len(trainloader)

590657

In [22]:
# Recreate a signal with trained model
#
# For each test window, keep only its centre sample from both the input and the
# model output, giving one true and one reconstructed value per axis per window.
from itertools import islice

max_batches = 2001  # number of test batches to process (the first 2001, i.e. indices 0..2000)
center = WINSIZE // 2  # index of the middle sample of a window

Xtrue = []
Xpred = []

model.eval()  # inference behaviour for layers such as dropout / batch-norm, if the model has any
with torch.no_grad():  # reconstruction only: no autograd graph needed
    n_batches = min(max_batches, len(testloader))
    for X in tqdm(islice(testloader, max_batches), total=n_batches):
        X = X.to(DEVICE)
        logits = model(X)
        # View each flat window as (3, WINSIZE) and take the centre column.
        Xtrue.append(X.cpu().view(-1, 3, WINSIZE)[:, :, center])
        Xpred.append(logits.cpu().view(-1, 3, WINSIZE)[:, :, center])

# (n_windows, 3) -> (3, n_windows): one row per axis.
Xtrue = torch.cat(Xtrue).T
Xpred = torch.cat(Xpred).T

# Hand pandas NumPy arrays rather than torch tensors so the columns get a numeric dtype.
acceleration = pd.DataFrame({
    'x_acc': Xtrue[0].numpy(),
    'y_acc': Xtrue[1].numpy(),
    'z_acc': Xtrue[2].numpy(),
    'x_pred': Xpred[0].numpy(),
    'y_pred': Xpred[1].numpy(),
    'z_pred': Xpred[2].numpy(),
})

0it [00:00, ?it/s]

2001it [00:02, 938.95it/s]


In [19]:
# Plot true vs. reconstructed axes in the browser, keeping every dim_amt-th row.
dim_amt = 1
decimated = acceleration[::dim_amt]
fig = px.line(decimated, x=decimated.index, y=['x_acc','y_acc','z_acc', 'x_pred', 'y_pred', 'z_pred'])
fig.show(renderer='browser')

In [None]:
from lib.modules import read_and_window_nursing_session, read_nursing_session

# Load one session (index i) twice: once windowed for the model, once as the
# plain DataFrame that the predictions are attached to afterwards.
i = 58
nursing_dir = Path('/home/musa/datasets/nursingv1')
labels_dir = Path('/home/musa/datasets/eating_labels/')
X,y = read_and_window_nursing_session(i, WINSIZE, nursing_dir, labels_dir)
acceleration = read_nursing_session(i, nursing_dir)
testloader = DataLoader(TensorDataset(X), batch_size=64)

In [None]:
# Recreate a signal with trained model
#
# Run every window of the session through the model and keep the centre sample
# of each reconstructed window as the prediction for that time step.
center = WINSIZE // 2  # index of the middle sample of a window

Xpred = []
model.eval()  # inference behaviour for layers such as dropout / batch-norm, if the model has any
with torch.no_grad():  # no autograd graph: otherwise every batch output keeps its graph alive
    for X in tqdm(testloader):
        X = X[0].to(DEVICE)  # each batch is a sequence; element 0 holds the windows
        logits = model(X)
        Xpred.append(logits.cpu())  # move off the GPU immediately instead of accumulating there

Xpred = torch.cat(Xpred)
Xpred = Xpred.view(-1, 3, WINSIZE)[:, :, center].T # unwindow

acceleration['x_pred'] = Xpred[0].numpy()
acceleration['y_pred'] = Xpred[1].numpy()
acceleration['z_pred'] = Xpred[2].numpy()

In [None]:
# Plot recorded vs. reconstructed axes in the browser, keeping every dim_amt-th row.
dim_amt = 5
decimated = acceleration[::dim_amt]
fig = px.line(decimated, x=decimated.index, y=['x_acc','y_acc','z_acc', 'x_pred', 'y_pred', 'z_pred'])
fig.show(renderer='browser')

# Stuff

In [None]:
# Push a single training window through the first encoder layer only, so its
# input (X) and output (logits) can be plotted in the following cells.
batch = next(iter(trainloader))
# Loaders built on a TensorDataset yield a sequence holding the batch tensor, while
# the AccRawDataset loaders are iterated as plain tensors elsewhere in this notebook.
# Accept both layouts instead of assuming the former.
if isinstance(batch, (list, tuple)):
    batch = batch[0]

with torch.no_grad():  # inspection only, no gradients needed
    X = batch[0].reshape(3, WINSIZE).to(DEVICE)  # first window of the batch as 3 rows of WINSIZE samples
    logits = model.encoder[0](X)

In [None]:
# Plot row 0 of the window X that was fed to the first encoder layer.
plt.plot(X[0].cpu().detach())

In [None]:
# Plot row 2 of the first encoder layer's output for that window.
plt.plot(logits[2].cpu().detach())

In [None]:
# List the parameter names, then plot entries 0, 1 and 2 of index 3 of the
# first encoder layer's weight tensor.
state = model.state_dict()
print(state.keys())
kernel = state['encoder.0.weight'][3]
plt.plot(kernel[0].cpu().detach())
plt.plot(kernel[1].cpu().detach())
plt.plot(kernel[2].cpu().detach())