https://www.science.org/doi/10.1126/science.1127647

https://github.com/L1aoXingyu/pytorch-beginner/blob/9c86be785c7c318a09cf29112dd1f1a58613239b/08-AutoEncoder/conv_autoencoder.py

In [1]:
%load_ext autoreload
%autoreload 2

In [6]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch import nn
from pathlib import Path
from torch.utils.data import TensorDataset, DataLoader, Dataset
from lib.modules import (
    evaluate_loop, 
    pad_for_windowing,
    window_session,
    optimization_loop,
    optimization_loop_xonly
)
from lib.models import LinearAutoencoder, ConvAutoencoder, ConvAutoencoderImproved
from tqdm import tqdm
import plotly.express as px
from datetime import timedelta
from sklearn.model_selection import train_test_split
from lib.datasets import AccRawDataset

In [14]:
WINSIZE = 101
DEVICE = 'cuda:1'
RAW_DIR = Path('/home/musa/datasets/eating_raw/')

In [4]:
recordings = []
for rec_dir in RAW_DIR.iterdir():
    recordings.append(rec_dir)
    print(rec_dir.name)

11-07_20_24_32
2023-10-26_15_32_20
11-07_17_43_30
11-08_08_27_30
11-08_07_17_47
11-10_08_54_24
2023-11-01_15_49_48
11-07_12_58_43
11-01_20_34_28
10-27_00_21_25
11-07_17_29_01
11-01_20_54_52
11-07_15_03_24
10-27_09_45_42
11-02_19_28_19
10-28_13_18_42
10-27_00_20_15


In [5]:
accelerations = []
for session_dir in recordings:
    accel_file = session_dir / f'acceleration-{session_dir.name}.csv'
    if not accel_file.is_file():
        accel_file = session_dir / f'acceleration.csv'

    acceleration = pd.read_csv(accel_file,skiprows=1).rename({'x': 'x_acc', 'y': 'y_acc', 'z': 'z_acc'}, axis=1)
    acceleration = acceleration.dropna()

    acceleration_start_time_seconds = float(pd.read_csv(session_dir / accel_file, nrows=1,header=None).iloc[0,0].split()[-1])/1000
    acceleration.timestamp = ((acceleration.timestamp - acceleration.timestamp[0])*1e-9)+acceleration_start_time_seconds # get timestamp in seconds

    accelerations.append(acceleration)

    print(f'Index: {len(accelerations)-1}, Date: {session_dir.name}, nSamples: {len(acceleration)}, Time Elapsed: {timedelta(seconds=acceleration.timestamp.iloc[-1] - acceleration.timestamp.iloc[0])}')


Index: 0, Date: 11-07_20_24_32, nSamples: 30117, Time Elapsed: 0:04:49.392967
Index: 1, Date: 2023-10-26_15_32_20, nSamples: 2961601, Time Elapsed: 7:53:59.529118
Index: 2, Date: 11-07_17_43_30, nSamples: 1005447, Time Elapsed: 2:41:00.547341
Index: 3, Date: 11-08_08_27_30, nSamples: 5125043, Time Elapsed: 1 day, 6:08:45.212074
Index: 4, Date: 11-08_07_17_47, nSamples: 338215, Time Elapsed: 0:54:09.386096
Index: 5, Date: 11-10_08_54_24, nSamples: 1370732, Time Elapsed: 5:06:32.989927
Index: 6, Date: 2023-11-01_15_49_48, nSamples: 2910132, Time Elapsed: 7:45:46.365347
Index: 7, Date: 11-07_12_58_43, nSamples: 776721, Time Elapsed: 2:04:21.712932
Index: 8, Date: 11-01_20_34_28, nSamples: 127133, Time Elapsed: 0:20:21.474008
Index: 9, Date: 10-27_00_21_25, nSamples: 260457, Time Elapsed: 0:51:35.645162
Index: 10, Date: 11-07_17_29_01, nSamples: 90052, Time Elapsed: 0:14:25.199006
Index: 11, Date: 11-01_20_54_52, nSamples: 4080424, Time Elapsed: 22:11:22.872910
Index: 12, Date: 11-07_15_03

In [65]:
class AccRawDataset(Dataset):
    def __init__(self, accs, winsize):
        super().__init__()
        self.winsize = winsize
        self.X = X.cat(accs)
    
    def __getitem__(self, i):
        if i >= self.__len__():
            raise IndexError("Index Out of Range")

        return self.X[i:i+self.winsize].T.flatten()
    
    def __len__(self):
        return len(self.X) - self.winsize

In [131]:
accs = []
for acc in accelerations:
    # accs.append(pad_for_windowing(torch.Tensor(acc[['x_acc','y_acc','z_acc']].values), WINSIZE))
    accs.append(torch.Tensor(acc[['x_acc','y_acc','z_acc']].values))

chunk_len = 5 * 60 * 60 * 100 # = 1,800,000 samples ie. 5 hours of recording
all_acc = torch.cat(accs, axis=0)
all_acc = all_acc[:len(all_acc) - len(all_acc) % chunk_len] # cut off very last part
all_acc = all_acc.view(-1, chunk_len, 3)

np.random.seed(10)
acctr, accte = train_test_split(all_acc, test_size=0.25)

# Xtr = AccRawDataset(acctr, WINSIZE)
# Xte = AccRawDataset(accte, WINSIZE)

In [139]:
print(len(all_acc) / 100 / 60 / 60)
print(len(acctr) / 100 / 60 / 60)
print(len(accte) / 100 / 60 / 60)

142.90449722222223
107.17837222222222
35.726125


In [167]:
trainloader = DataLoader(Xtr, batch_size=64, shuffle=True)
testloader = DataLoader(Xte, batch_size=64)

torch.save(trainloader, 'pytorch_datasets/trainloader_11-16-23.pt')
torch.save(testloader, 'pytorch_datasets/testloader_11-16-23.pt')

In [None]:
dim_amt = 20
i = 5
fig = px.line(accelerations[i][::dim_amt], x=accelerations[i].index[::dim_amt], y=['x_acc','y_acc','z_acc'])
fig.show(renderer='browser')

In [7]:
trainloader = torch.load('pytorch_datasets/trainloader_11-16-23.pt')
testloader = torch.load('pytorch_datasets/testloader_11-16-23.pt')

In [16]:
model = Test().to(DEVICE)#ConvAutoencoderImproved(winsize=WINSIZE).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
criterion = nn.MSELoss()

In [43]:
accs = []
for acc in accelerations:
    accs.append(pad_for_windowing(torch.Tensor(acc[['x_acc','y_acc','z_acc']].values), WINSIZE))

all_acc = torch.cat(accs[:2], axis=0)

np.random.seed(10)
acctr, accte = train_test_split(all_acc, test_size=0.5)

Xtr = AccRawDataset(acctr, WINSIZE)
Xte = AccRawDataset(accte, WINSIZE)

trainloader = DataLoader(Xtr, batch_size=64, shuffle=True)
testloader = DataLoader(Xte, batch_size=64)

In [45]:
optimization_loop_xonly(model, trainloader, testloader, criterion, optimizer, 5, DEVICE, Path('dev/test'))

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 4: Train Loss: 0.00033262: Dev Loss: 0.00029534: 100%|██████████| 5/5 [02:49<00:00, 33.99s/it]


In [None]:
torch.save(model.state_dict(), 'dev/autoencorder.pt')

# Evaluate

In [10]:
model.load_state_dict(torch.load('dev/autoencoder3/best_model.pt'))

<All keys matched successfully>

In [None]:
# # Recreate a signal with trained model

# test_window = Xtr[0].view()
# # i = 9

# acceleration = accelerations[i]

# Xte = torch.Tensor(acceleration[['x_acc','y_acc','z_acc']].values)
# Xte = pad_for_windowing(Xte, WINSIZE)
# Xte = window_session(Xte, WINSIZE)

# testloader = DataLoader(TensorDataset(Xte), batch_size=64)

# Xpred = []
# for X in tqdm(testloader):
#     X = X[0].to(DEVICE)
#     logits = model(X)
#     Xpred.append(logits)

# Xpred = torch.cat(Xpred)
# Xpred = Xpred.view(-1,3,101)[:,:,50].T # unwindow

# acceleration['x_pred'] = Xpred[0].cpu().detach()
# acceleration['y_pred'] = Xpred[1].cpu().detach()
# acceleration['z_pred'] = Xpred[2].cpu().detach()

In [None]:
# dim_amt = 20
# fig = px.line(acceleration[::dim_amt], x=acceleration.index[::dim_amt], y=['x_acc','y_acc','z_acc', 'x_pred', 'y_pred', 'z_pred'])
# fig.show(renderer='browser')

In [46]:
from lib.modules import read_and_window_nursing_session, read_nursing_session
X,y = read_and_window_nursing_session(58, WINSIZE, Path('/home/musa/datasets/nursingv1'), Path('/home/musa/datasets/eating_labels/'))
testloader = DataLoader(TensorDataset(X), batch_size=64)
acceleration = read_nursing_session(58, Path('/home/musa/datasets/nursingv1'))

In [47]:
# Recreate a signal with trained model

Xpred = []
for X in tqdm(testloader):
    X = X[0].to(DEVICE)
    logits = model(X)
    Xpred.append(logits)

Xpred = torch.cat(Xpred)
Xpred = Xpred.view(-1,3,101)[:,:,50].T # unwindow

acceleration['x_pred'] = Xpred[0].cpu().detach()
acceleration['y_pred'] = Xpred[1].cpu().detach()
acceleration['z_pred'] = Xpred[2].cpu().detach()

  0%|          | 0/3211 [00:00<?, ?it/s]

100%|██████████| 3211/3211 [00:00<00:00, 3877.23it/s]


In [48]:
dim_amt = 5
fig = px.line(acceleration[::dim_amt], x=acceleration.index[::dim_amt], y=['x_acc','y_acc','z_acc', 'x_pred', 'y_pred', 'z_pred'])
fig.show(renderer='browser')

In [31]:
torch.load('dev/autoencoder3/best_model.pt').keys()

odict_keys(['encoder.0.weight', 'encoder.0.bias', 'encoder.3.weight', 'encoder.3.bias', 'encoder.6.weight', 'encoder.6.bias', 'decoder.0.weight', 'decoder.0.bias', 'decoder.4.weight', 'decoder.4.bias', 'decoder.7.weight', 'decoder.7.bias'])

# Stuff

In [None]:
logits = 0
for X in trainloader:
    X = X[0][0].view(3,101).to(DEVICE)
    logits = model.encoder[0](X)
    break

In [None]:
plt.plot(X[0].cpu().detach())

In [None]:
plt.plot(logits[2].cpu().detach())

In [None]:
print(model.state_dict().keys())
plt.plot(model.state_dict()['encoder.0.weight'][3][0].cpu().detach())
plt.plot(model.state_dict()['encoder.0.weight'][3][1].cpu().detach())
plt.plot(model.state_dict()['encoder.0.weight'][3][2].cpu().detach())