In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import numpy as np
import math
import torch.optim as optim

In [2]:
# Integer code assigned to each textual label value.
d = {"good": 0, "neutral": 1, "bad": 2}


def encoding(label):
    """Translate a textual label into its integer code.

    Args:
        label: one of the keys of ``d`` ("good", "neutral" or "bad").

    Returns:
        The integer stored in ``d`` for ``label``.

    Raises:
        KeyError: if ``label`` is not a key of ``d``.
    """
    code = d[label]
    return code

In [3]:
class PatientDataset(Dataset):
    """Map-style dataset that pairs ``df_as_np[idx]`` with ``labels[idx]``.

    Attributes are stored exactly as passed in; no copy or conversion is made.
    """

    def __init__(self, df_as_np, labels, seq_len):
        """Keep references to the samples, their labels and the sequence length.

        Args:
            df_as_np: indexable container of samples; its ``len`` is the dataset size.
            labels: indexable container addressed with the same indices as ``df_as_np``.
            seq_len: stored on the instance; not used by this class itself.
        """
        self.data, self.labels, self.seq_len = df_as_np, labels, seq_len

    def __len__(self):
        """Number of samples, i.e. ``len`` of the stored data container."""
        n_samples = len(self.data)
        return n_samples

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at position ``idx``."""
        sample = self.data[idx]
        target = self.labels[idx]
        return sample, target

In [4]:
def load_patient_data(df_as_np, labels, seq_len, batch_size=500, seed=None):
    """Wrap the arrays in a PatientDataset and split it 80/10/10 into loaders.

    Args:
        df_as_np: samples, forwarded unchanged to ``PatientDataset``.
        labels: labels addressed with the same indices as ``df_as_np``.
        seq_len: forwarded unchanged to ``PatientDataset``.
        batch_size: batch size used by all three loaders.
        seed: optional integer. When given, the random split and the shuffling
            of the training loader draw from generators seeded with this
            value, so repeated calls produce the same partition. When ``None``
            (the default) the global torch RNG is used.

    Returns:
        ``(trainloader, valloader, testloader)``. Only the training loader
        shuffles; the test split receives whatever is left after the 80% and
        10% sizes have been truncated to integers.
    """
    dataset = PatientDataset(df_as_np, labels, seq_len)
    n_total = len(dataset)
    train_size = int(0.8 * n_total)
    val_size = int(0.1 * n_total)
    # The remainder goes to the test split so the three sizes always sum to n_total.
    test_size = n_total - train_size - val_size

    # Only pass explicit generators when a seed was requested, so the unseeded
    # call path is exactly the original one.
    split_kwargs = {}
    shuffle_kwargs = {}
    if seed is not None:
        split_kwargs["generator"] = torch.Generator().manual_seed(seed)
        shuffle_kwargs["generator"] = torch.Generator().manual_seed(seed)

    train_dataset, val_dataset, test_dataset = random_split(
        dataset, [train_size, val_size, test_size], **split_kwargs
    )

    trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, **shuffle_kwargs)
    valloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    testloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return trainloader, valloader, testloader

In [5]:
class RecurrentNetwork(nn.Module):
    """Stacked ReLU RNN followed by a linear classifier over all time steps.

    The RNN output for every time step is flattened and fed to one linear
    layer, so the classifier width depends on both the sequence length and the
    hidden size.
    """

    def __init__(self, seq_length, hidden_size, num_layers, input_size=8, num_classes=3):
        """Build the recurrent stack and the classifier head.

        Args:
            seq_length: number of time steps in every input sequence.
            hidden_size: width of the RNN hidden state.
            num_layers: number of stacked RNN layers.
            input_size: number of features per time step (defaults to 8).
            num_classes: number of output logits (defaults to 3).
        """
        super().__init__()
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            nonlinearity='relu',
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # The flattened RNN output has seq_length * hidden_size values per
            # sample; deriving the width here (instead of a literal 30) keeps
            # the head consistent with the constructor arguments.
            nn.Linear(seq_length * hidden_size, num_classes),
        )

    def forward(self, x):
        """Return class logits for a batch of sequences.

        Args:
            x: float tensor laid out batch-first, i.e.
                ``(batch, seq_length, input_size)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalised scores.
        """
        x, _ = self.rnn(x)  # the final hidden state is not used
        return self.classifier(x)

In [17]:
# Read the feature and label tables and drop the index/timestamp columns that
# are not used as model inputs or targets.
df_features = pd.read_csv("cleaned_parquet/007/sample_features.csv")
df_labels = pd.read_csv("cleaned_parquet/007/sample_labels.csv")
df_features = df_features.drop(columns=["Unnamed: 0.1", "Unnamed: 0", "date_time", "date"])
df_labels = df_labels.drop(columns=["Unnamed: 0.1", "Unnamed: 0"])

# Replace the textual label in every label column by its integer code.
for column in df_labels.columns:
    df_labels[column] = df_labels[column].apply(encoding)

# Group every 10 consecutive feature rows into one sequence of 8 features.
# -1 lets NumPy infer the number of sequences instead of hard-coding 24000,
# so the cell also works for files of a different length.
df_features_as_np = df_features.to_numpy().reshape(-1, 10, 8)
df_labels_as_np = df_labels.to_numpy()

# With the sample count inferred, guard against feature/label files that do
# not describe the same number of sequences.
assert len(df_features_as_np) == len(df_labels_as_np), (
    f"{len(df_features_as_np)} feature sequences but {len(df_labels_as_np)} label rows"
)

print(df_features_as_np.shape)
print(df_labels_as_np.shape)
print(df_labels_as_np)

(24000, 10, 8)
(24000, 6)
[[2 1 1 2 1 2]
 [2 1 1 2 1 2]
 [2 1 1 2 1 2]
 ...
 [1 1 1 1 1 1]
 [1 1 0 1 1 1]
 [1 1 0 1 1 1]]


In [35]:
def train(dataloader, lr, epochs):
    """Train a freshly initialised RecurrentNetwork and return it.

    Args:
        dataloader: iterable yielding ``(seq, label)`` batches; ``seq`` is cast
            to float and ``label`` to long before use.
        lr: learning rate for the Adam optimiser.
        epochs: number of full passes over ``dataloader``.

    Returns:
        The trained model.

    Prints ``epoch, batch index, loss`` after every optimiser step. The batch
    index keeps counting across epochs; it is never reset.
    """
    model = RecurrentNetwork(seq_length=10, hidden_size=3, num_layers=10)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=0.0001)
    model.train()

    step = 0  # running batch index over the whole training run
    for epoch in range(epochs):
        for seq, label in dataloader:
            optimizer.zero_grad()
            logits = model(seq.float())
            loss = criterion(logits, label.long())
            loss.backward()
            optimizer.step()
            print(epoch, step, loss.item())
            step += 1

    return model

In [36]:
def select_target(i):
    """Build train/val/test loaders that use label column ``i`` as the target.

    Args:
        i: column index into ``df_labels_as_np``.

    Returns:
        ``(train_loader, val_loader, test_loader)`` as produced by
        ``load_patient_data`` with ``seq_len=10`` and ``batch_size=500``.
    """
    target_column = df_labels_as_np[:, i]
    loaders = load_patient_data(df_features_as_np, target_column, seq_len=10, batch_size=500)
    train_loader, val_loader, test_loader = loaders
    return train_loader, val_loader, test_loader

In [49]:
# Use label column 5 as the classification target and split the data into
# train / validation / test loaders.
train_loader, val_loader, test_loader = select_target(i=5)
# Train on the training split only: 15 epochs with learning rate 0.001.
# NOTE(review): val_loader is created but not used by this call.
model = train(dataloader=train_loader, lr=0.001, epochs=15)

0 0 1.0236643552780151
0 1 1.0146995782852173
0 2 1.010332465171814
0 3 1.01027250289917
0 4 0.9908196926116943
0 5 0.9943972826004028
0 6 1.0128597021102905
0 7 1.0060086250305176
0 8 1.0169541835784912
0 9 0.9846461415290833
0 10 0.999106764793396
0 11 1.0048619508743286
0 12 1.001754641532898
0 13 0.9924237728118896
0 14 0.9979787468910217
0 15 0.9823379516601562
0 16 0.9909963011741638
0 17 1.011032223701477
0 18 0.9704959392547607
0 19 0.9552665948867798
0 20 0.9864574670791626
0 21 0.9903250336647034
0 22 0.9571542143821716
0 23 0.9692123532295227
0 24 0.9748493432998657
0 25 0.9815436005592346
0 26 0.9642937183380127
0 27 0.9472147226333618
0 28 0.9742161631584167
0 29 0.9546472430229187
0 30 0.9545081257820129
0 31 0.9851078987121582
0 32 0.9420884251594543
0 33 0.9295582175254822
0 34 0.9630100727081299
0 35 0.9397223591804504
0 36 0.946564257144928
0 37 0.9318797588348389
0 38 0.9396010637283325
1 39 0.9869094491004944
1 40 0.9255378246307373
1 41 0.9646530151367188
1 42 0.97

In [50]:
def test(model, dataloader):
    for seq, labels in dataloader:
        output = model(seq.float())
        pred_labels = torch.argmax(output, dim=1)
        acc = (pred_labels == labels).float().mean().item()
        print(acc)
        print(pred_labels)
        print("##########################")
        print(labels)

In [51]:
# Evaluate the trained model on the held-out test split; prints accuracy,
# predictions and true labels for every batch.
# NOTE(review): in the recorded output the visible predictions are all
# class 2, which suggests the model may be predicting a single class — verify
# against the label distribution before trusting the accuracy figure.
test(model, test_loader)

0.6240000128746033
tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2