In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import numpy as np
import math
import torch.optim as optim

In [2]:
# Integer class index assigned to each textual label value.
d = dict(good=0, neutral=1, bad=2)


def encoding(label):
    """Translate a textual label into its integer class index.

    Args:
        label: One of the keys of ``d`` ("good", "neutral" or "bad").

    Returns:
        The integer stored in ``d`` for ``label``.

    Raises:
        KeyError: If ``label`` is not a key of ``d``.
    """
    class_index = d[label]
    return class_index

In [3]:
class PatientDataset(Dataset):
    """Map-style dataset pairing each entry of ``df_as_np`` with the label at the same index."""

    def __init__(self, df_as_np, labels, seq_len):
        """Keep references to the samples, their labels and the sequence length.

        Args:
            df_as_np: Indexable collection of samples; its length defines the dataset size.
            labels: Indexable collection of labels, looked up with the same index as the samples.
            seq_len: Stored on the instance; no method of this class reads it.
        """
        self.data = df_as_np
        self.labels = labels
        self.seq_len = seq_len

    def __len__(self):
        """Return the number of samples, i.e. ``len(self.data)``."""
        n_samples = len(self.data)
        return n_samples

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair found at position ``idx``."""
        sample = self.data[idx]
        target = self.labels[idx]
        return sample, target

In [4]:
def load_patient_data(df_as_np, labels, seq_len, batch_size=50, seed=None):
    """Wrap the data in a PatientDataset and split it 80/10/10 into three DataLoaders.

    Args:
        df_as_np: Indexable collection of samples handed to ``PatientDataset``.
        labels: Indexable collection of labels handed to ``PatientDataset``.
        seq_len: Sequence length forwarded to ``PatientDataset``.
        batch_size: Batch size used by all three loaders.
        seed: Optional integer. When given, a dedicated ``torch.Generator`` seeded
            with it drives both the random split and the shuffling of the training
            loader, so repeated calls produce the same partition. When ``None``
            (the default) PyTorch's global RNG is used, as before.

    Returns:
        Tuple ``(trainloader, valloader, testloader)``. Only the training loader
        shuffles its samples.
    """
    dataset = PatientDataset(df_as_np, labels, seq_len)

    n_total = len(dataset)
    train_size = int(0.8 * n_total)
    val_size = int(0.1 * n_total)
    # The test split absorbs the rounding remainder so the sizes always sum to n_total.
    test_size = n_total - train_size - val_size

    generator = None if seed is None else torch.Generator().manual_seed(seed)
    # Only pass a generator when one was requested; otherwise keep random_split's default.
    split_kwargs = {} if generator is None else {"generator": generator}
    train_dataset, val_dataset, test_dataset = random_split(
        dataset, [train_size, val_size, test_size], **split_kwargs
    )

    trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, generator=generator)
    valloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    testloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return trainloader, valloader, testloader

In [5]:
class RecurrentNetwork(nn.Module):
    """ReLU RNN followed by a linear classifier over the flattened per-step outputs."""

    def __init__(self, seq_length, hidden_size, num_layers, input_size=7, num_classes=3):
        """Build the recurrent stack and the classification head.

        Args:
            seq_length: Number of time steps in every input sequence. The classifier
                input width is ``seq_length * hidden_size``, so inputs must have
                exactly this many steps.
            hidden_size: Width of the RNN hidden state.
            num_layers: Number of stacked RNN layers.
            input_size: Number of features per time step (defaults to 7).
            num_classes: Number of output logits (defaults to 3).
        """
        super().__init__()
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            nonlinearity='relu',
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # Flatten turns (batch, seq_length, hidden_size) into (batch, seq_length * hidden_size);
            # deriving the width here replaces the former hard-coded 150 (= 50 * 3).
            nn.Linear(seq_length * hidden_size, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``.

        Args:
            x: Float tensor laid out as ``(batch, seq_length, input_size)`` since the
                RNN is built with ``batch_first=True``.
        """
        x, _ = self.rnn(x)
        return self.classifier(x)

In [6]:
def train(dataloader, lr, epochs):
    """Fit a fresh RecurrentNetwork on ``dataloader`` and return it.

    The network is created with ``seq_length=50``, ``hidden_size=3`` and
    ``num_layers=10`` and optimised with Adam (weight decay 1e-4) against a
    cross-entropy loss. After every optimisation step the epoch index, a
    batch counter that keeps increasing across epochs, and the batch loss
    are printed.

    Args:
        dataloader: Iterable yielding ``(sequences, labels)`` batches.
        lr: Learning rate given to Adam.
        epochs: Number of full passes over ``dataloader``.

    Returns:
        The trained model.
    """
    model = RecurrentNetwork(seq_length=50, hidden_size=3, num_layers=10)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=0.0001)
    model.train()

    # Counts batches over the whole run; it is deliberately not reset per epoch.
    step = 0
    for epoch in range(epochs):
        for sequences, targets in dataloader:
            optimizer.zero_grad()
            logits = model(sequences.float())
            loss = criterion(logits, targets.long())
            loss.backward()
            optimizer.step()
            print(epoch, step, loss.item())
            step = step + 1
    return model

In [7]:
def select_target(df_features_as_np, df_labels_as_np, i, seq_len):
    """Build train/val/test loaders for the ``i``-th label column.

    Args:
        df_features_as_np: Feature array forwarded unchanged to ``load_patient_data``.
        df_labels_as_np: 2-D label array; column ``i`` is used as the target.
        i: Index of the label column to predict.
        seq_len: Sequence length forwarded to ``load_patient_data``.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)`` built with a batch size of 500.
    """
    target_column = df_labels_as_np[:, i]
    loaders = load_patient_data(
        df_features_as_np,
        target_column,
        seq_len=seq_len,
        batch_size=500,
    )
    train_loader, val_loader, test_loader = loaders
    return train_loader, val_loader, test_loader

In [8]:
def test(model, dataloader):
    for seq, labels in dataloader:
        output = model(seq.float())
        pred_labels = torch.argmax(output, dim=1)
        acc = (pred_labels == labels).float().mean().item()
        print(acc)
        print(pred_labels)
        print("##########################")
        print(labels)

In [10]:
def train_and_test(csv_features, csv_labels, feature_cols_to_drop, label_cols_to_drop, features_range, features_shape, seq_len):
    """Train and evaluate one model per label column of a features/labels CSV pair.

    Args:
        csv_features: Path of the CSV holding the feature rows.
        csv_labels: Path of the CSV holding the textual labels.
        feature_cols_to_drop: Column names removed from the feature frame.
        label_cols_to_drop: Column names removed from the label frame.
        features_range: Number of leading feature rows kept before reshaping.
        features_shape: Shape the kept feature rows are reshaped to.
        seq_len: Sequence length forwarded to ``select_target``.

    Returns:
        None. Shapes, training losses and test results are printed.

    Raises:
        KeyError: If a label value is not known to ``encoding``.
    """
    df_features = pd.read_csv(csv_features)
    df_labels = pd.read_csv(csv_labels)
    df_features = df_features.drop(columns=feature_cols_to_drop)
    df_labels = df_labels.drop(columns=label_cols_to_drop)

    # Replace every textual label with its integer class index.
    for column in df_labels.columns:
        df_labels[column] = df_labels[column].apply(encoding)

    df_features_as_np = df_features.to_numpy()[:features_range, :]
    df_features_as_np = df_features_as_np.reshape(features_shape)
    df_labels_as_np = df_labels.to_numpy()

    print("pre_train shapes")
    print(df_features_as_np.shape)
    print(df_labels_as_np.shape)

    # Iterate over the label columns actually present rather than a fixed count of 6,
    # which raised IndexError with fewer columns and silently skipped any extras.
    num_targets = df_labels_as_np.shape[1]
    for i in range(num_targets):
        print(f"############### LABEL {i} #################")
        # The validation loader is built by select_target but not used here.
        train_loader, _val_loader, test_loader = select_target(df_features_as_np, df_labels_as_np, i, seq_len)
        model = train(dataloader=train_loader, lr=0.05, epochs=10)
        print("################# TESTING ##############################")
        test(model, test_loader)

Youcef's minute data training

In [11]:
# Load the features CSV (the same file later passed to train_and_test as csv_features)
# and print it to inspect its columns.
# NOTE(review): `df` is rebound to the labels CSV in a later cell, so this frame is only a preview.
df = pd.read_csv("minute_data_007_youcef/encoded.csv")
print(df)

        Unnamed: 0.2  Unnamed: 0.1  Unnamed: 0                     date  \
0                  0             0       39524  2024-08-20 00:00:34:000   
1                  1             1       39525  2024-08-20 00:01:34:000   
2                  2             2       39526  2024-08-20 00:02:34:000   
3                  3             3       39527  2024-08-20 00:03:34:000   
4                  4             4       39528  2024-08-20 00:04:34:000   
...              ...           ...         ...                      ...   
180967        180967        180967      220491  2025-02-28 02:57:05:000   
180968        180968        180968      220492  2025-02-28 02:58:05:000   
180969        180969        180969      220493  2025-02-28 02:59:05:000   
180970        180970        180970      220494  2025-02-28 03:00:05:000   
180971        180971        180971      220495  2025-02-28 03:01:05:000   

        Accelerometer_X  Accelerometer_Y  Accelerometer_Z  Light_Lux  \
0              0.585266    

In [12]:
# Load the labels CSV and list its column names.
# This rebinds `df`, replacing the features frame loaded in the earlier cell.
df = pd.read_csv("minute_data_007_youcef/labels.csv")
print(df.columns)

Index(['Unnamed: 0', 'average', 'phq_9', 'cgis', 'gad_7', 'wsas', 'qids'], dtype='object')


In [13]:
# Train and evaluate one model per label column on the minute-level data.
# 180950 feature rows are kept and reshaped into 3619 sequences of 50 steps x 7 features.
train_and_test(
    csv_features="minute_data_007_youcef/encoded.csv",
    csv_labels="minute_data_007_youcef/labels.csv",
    feature_cols_to_drop=[
        'Unnamed: 0.2',
        'Unnamed: 0.1',
        'Unnamed: 0',
        'date',
        'date_only',
        'time',
    ],
    label_cols_to_drop=['Unnamed: 0'],
    features_range=180950,
    features_shape=(3619, 50, 7),
    seq_len=50,
)

pre_train shapes
(3619, 50, 7)
(3619, 6)
############### LABEL 0 #################
0 0 0.9995430707931519
0 1 1.6480705738067627
0 2 0.9081189036369324
0 3 0.9554027318954468
0 4 1.0770231485366821
0 5 1.0493125915527344
1 6 0.9835866093635559
1 7 0.9145917892456055
1 8 0.8560069799423218
1 9 0.9985294938087463
1 10 0.902711033821106
1 11 0.8891479969024658
2 12 0.8999285697937012
2 13 0.8801076412200928
2 14 0.8442625999450684
2 15 0.8979392647743225
2 16 0.8991057276725769
2 17 0.9219344258308411
3 18 0.9204460978507996
3 19 0.8962066769599915
3 20 0.864211916923523
3 21 0.9132024049758911
3 22 0.8800823092460632
3 23 0.8367101550102234
4 24 0.8891657590866089
4 25 0.9203343987464905
4 26 0.9168579578399658
4 27 0.8709755539894104
4 28 0.8651877641677856
4 29 0.8697978854179382
5 30 0.8994148969650269
5 31 0.8432877659797668
5 32 0.8822566866874695
5 33 0.9558109641075134
5 34 0.8991933465003967
5 35 0.8341505527496338
6 36 0.8983315825462341
6 37 0.8105254173278809
6 38 0.9482901096