In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import numpy as np
import math

In [2]:
from datetime import datetime

In [4]:
def get_df(parquet):
    """Load a recording whose first sample was read as the column header.

    The column labels returned by ``pd.read_parquet`` are treated as the
    first data row: they are put back at the top of the frame and the
    columns are renamed to ``date_time`` and ``A``..``F``.

    Args:
        parquet: Path (or any source accepted by ``pd.read_parquet``) of a
            file with exactly seven columns.

    Returns:
        A new DataFrame with a fresh 0..n index: the recovered row at
        position 0, followed by the loaded rows in their original order.

    Raises:
        ValueError: If the file does not have exactly seven columns.

    Note:
        The recovered row holds the column labels exactly as they were read,
        so value columns presumably end up with ``object`` dtype -- convert
        them (e.g. ``pd.to_numeric``) before doing arithmetic.
    """
    df = pd.read_parquet(parquet)
    new_headers = ["date_time", "A", "B", "C", "D", "E", "F"]

    # The labels currently used as the header are really the first sample.
    recovered_row = pd.DataFrame([df.columns.to_list()], columns=new_headers)
    df.columns = new_headers

    # Prepending with concat does not depend on the loaded index being a
    # default 0..n-1 range (the old ``loc[-1]`` / ``index + 1`` / sort trick
    # overwrote an existing ``-1`` label and failed on non-numeric indexes),
    # and it avoids sorting the whole frame just to move one row.
    return pd.concat([recovered_row, df], ignore_index=True)

In [5]:
class RecurrentNetwork(nn.Module):
    """Thin wrapper around a ReLU ``nn.RNN`` that takes batch-first input.

    Args:
        seq_length: Forwarded to ``nn.RNN`` as ``input_size``, i.e. the size
            of the last dimension of the tensors passed to ``forward``.
            NOTE(review): the name suggests a window length, but
            ``input_size`` is the number of features per time step --
            confirm callers pass the feature count here.
        hidden_size: Number of features in the hidden state.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, seq_length, hidden_size, num_layers):
        # nn.Module must be initialised before any sub-module is assigned;
        # without this call the assignment below raises AttributeError.
        super().__init__()
        self.rnn = nn.RNN(
            input_size=seq_length,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            nonlinearity='relu',
        )

    def forward(self, x):
        """Run ``x`` through the RNN.

        Returns:
            The ``(output, h_n)`` tuple produced by ``nn.RNN``.
        """
        return self.rnn(x)

In [23]:
class PatientDataset(Dataset):
    """Sliding-window view over a row-ordered array.

    Item ``i`` is the slice ``data[i:i + seq_len]``. Only start positions
    that give a full ``seq_len``-row window are exposed, so every item has
    the same length and items can be stacked into a batch.

    Args:
        df_as_np: Sliceable sequence of rows (e.g. a 2-D NumPy array).
        seq_len: Number of consecutive rows per window; must be >= 1.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """

    def __init__(self, df_as_np, seq_len):
        if seq_len < 1:
            raise ValueError(f"seq_len must be >= 1, got {seq_len}")
        self.data = df_as_np
        self.seq_len = seq_len

    def __len__(self):
        # Number of full windows. The last seq_len - 1 rows cannot start a
        # complete window, and data shorter than seq_len yields none.
        return max(0, len(self.data) - self.seq_len + 1)

    def __getitem__(self, idx):
        n_windows = len(self)
        if idx < 0:
            # Support negative indices the same way sequences do.
            idx += n_windows
        if not 0 <= idx < n_windows:
            raise IndexError("window index out of range")
        return self.data[idx:idx + self.seq_len]


In [7]:
def load_patient_data(df_as_np, seq_len, batch_size=100, seed=None):
    """Wrap an array in a ``PatientDataset`` and split it 80/10/10.

    Args:
        df_as_np: Row-ordered array handed to ``PatientDataset``.
        seq_len: Window length handed to ``PatientDataset``.
        batch_size: Batch size used by all three loaders.
        seed: Optional integer. When given, a dedicated ``torch.Generator``
            seeded with it drives both the random split and the shuffling
            of the training loader, so results are reproducible. ``None``
            (the default) keeps using torch's global random state.

    Returns:
        ``(trainloader, valloader, testloader)``. Only the training loader
        shuffles.

    Note:
        Windows are assigned to the splits at random, so windows that share
        rows can end up in different splits.
    """
    dataset = PatientDataset(df_as_np, seq_len)

    total = len(dataset)
    train_size = int(0.8 * total)
    val_size = int(0.1 * total)
    # The test split absorbs whatever the two truncations above left over.
    test_size = total - train_size - val_size

    generator = None if seed is None else torch.Generator().manual_seed(seed)
    # Only pass a generator when one was requested, so the default call
    # behaves exactly as before.
    split_kwargs = {} if generator is None else {"generator": generator}
    train_dataset, val_dataset, test_dataset = random_split(
        dataset, [train_size, val_size, test_size], **split_kwargs
    )

    trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, generator=generator)
    valloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    testloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return trainloader, valloader, testloader

In [8]:
def date_encoding(date_time_string):
    """Encode the calendar date of a timestamp string as ``weekday / 7``.

    Everything before the first space is parsed as ``YYYY-MM-DD``. Monday
    maps to 0.0 and Sunday to 6/7.
    """
    day_text, _, _ = date_time_string.partition(" ")
    parsed_day = datetime.strptime(day_text, "%Y-%m-%d")
    weekday_index = parsed_day.weekday()
    return weekday_index / 7

In [9]:
def time_encoding(date_time_string):
    """Encode the time of day of a timestamp string as a fraction of 24 h.

    The text after the first space is split on ``":"`` and its first four
    fields are read as integer hour, minute, second and millisecond.
    The result is the elapsed hours divided by 24, as a NumPy float64.
    """
    clock_fields = date_time_string.split(" ")[1].split(":")

    # Parse the fields in order as float64 so the arithmetic matches NumPy's.
    hour, minute, second, millisecond = (
        np.float64(int(clock_fields[position])) for position in range(4)
    )

    # Express every component in hours before summing.
    total_hours = hour + minute / 60 + second / 3600 + millisecond / 3600000
    return total_hours / 24

In [10]:
def actiography_features_encoding(df, feature):
    """Standardise one feature column and replace it by ``<feature>_encoded``.

    The column is converted to numbers, centred on its mean and divided by
    its sample standard deviation (``ddof=1``). The input frame is left
    untouched.

    Args:
        df: DataFrame containing the column ``feature``.
        feature: Name of the column to standardise.

    Returns:
        A new DataFrame without ``feature`` and with ``feature + "_encoded"``
        appended as the last column.

    Raises:
        KeyError: If ``feature`` is not a column of ``df``.
        ValueError: If the column holds values that cannot be parsed as
            numbers.
    """
    # Values may arrive as strings (e.g. a row recovered from a header),
    # so coerce before doing arithmetic; numeric input passes through as-is.
    values = pd.to_numeric(df[feature])

    mean = values.mean()
    sd = values.std()
    if not sd > 0:
        # Constant, single-row or empty column: the spread is 0 or NaN.
        # Dividing by 1 maps every value to 0 instead of producing inf/NaN.
        sd = 1.0

    encoded = (values - mean) / sd
    return df.drop(columns=[feature]).assign(**{feature + "_encoded": encoded})

In [11]:
def encode_df_to_array(df):
    """Encode the timestamp and the six actigraphy features of a recording.

    ``date_time`` is replaced by two numeric columns, ``date_encoded`` (from
    ``date_encoding``) and ``time_encoded`` (from ``time_encoding``). Each of
    the columns ``A``..``F`` is then passed through
    ``actiography_features_encoding``. The input frame is not modified.

    Args:
        df: DataFrame with the columns ``date_time`` and ``A``..``F``.

    Returns:
        The encoded DataFrame. Despite the function name this is a
        DataFrame, not an array; call ``.to_numpy()`` on it if an array is
        needed.
    """
    timestamps = df["date_time"]

    # Drop the raw timestamp for real (the result of ``drop`` must be kept)
    # and add its two encodings on a new frame.
    encoded = df.drop(columns=["date_time"]).assign(
        date_encoded=timestamps.apply(date_encoding),
        time_encoded=timestamps.apply(time_encoding),
    )

    actiography_features = ["A", "B", "C", "D", "E", "F"]
    for f in actiography_features:
        encoded = actiography_features_encoding(df=encoded, feature=f)

    return encoded

In [12]:
# Load one recording from the cleaned_parquet directory (path is relative to
# the notebook's working directory).
# NOTE(review): presumably this file already holds the encoded features --
# confirm how it was produced.
df = pd.read_parquet("cleaned_parquet/DD_05_2025/Month 1/DD052025022_left wrist_107718_2025-07-23 12-13-35.parquet")


In [14]:
# Remove the raw timestamp column. `df` is rebound here, so re-running this
# cell on its own raises KeyError because the column is already gone.
df = df.drop(columns=["date_time"])

In [15]:

# Convert the remaining columns to one NumPy array (rows x columns); this is
# the array later passed to load_patient_data.
np_arr = df.to_numpy()

In [21]:
# Sanity check: (number of rows, number of columns) of the array.
print(np_arr.shape)

(116011200, 8)


In [25]:
# Build the loaders with 100-row windows and batches of 100 windows; only the
# training loader is kept, the validation and test loaders are discarded.
train, _, _ = load_patient_data(df_as_np=np_arr, seq_len=100, batch_size=100)

# Print the first training batch only, then stop iterating.
for r in train:
    print(r)
    break

tensor([[[-7.6679e-01,  6.3660e-01,  1.2277e+00,  ...,  1.7410e-01,
           8.5714e-01,  6.9178e-01],
         [-7.9499e-01,  6.2118e-01,  1.2474e+00,  ...,  1.7410e-01,
           8.5714e-01,  6.9178e-01],
         [-7.8080e-01,  6.2118e-01,  1.2540e+00,  ...,  1.7410e-01,
           8.5714e-01,  6.9178e-01],
         ...,
         [-7.3859e-01,  6.1338e-01,  1.2474e+00,  ...,  1.7410e-01,
           8.5714e-01,  6.9180e-01],
         [-7.5278e-01,  6.0557e-01,  1.2408e+00,  ...,  1.7410e-01,
           8.5714e-01,  6.9180e-01],
         [-7.6679e-01,  6.2118e-01,  1.2343e+00,  ...,  1.7410e-01,
           8.5714e-01,  6.9180e-01]],

        [[ 5.6285e-01, -2.0326e+00,  1.8633e-01,  ..., -2.9709e+00,
           7.1429e-01,  1.9199e-01],
         [ 5.7003e-01, -2.0326e+00,  1.8633e-01,  ..., -2.9709e+00,
           7.1429e-01,  1.9199e-01],
         [ 5.5584e-01, -2.0170e+00,  1.9274e-01,  ..., -2.9709e+00,
           7.1429e-01,  1.9199e-01],
         ...,
         [ 4.9262e-01, -2