In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import numpy as np
import math

In [2]:
from datetime import datetime

In [3]:
def get_df(parquet):
    """Load a header-less recording whose first sample was read as column names.

    The values found in the column labels are moved back into the data as the
    first row, and the columns are renamed to
    ``["date_time", "A", "B", "C", "D", "E", "F"]``.

    Each recovered value is cast to the dtype of the column it belongs to, so
    numeric columns stay numeric instead of degrading to mixed-type ``object``
    columns. A value that cannot be cast (for example a label that was altered
    while being read) is kept as the original string.

    Args:
        parquet: Path (or anything ``pd.read_parquet`` accepts) of the file.

    Returns:
        DataFrame with the seven named columns; the recovered sample is the
        first row and every index label is shifted up by one, so a default
        0..n-1 index becomes 0..n.

    Raises:
        ValueError: If the file does not contain exactly seven columns
            (raised by pandas when the new column names are assigned).
    """
    new_headers = ["date_time", "A", "B", "C", "D", "E", "F"]

    df = pd.read_parquet(parquet)
    # The column labels are really the first measurement of the recording.
    actual_first_row = df.columns.to_list()
    df.columns = new_headers

    recovered = {}
    for column, raw_value in zip(new_headers, actual_first_row):
        cell = pd.Series([raw_value], index=[-1])
        try:
            cell = cell.astype(df[column].dtype)
        except (TypeError, ValueError, OverflowError):
            # Leave the label untouched when it does not parse as the column's dtype.
            pass
        recovered[column] = cell
    first_row = pd.DataFrame(recovered)

    # Prepending keeps the row order, so no sort over the whole recording is needed.
    df = pd.concat([first_row, df])
    df.index = df.index + 1

    return df

In [4]:
class RecurrentNetwork(nn.Module):
    """Thin wrapper around a multi-layer ``nn.RNN`` with ReLU non-linearity.

    Args:
        seq_length: Forwarded to ``nn.RNN`` as ``input_size``, i.e. the size of
            the last dimension of the input tensor.
            NOTE(review): the name suggests a number of time steps, but
            ``input_size`` is the per-step feature count - confirm what callers pass.
        hidden_size: Number of features in the hidden state.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, seq_length, hidden_size, num_layers):
        # nn.Module has to be initialised before a sub-module can be assigned;
        # without this call the assignment below raises AttributeError.
        super().__init__()
        self.rnn = nn.RNN(
            input_size=seq_length,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            nonlinearity='relu',
        )

    def forward(self, x):
        """Run ``x`` through the RNN.

        Args:
            x: Input laid out batch-first (``batch_first=True``), with a last
                dimension equal to the ``seq_length`` given at construction.

        Returns:
            The ``(output, h_n)`` tuple exactly as returned by ``nn.RNN``.
        """
        return self.rnn(x)

In [5]:
class PatientDataset(Dataset):
    """Sliding-window view over a sequence of samples.

    Item ``i`` is the window ``data[i : i + seq_len]``. Only complete windows
    are exposed, so every item has exactly ``seq_len`` rows and items can be
    stacked into batches.

    Args:
        df_as_np: Indexable sequence of samples supporting ``len()`` and
            slicing (presumably a NumPy array built from a DataFrame - TODO confirm).
        seq_len: Number of consecutive samples per window; must be >= 1.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """

    def __init__(self, df_as_np, seq_len):
        if seq_len < 1:
            raise ValueError(f"seq_len must be >= 1, got {seq_len}")
        self.data = df_as_np
        self.seq_len = seq_len

    def __len__(self):
        """Number of complete windows (0 when the data is shorter than one window)."""
        # Counting rows instead of windows would expose truncated tail windows
        # that cannot be collated with the full-length ones.
        return max(0, len(self.data) - self.seq_len + 1)

    def __getitem__(self, idx):
        """Return the window starting at ``idx`` (negative indices count from the end).

        Raises:
            IndexError: If ``idx`` does not address a complete window. This also
                lets plain ``for window in dataset`` iteration terminate.
        """
        n_windows = len(self)
        if idx < 0:
            idx += n_windows
        if not 0 <= idx < n_windows:
            raise IndexError(f"window index out of range for {n_windows} windows")
        return self.data[idx:idx + self.seq_len]


In [6]:
def load_patient_data(df_as_np, seq_len, batch_size=100, seed=None):
    """Build train / validation / test loaders over sliding windows of the data.

    The windows produced by ``PatientDataset`` are split at random into
    80 % train, 10 % validation and the remainder as test.

    NOTE(review): windows that start at neighbouring indices overlap, and the
    split is random, so overlapping windows can end up in different splits.

    Args:
        df_as_np: Sample sequence handed to ``PatientDataset``.
        seq_len: Window length handed to ``PatientDataset``.
        batch_size: Batch size used by all three loaders.
        seed: Optional integer. When given, the split and the shuffling of the
            training loader are driven by a ``torch.Generator`` seeded with it,
            which makes them reproducible. ``None`` keeps the global RNG.

    Returns:
        Tuple ``(trainloader, valloader, testloader)``. Only the training
        loader shuffles.
    """
    dataset = PatientDataset(df_as_np, seq_len)

    n_items = len(dataset)
    train_size = int(0.8 * n_items)
    val_size = int(0.1 * n_items)
    # The test split absorbs the rounding remainder so the sizes always sum to n_items.
    test_size = n_items - train_size - val_size

    generator = None if seed is None else torch.Generator().manual_seed(seed)
    # Only pass the generator when one was requested so the unseeded path is unchanged.
    split_kwargs = {} if generator is None else {"generator": generator}
    train_dataset, val_dataset, test_dataset = random_split(
        dataset, [train_size, val_size, test_size], **split_kwargs
    )

    trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, generator=generator)
    valloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    testloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return trainloader, valloader, testloader

In [7]:
def date_encoding(date_time_string):
    """Encode the day of the week of a timestamp as a fraction.

    Args:
        date_time_string: Text whose part before the first space is a date in
            ``YYYY-MM-DD`` form, e.g. ``"2025-01-30 12:10:05:000"``.

    Returns:
        ``weekday / 7`` where Monday is 0 and Sunday is 6, so the result lies
        in ``[0, 6/7]``.

    Raises:
        ValueError: If the date part does not match ``%Y-%m-%d``.
    """
    calendar_part, _, _ = date_time_string.partition(" ")
    weekday_index = datetime.strptime(calendar_part, "%Y-%m-%d").weekday()
    return weekday_index / 7

In [8]:
def time_encoding(date_time_string):
    """Encode the time of day of a timestamp as a fraction of a 24-hour day.

    Args:
        date_time_string: Text of the form ``"<date> HH:MM:SS:mmm"``; the part
            after the first space is split on ``":"`` into hour, minute,
            second and millisecond.

    Returns:
        ``numpy.float64`` equal to the elapsed hours since midnight divided by 24.

    Raises:
        IndexError: If the time part is missing or has fewer than four fields.
        ValueError: If one of the four fields is not an integer.
    """
    clock_fields = date_time_string.split(" ")[1].split(":")

    # hour, minute, second, millisecond - converted in that order.
    components = np.array([int(clock_fields[i]) for i in range(4)], dtype=np.float64)

    # Express minute, second and millisecond in hours; the hour field is already in hours.
    for position, per_hour in ((1, 60), (2, 3600), (3, 3600000)):
        components[position] /= per_hour

    hours_since_midnight = components[0] + components[1] + components[2] + components[3]
    return hours_since_midnight / 24

In [9]:
def actiography_features_encoding(df, feature):
    """Standardise one column of ``df`` in place (zero mean, unit sample std).

    The column is converted to float, then replaced by ``(x - mean) / sd``,
    where ``sd`` is the sample standard deviation (``ddof=1``).

    When the standard deviation is zero or undefined (a constant column or a
    single row) the values are only centred, so the column becomes zeros
    rather than NaN.

    Args:
        df: DataFrame holding the column; it is modified in place.
        feature: Name of the column to standardise.

    Returns:
        The same ``df`` object, for convenient chaining.
    """
    values = df[feature].astype(float)
    mean = values.mean()
    sd = values.std()  # sample standard deviation, identical to sqrt(var())

    # `not (sd > 0)` is true for both 0 and NaN; dividing by either would
    # turn the whole column into NaN.
    if not (sd > 0):
        sd = 1.0

    df[feature] = (values - mean) / sd
    return df

In [None]:
def encode_df_to_array(df):
    """Add the date/time encodings and standardise the sensor columns.

    Two columns are derived from ``date_time`` - ``date_encoded`` via
    ``date_encoding`` and ``time_encoded`` via ``time_encoding`` - and every
    sensor column except ``"E"`` is passed through
    ``actiography_features_encoding``.

    Args:
        df: DataFrame with a ``date_time`` column and the columns ``A``-``F``.
            It is modified in place.

    Returns:
        The encoded DataFrame (despite the function name, not an array).
    """
    timestamps = df["date_time"]
    df["date_encoded"] = timestamps.apply(date_encoding)
    df["time_encoded"] = timestamps.apply(time_encoding)

    # "E" is deliberately left as-is (presumably because it is constant in
    # these recordings - TODO confirm).
    to_standardise = [name for name in ("A", "B", "C", "D", "E", "F") if name != "E"]
    for column in to_standardise:
        df = actiography_features_encoding(df=df, feature=column)

    return df

In [3]:
# Preview a raw export. In the printed output the column labels are themselves a
# timestamp followed by sensor readings: the file has no header row, so its first
# sample was read as the header.
df = pd.read_parquet("parquet/DD_04_2025_019/Month 2/DD042025019_left wrist_101696_2025-07-15 11-53-15.parquet")
print(df.head())

   2025-06-17 12:50:10:000  0.0348  1.0717  -0.1336  287  0  42.2
0  2025-06-17 12:50:10:020  0.0777  0.9572  -0.0659  287  0  42.2
1  2025-06-17 12:50:10:040  0.0153  1.0243  -0.0619  270  0  42.2
2  2025-06-17 12:50:10:060 -0.0511  1.0717  -0.0738  270  0  42.2
3  2025-06-17 12:50:10:080  0.0465  0.9493  -0.0619  287  0  42.2
4  2025-06-17 12:50:10:100  0.0699  0.9651  -0.0260  287  0  42.2


In [10]:
# Preview an already processed file: the printed output shows the named columns
# (date_time, A-F) together with the derived date_encoded / time_encoded features.
# NOTE: this rebinds `df`, replacing the raw preview loaded in the earlier cell.
df = pd.read_parquet("cleaned_parquet/007/Month 7/DD072024007__100889_2025-03-26 12-05-58.parquet")
print(df.head())

                     date_time         A         B         C         D   E  \
index                                                                        
0      2025-01-30 12:10:05:000  2.441999 -0.365826 -0.354713  0.628275 NaN   
1      2025-01-30 12:10:05:020  2.505794 -0.480054 -0.445056  0.633710 NaN   
2      2025-01-30 12:10:05:040  2.473996 -0.569790 -0.451497  0.633710 NaN   
3      2025-01-30 12:10:05:060  2.665382 -0.586050 -0.451497  0.639145 NaN   
4      2025-01-30 12:10:05:080  2.649483 -0.259625 -0.516074  0.639145 NaN   

              F  date_encoded  time_encoded  
index                                        
0      1.430924      0.428571      0.507002  
1      1.430924      0.428571      0.507003  
2      1.430924      0.428571      0.507003  
3      1.430924      0.428571      0.507003  
4      1.430924      0.428571      0.507003  
