In [8]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd
import numpy as np
import math

In [2]:
from datetime import datetime

In [3]:
def get_df(parquet):
    df = pd.read_parquet(parquet)

    actual_first_row = df.columns.to_list()
    new_headers = ["date_time", "A", "B", "C", "D", "E", "F"]
    df.columns = new_headers
    df.loc[-1] = actual_first_row
    df.index = df.index + 1
    df = df.sort_index()
    df.columns = new_headers

    return df

In [4]:
class RecurrentNetwork(nn.Module):
    def __init__(self, seq_length, hidden_size, num_layers):
        self.rnn = nn.RNN(input_size=seq_length, hidden_size=hidden_size, num_layers=num_layers, batch_first=True,  nonlinearity='relu')
    
    def forward(self, x):
        return self.rnn(x)

In [5]:
class PatientDataset(Dataset):
    def __init__(self, df_as_np, seq_len):
        self.data = df_as_np
      
        self.seq_len = seq_len
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        return self.data[idx:idx+self.seq_len]


In [6]:
def load_patient_data(df_as_np, seq_len, batch_size=100):
    dataset = PatientDataset(df_as_np, seq_len)
    train_size = int(0.8 * len(dataset))
    val_size = int(0.1 * len(dataset))
    test_size = len(dataset) - train_size - val_size
    train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

    trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    testloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return trainloader, valloader, testloader

In [7]:
def date_encoding(date_time_string):
    date_string = date_time_string.split(" ")[0]
    date_object = datetime.strptime(date_string, "%Y-%m-%d")
    return date_object.weekday()/7

In [8]:
def time_encoding(date_time_string):
    time = date_time_string.split(" ")[1]
    splitted = time.split(":")
    hour = int(splitted[0])
    minute = int(splitted[1])
    second = int(splitted[2])
    millisecond = int(splitted[3])
    l = [hour, minute, second, millisecond]
    arr = np.array(l, dtype=np.float64)

    arr[1] /= 60
    arr[2] /= 3600
    arr[3] /= 3600000

    total_hours = arr[0] + arr[1] + arr[2] + arr[3]

    return total_hours/24

In [9]:
def actiography_features_encoding(df, feature):
    df[feature] = df[feature].astype(float)
    mean = df[feature].mean()
    sd = math.sqrt(df[feature].var())
    df[feature] = (df[feature]-mean)/sd
    return df

In [None]:
def encode_df_to_array(df):
    df["date_encoded"] = df["date_time"].apply(date_encoding)
    df["time_encoded"] = df["date_time"].apply(time_encoding)
    actiography_features = ["A", "B", "C", "D", "E", "F"]

    for f in actiography_features:
        if f != "E":
            df = actiography_features_encoding(df=df, feature=f)

    return df

In [37]:
df = pd.read_parquet("cleaned_parquet/007/Month 2/DD072024007_left wrist_101697_2024-09-18 11-15-01.parquet")
print(df)

                       date_time         A         B         C         D    E  \
index                                                                           
0        2024-08-20 13:13:48:000  1.539785 -0.702989 -1.288003 -0.102814  0.0   
1        2024-08-20 13:13:48:020  1.715609 -0.899471 -1.261651 -0.113405  0.0   
2        2024-08-20 13:13:48:040  1.659757 -0.825892 -1.221955 -0.121347  0.0   
3        2024-08-20 13:13:48:060  1.739603 -0.801230 -1.221955 -0.121347  0.0   
4        2024-08-20 13:13:48:080  1.731739 -0.809383 -1.307851 -0.129290  0.0   
...                          ...       ...       ...       ...       ...  ...   
8170495  2024-08-22 10:37:17:900 -0.425730  0.680335 -0.977277  2.837380  0.0   
8170496  2024-08-22 10:37:17:920 -0.489647  0.655673 -0.997125  2.837380  0.0   
8170497  2024-08-22 10:37:17:940 -0.425730  0.655673 -0.970773  2.837380  0.0   
8170498  2024-08-22 10:37:17:960 -0.425730  0.704793 -0.990620  2.837380  0.0   
8170499  2024-08-22 10:37:17

In [35]:
df = pd.read_parquet("cleaned_parquet/007/Month 1/DD072024007_left wrist_101698_2024-08-20 13-09-13.parquet")
print(df)

                         date_time         A         B         C         D  \
index                                                                        
0          2024-07-23 13:16:34:000 -0.691692 -0.069835  1.108325 -0.168220   
1          2024-07-23 13:16:34:020 -0.682757 -0.038617  1.108325 -0.168220   
2          2024-07-23 13:16:34:040 -0.673822 -0.030714  1.127044 -0.168220   
3          2024-07-23 13:16:34:060 -0.665111 -0.030714  1.127044 -0.168220   
4          2024-07-23 13:16:34:080 -0.665111 -0.062130  1.139366 -0.166802   
...                            ...       ...       ...       ...       ...   
120938395  2024-08-20 13:09:21:900  0.699042  2.226066 -0.596272 -0.196660   
120938396  2024-08-20 13:09:21:920  0.468743  1.724406 -0.453234 -0.195242   
120938397  2024-08-20 13:09:21:940  0.096599  1.849870 -0.440755 -0.193824   
120938398  2024-08-20 13:09:21:960  0.149763  1.269967 -0.789225 -0.193824   
120938399  2024-08-20 13:09:21:980  0.300318  0.956602 -0.894982

In [9]:
def get_seq(label, data_path, name):
    df = pd.read_parquet(data_path)
    df_new = df[df["date"] == label]
    df_new.to_parquet(name, index=False)


In [10]:
get_seq("2024-08-20", data_path="cleaned_parquet/007/Month 1/DD072024007_left wrist_101698_2024-08-20 13-09-13.parquet", name="20August.parquet")

In [13]:
date_string = "2024-08-"
date_strings = []
for i in range(21, 32):
    date_strings.append(date_string + str(i))

In [14]:
print(date_strings)

['2024-08-21', '2024-08-22', '2024-08-23', '2024-08-24', '2024-08-25', '2024-08-26', '2024-08-27', '2024-08-28', '2024-08-29', '2024-08-30', '2024-08-31']


In [15]:
date_string = "2024-09-"
date_strings = []
for i in range(0, 18):
    date_strings.append(date_string + str(i))

print(date_strings)

['2024-09-0', '2024-09-1', '2024-09-2', '2024-09-3', '2024-09-4', '2024-09-5', '2024-09-6', '2024-09-7', '2024-09-8', '2024-09-9', '2024-09-10', '2024-09-11', '2024-09-12', '2024-09-13', '2024-09-14', '2024-09-15', '2024-09-16', '2024-09-17']


In [39]:
get_seq("2024-08-23", data_path="cleaned_parquet/007/Month 3/DD072024007_left wrist_101696_2024-10-22 15-22-36.parquet", name="seq_daily/23August.parquet")

In [40]:
df = pd.read_parquet("seq_daily/23August.parquet")
print(df)

Empty DataFrame
Columns: [date_time, A, B, C, D, E, F, date_encoded, time_encoded, date]
Index: []


In [42]:
df = pd.read_parquet("cleaned_parquet/007/Month 2/DD072024007_left wrist_101697_2024-09-18 11-15-01.parquet")
print(df)

                       date_time         A         B         C         D    E  \
index                                                                           
0        2024-08-20 13:13:48:000  1.539785 -0.702989 -1.288003 -0.102814  0.0   
1        2024-08-20 13:13:48:020  1.715609 -0.899471 -1.261651 -0.113405  0.0   
2        2024-08-20 13:13:48:040  1.659757 -0.825892 -1.221955 -0.121347  0.0   
3        2024-08-20 13:13:48:060  1.739603 -0.801230 -1.221955 -0.121347  0.0   
4        2024-08-20 13:13:48:080  1.731739 -0.809383 -1.307851 -0.129290  0.0   
...                          ...       ...       ...       ...       ...  ...   
8170495  2024-08-22 10:37:17:900 -0.425730  0.680335 -0.977277  2.837380  0.0   
8170496  2024-08-22 10:37:17:920 -0.489647  0.655673 -0.997125  2.837380  0.0   
8170497  2024-08-22 10:37:17:940 -0.425730  0.655673 -0.970773  2.837380  0.0   
8170498  2024-08-22 10:37:17:960 -0.425730  0.704793 -0.990620  2.837380  0.0   
8170499  2024-08-22 10:37:17

In [43]:
df = pd.read_parquet("cleaned_parquet/007/Month 3/DD072024007_left wrist_101696_2024-10-22 15-22-36.parquet")
print(df)

                         date_time         A         B         C         D  \
index                                                                        
0          2024-09-18 11:07:12:000 -1.142986 -0.077163  1.027523 -0.067997   
1          2024-09-18 11:07:12:020 -0.990784 -0.008477  1.002565 -0.067997   
2          2024-09-18 11:07:12:040 -0.904758 -0.260584  1.002565 -0.067997   
3          2024-09-18 11:07:12:060 -0.865053 -0.314179  1.065039 -0.071047   
4          2024-09-18 11:07:12:080 -0.931227 -0.337010  1.140071 -0.074287   
...                            ...       ...       ...       ...       ...   
128188795  2024-10-18 03:16:47:900 -1.579906  0.457623 -0.141280 -0.106497   
128188796  2024-10-18 03:16:47:920 -1.586524  0.472908 -0.147558 -0.106497   
128188797  2024-10-18 03:16:47:940 -1.586524  0.450077 -0.147558 -0.106497   
128188798  2024-10-18 03:16:47:960 -1.599759  0.442337 -0.153837 -0.106497   
128188799  2024-10-18 03:16:47:980 -1.573289  0.450077 -0.153837