In [None]:
import os
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn import CrossEntropyLoss
import torch.nn.functional as F

# Root folder of the competition data. read_data() builds paths from it by
# plain string concatenation, so the trailing slash is required.
MAIN_DIR = "input/tlvmc-parkinsons-freezing-gait-prediction/"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the random number generators so that weight initialisation (and any
# other random draw made through numpy/torch) is repeatable on a fresh kernel.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Columns used as model inputs.
FEATURES = ["AccV", "AccML", "AccAP"]
# Event indicator columns; rows where none of them equals 1 are labelled
# 'Normal' by FOGDataset.encode_target.
TARGETS = ["StartHesitation", "Turn", "Walking"]

# Number of passes over the training dataloader made by train().
N_EPOCHS = 1

In [None]:


def reduce_memory_usage(df, allow_float16=True):
    """Downcast the columns of ``df`` in place to smaller dtypes and return it.

    * signed / unsigned integer columns are cast to the smallest integer type
      of the same signedness whose range contains the column's min and max;
    * float columns are cast to ``float16`` (if ``allow_float16``) or
      ``float32`` when their range fits;
    * ``object`` columns are converted to ``category``;
    * every other column (bool, datetime, timedelta, category and other
      pandas extension dtypes) is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is modified in place.
    allow_float16 : bool, default True
        ``float16`` keeps only about three significant decimal digits, so the
        downcast is lossy. Pass ``False`` to stop at ``float32``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after downcasting.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    # Candidate dtypes per numpy "kind", ordered from smallest to largest.
    float_candidates = (np.float32,)
    if allow_float16:
        float_candidates = (np.float16,) + float_candidates
    candidates_by_kind = {
        'i': ((np.int8, np.int16, np.int32), np.iinfo),
        'u': ((np.uint8, np.uint16, np.uint32), np.iinfo),
        'f': (float_candidates, np.finfo),
    }

    for col in df.columns:
        dtype = df[col].dtype

        # Pandas extension dtypes (category, nullable ints, tz-aware
        # datetimes, ...) are not plain numpy dtypes; leave them alone.
        if not isinstance(dtype, np.dtype):
            continue

        if dtype.kind == 'O':
            df[col] = df[col].astype('category')
            continue

        # Dispatch on the real dtype kind: bool ('b'), datetime ('M') and
        # timedelta ('m') columns have no entry and are skipped rather than
        # being treated as floats.
        if dtype.kind not in candidates_by_kind:
            continue
        candidates, limits_of = candidates_by_kind[dtype.kind]

        c_min = df[col].min()
        c_max = df[col].max()

        for candidate in candidates:
            # Only ever shrink; stop once candidates are no smaller than
            # the current dtype.
            if np.dtype(candidate).itemsize >= dtype.itemsize:
                break
            limits = limits_of(candidate)
            # Inclusive bounds: a value equal to the type's limit still fits.
            # NaN min/max (empty or all-NaN column) fails both comparisons,
            # so such a column keeps its dtype.
            if limits.min <= c_min and c_max <= limits.max:
                df[col] = df[col].astype(candidate)
                break

    mem_usg = df.memory_usage().sum() / 1024**2
    print("Memory usage became: ",mem_usg," MB")

    return df

In [None]:
def read_data(
    dataset,
    datatype,
    subject_id = None):
    """Load every recording of one dataset and join it with its metadata.

    Reads ``MAIN_DIR + dataset + "_metadata.csv"`` and every ``.csv`` file
    found under ``MAIN_DIR + datatype + "/" + dataset`` (recursively), tags
    each recording with its file name (without ``.csv``), concatenates them
    and inner-joins the result with the metadata on ``Id`` == file name.
    The merged frame is passed through ``reduce_memory_usage``.

    Parameters
    ----------
    dataset : str
        Dataset name, e.g. ``"tdcsfog"``.
    datatype : str
        Sub-folder of ``MAIN_DIR`` to read, e.g. ``"train"`` or ``"test"``.
    subject_id : str, optional
        When given, only files whose *file name* contains this substring are
        read. NOTE(review): the filter matches on the file name, as the
        original code intended; whether file names actually embed a subject
        identifier cannot be told from this notebook - confirm.

    Returns
    -------
    pandas.DataFrame
        Metadata columns followed by the recording columns.

    Raises
    ------
    FileNotFoundError
        If no matching CSV file is found under the data folder.
    """
    metadata = pd.read_csv(MAIN_DIR + dataset + "_metadata.csv")

    DATA_ROOT = MAIN_DIR + datatype + "/" + dataset

    # Collect the per-file frames and concatenate once at the end; growing a
    # DataFrame with pd.concat inside the loop re-copies all previous rows on
    # every iteration.
    frames = []
    for root, _dirs, files in os.walk(DATA_ROOT):
        names = [name for name in files if name.endswith(".csv")]
        # The filter has to live inside the walk: `files` only exists here.
        if subject_id is not None:
            names = [name for name in names if subject_id in name]

        for name in tqdm(names):
            query_datatype = pd.read_csv(os.path.join(root, name))
            query_datatype["file"] = name.replace(".csv", "")
            frames.append(query_datatype)

    if not frames:
        raise FileNotFoundError(
            "No matching CSV files found under " + repr(DATA_ROOT)
        )

    df_res = pd.concat(frames)

    df_res = metadata.merge(df_res,
                          how = 'inner',
                          left_on = 'Id',
                          right_on = 'file')
    df_res = df_res.drop(["file"], axis = 1)

    df_res = reduce_memory_usage(df_res)

    return df_res


# Load Data

In [None]:
class FOGDataset(Dataset):
    """Sliding-window dataset over the rows returned by ``read_data``.

    Item ``idx`` is the block of ``lookback`` consecutive rows that ends just
    before row ``idx`` (rows ``idx - lookback`` .. ``idx - 1``), restricted to
    the feature columns. In ``"train"`` mode the item also carries the class
    code of row ``idx``. For ``idx < lookback`` there are not enough preceding
    rows, so the first ``lookback`` rows are used together with the label of
    row ``lookback``.

    Windows are cut from the concatenated frame without looking at ``Id``, so
    a window can span the boundary between two recordings.
    """

    # Label given to rows on which no target column is set.
    NORMAL_LABEL = 'Normal'

    @staticmethod
    def encode_target(data, targets_list):
        """Return one integer class code per row of ``data``.

        A row's class is the first column of ``targets_list`` equal to 1, or
        ``'Normal'`` when none is. Codes are the positions of the class names
        in alphabetical order over ``targets_list`` plus ``'Normal'``.

        The mapping is fixed by ``targets_list`` alone. Fitting a
        ``LabelEncoder`` on the observed labels gives the same codes only
        when every class occurs in ``data``; otherwise the codes shift and no
        longer line up with the model's output units.
        """
        classes = sorted(set(targets_list) | {FOGDataset.NORMAL_LABEL})
        code_of = {name: code for code, name in enumerate(classes)}

        codes = np.full(len(data), code_of[FOGDataset.NORMAL_LABEL], dtype=np.int64)
        # Assign in reverse so that, when several targets are set on the same
        # row, the first one in targets_list wins.
        for target in reversed(targets_list):
            codes[(data[target] == 1).to_numpy()] = code_of[target]
        return codes

    @staticmethod
    def get_features_target(data, features_list, datatype):
        """Split ``data`` into feature columns and, for "train", the target.

        Returns ``(features, target)`` when ``datatype == "train"`` and just
        ``features`` otherwise. Kept for callers; ``__getitem__`` slices the
        window directly so it does not copy every row for each item.
        """
        if datatype == "train":
            features, target = data[features_list], data["target"]
            return features, target
        else:
            features = data[features_list]
            return features

    def __init__(self, dataset, datatype, features_list, targets_list, lookback,
                 max_train_rows=1_000):
        """Load the data and, in "train" mode, attach the encoded target.

        Parameters
        ----------
        dataset, datatype : str
            Forwarded to ``read_data``.
        features_list : list of str
            Columns returned as model inputs.
        targets_list : list of str
            Event indicator columns used to build the ``target`` column.
        lookback : int
            Number of rows per window; must be at least 1.
        max_train_rows : int or None, default 1000
            In "train" mode only the first ``max_train_rows`` rows are kept;
            ``None`` keeps everything. Ignored for other datatypes.
        """
        if lookback < 1:
            raise ValueError("lookback must be a positive integer")

        self.datatype = datatype
        self.features = features_list
        self.targets = targets_list
        self.lookback = lookback

        data = read_data(dataset = dataset, datatype = datatype)
        data["Id_encoded"], _ = pd.factorize(data["Id"])

        if datatype == "train":
            if max_train_rows is not None:
                data = data.iloc[:max_train_rows]
            # Work on an independent copy so that adding a column to the
            # slice is well-defined (no chained-assignment warning).
            data = data.copy()
            data["target"] = FOGDataset.encode_target(data, self.targets)

        self.data = data

    def __len__(self):
        """Number of rows (one item per row)."""
        return len(self.data)

    def _window_bounds(self, idx):
        """Return positional ``(start, stop, label_row)`` for item ``idx``."""
        stop = max(idx, self.lookback)
        start = stop - self.lookback
        # Clamp so that a frame with no row at position `stop` still yields
        # a label instead of failing.
        label_row = min(stop, len(self.data) - 1)
        return start, stop, label_row

    def __getitem__(self, idx):
        """Return ``features`` or, in "train" mode, ``(features, target)``.

        ``features`` is a float32 tensor with one row per window row and one
        column per feature; ``target`` is a float32 scalar tensor holding the
        class code (``train`` casts it with ``.long()`` before the loss).
        """
        if idx >= len(self.data):
            raise IndexError("index {} is out of range".format(idx))

        start, stop, label_row = self._window_bounds(idx)

        # Positional slicing throughout: it does not depend on the values of
        # the frame's index labels.
        window = self.data.iloc[start:stop][self.features]
        features = torch.tensor(window.to_numpy(), dtype=torch.float32)

        if self.datatype != "train":
            return features

        target = self.data["target"].iloc[label_row]
        targets = torch.tensor(target, dtype=torch.float32)
        return features, targets

In [None]:
# Training split of the "tdcsfog" dataset, served as windows of 2 rows.
dataset_train = FOGDataset(dataset="tdcsfog", datatype="train",
                           features_list=FEATURES, targets_list=TARGETS,
                           lookback=2)


100%|██████████| 833/833 [02:13<00:00,  6.22it/s]


Memory usage of dataframe is 700.49 MB
Memory usage became:  181.89868545532227  MB


In [None]:
# Test split of the "tdcsfog" dataset, using the same window length as training.
dataset_test = FOGDataset(dataset="tdcsfog", datatype="test",
                          features_list=FEATURES, targets_list=TARGETS,
                          lookback=2)

100%|██████████| 1/1 [00:00<00:00, 70.99it/s]

Memory usage of dataframe is 0.36 MB
Memory usage became:  0.09409904479980469  MB





In [None]:
# Batch both splits in row order (no shuffling).
dataloader_train = DataLoader(dataset_train, shuffle=False, batch_size=8)
dataloader_test = DataLoader(dataset_test, shuffle=False, batch_size=1000)

# Sanity check: print up to three training batches, then stop.
count = 0
for features, target in dataloader_train:
    print("FEATURES EXAMPLES", features.shape, features, sep="\n")
    print("TARGET EXAMPLES", target, sep="\n")
    print("\n")
    if count > 1:
        break
    count += 1

FEATURES EXAMPLES
torch.Size([8, 2, 3])
tensor([[[-9.5312,  0.5664, -1.4131],
         [-9.5391,  0.5640, -1.4404]],

        [[-9.5312,  0.5664, -1.4131],
         [-9.5391,  0.5640, -1.4404]],

        [[-9.5312,  0.5664, -1.4131],
         [-9.5391,  0.5640, -1.4404]],

        [[-9.5391,  0.5640, -1.4404],
         [-9.5312,  0.5615, -1.4297]],

        [[-9.5312,  0.5615, -1.4297],
         [-9.5312,  0.5645, -1.4150]],

        [[-9.5312,  0.5645, -1.4150],
         [-9.5391,  0.5620, -1.4297]],

        [[-9.5391,  0.5620, -1.4297],
         [-9.5391,  0.5527, -1.4141]],

        [[-9.5391,  0.5527, -1.4141],
         [-9.5312,  0.5479, -1.4141]]])
TARGET EXAMPLES
tensor([0., 0., 0., 0., 0., 0., 0., 0.])


FEATURES EXAMPLES
torch.Size([8, 2, 3])
tensor([[[-9.5312,  0.5479, -1.4141],
         [-9.5234,  0.5527, -1.4160]],

        [[-9.5234,  0.5527, -1.4160],
         [-9.5391,  0.5527, -1.4199]],

        [[-9.5391,  0.5527, -1.4199],
         [-9.5312,  0.5483, -1.4160]],

   

In [None]:
class LSTMNet(nn.Module):
    """LSTM followed by a linear layer applied to the last time step.

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    hidden_size : int
        Size of the LSTM hidden state.
    num_layers : int
        Number of stacked LSTM layers.
    num_classes : int
        Number of output units (one score per class).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_size) to (batch, num_classes)."""
        # When no initial state is passed, nn.LSTM starts from zero hidden and
        # cell states created on the input's own device and dtype. Building
        # them by hand with torch.zeros(...) pins them to CPU/float32, which
        # fails as soon as the model or the input lives elsewhere.
        out, _ = self.lstm(x)
        # Keep only the output of the last time step.
        out = out[:, -1, :]
        return self.fc1(out)


# 4. Training

In [None]:
def train(model, dataloader, loss_fn, optimizer, n_epochs=None):
    """Train ``model`` and return it.

    Parameters
    ----------
    model : torch.nn.Module
        Model to optimise; it is updated in place.
    dataloader : iterable
        Yields ``(features, targets)`` batches.
    loss_fn : callable
        Called as ``loss_fn(predictions, targets)`` with integer targets.
    optimizer : torch.optim.Optimizer
        Optimizer already bound to the model's parameters.
    n_epochs : int, optional
        Number of passes over ``dataloader``. Defaults to the module-level
        ``N_EPOCHS``.

    Returns
    -------
    torch.nn.Module
        The same ``model`` object.
    """
    if n_epochs is None:
        n_epochs = N_EPOCHS

    # Feed batches on whatever device the model's weights live on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(n_epochs):
        batch_losses = []
        for (features, targets) in tqdm(dataloader):
            features = features.to(device)
            # The dataset yields float class codes; the loss expects integers.
            targets = targets.to(device).long()

            optimizer.zero_grad()
            preds = model(features)
            loss = loss_fn(preds, targets)
            batch_losses.append(loss.item())
            loss.backward()
            optimizer.step()

        # The quantity tracked here is the loss, not a precision metric.
        print("Average Loss : ", np.mean(batch_losses))

    return model

def predict(model, dataloader):
    """Return the predicted class index for every item served by ``dataloader``.

    Parameters
    ----------
    model : torch.nn.Module
        Model producing one score per class for each item of a batch.
    dataloader : iterable
        Yields feature batches (no targets).

    Returns
    -------
    numpy.ndarray
        1-D float64 array of class indices, in the order the batches were
        produced. Its length is the number of items actually yielded, not
        the size of some global dataset.
    """
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    batches = []
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for features in tqdm(dataloader):
            scores = model(features.to(device))
            # .cpu() is needed before .numpy() when the model runs on GPU.
            batches.append(torch.argmax(scores, dim = 1).cpu().numpy())

    if not batches:
        return np.empty(0)

    # Float dtype kept for compatibility with the array previously returned.
    return np.concatenate(batches).astype(np.float64)

In [None]:
# Model hyper-parameters, gathered into the keyword arguments of LSTMNet.
INPUT_SIZE = len(FEATURES)   # one input per feature column
HIDDEN_SIZE = 10
NUM_LAYERS = 1
NUM_CLASSES = 4              # presumably the three TARGETS plus 'Normal' - confirm
PARAMS = dict(
    input_size=INPUT_SIZE,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS,
    num_classes=NUM_CLASSES,
)

model = LSTMNet(**PARAMS)
loss_fn = CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Fit the model on the training batches; train() returns the same model object.
model = train(model, dataloader_train, loss_fn, optimizer)

100%|██████████| 125/125 [00:00<00:00, 144.45it/s]

Average Precision :  0.6833792561292649





In [None]:
# Predicted class index for every item served by the test dataloader.
preds_tdcsfog = predict(model, dataloader_test)

100%|██████████| 5/5 [00:02<00:00,  1.95it/s]


In [None]:
# Display the predictions (numpy abbreviates long arrays in the repr).
preds_tdcsfog

array([0., 0., 0., ..., 0., 0., 0.])