In [43]:
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, Subset
from torch import nn, optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from torchsummary import summary
import seaborn as sns
from copy import deepcopy
import pickle
from tqdm import tqdm
import os
from scipy.interpolate import interp1d
from sklearn.preprocessing import MinMaxScaler
from torch import FloatTensor, tensor
from torch.cuda import FloatTensor as GPUFloatTensor
from torch.utils.data import Dataset, random_split

# Req for package
import sys
sys.path.append("../")
from SkinLearning.NN.Helpers import train, test, DEVICE, getParameterLoss, setSeed
from SkinLearning.NN.Models import MultiTemporal
from SkinLearning.Utils.Dataset import getDataset, getSplit
from SkinLearning.Utils.Plotting import plotParameterBars


# Switch on cuDNN benchmark mode for the whole session. Per the PyTorch docs this lets
# cuDNN time candidate algorithms and keep the fastest one — TODO confirm it actually
# helps for the model trained in this notebook.
torch.backends.cudnn.benchmark = True

In [17]:
def _signalPasses(csvPath, maxDisplacement, steps):
    """
    Returns True when the displacement curve stored in ``csvPath`` passes every check.

    The CSV is expected to have the header ``0,0`` (pandas names the columns
    ``'0'`` and ``'0.1'``), which are renamed to ``x`` and ``y``.
    """
    a = pd.read_csv(csvPath)
    a.rename(columns = {'0':'x', '0.1': 'y'}, inplace=True)

    # The curve must end exactly at x = 7.0
    if a['x'].max() != 7.0:
        return False

    # Interpolate curve for consistent x values
    xNew = np.linspace(0, 7, num=steps, endpoint=True)
    interped = interp1d(a['x'], a['y'], kind='cubic', fill_value="extrapolate")(xNew)
    interped[0] = 0.0

    # The array is not modified below, so its maximum is computed only once
    peak = max(interped)
    if peak > maxDisplacement:
        print("Found")
        return False

    # Converts a sample index to an x value, assuming a spacing of 7/steps
    # (approximate: linspace with endpoint=True uses a spacing of 7/(steps-1))
    step_second = 1/(7/steps)

    max_def = 0        # x at which the curve first exceeds 99% of its peak
    max_def_final = 0  # x of the last sample equal to the peak
    min_def = 0        # x of the last sample equal to the minimum of the search window

    for idx, value in enumerate(interped):
        if max_def == 0 and value > peak*0.99:
            max_def = idx/step_second

        if max_def != 0:
            if value == peak:
                max_def_final = idx/step_second

            # NOTE(review): max_def_final is an x value, yet it is used here as a
            # sample index, so the window starts within the first few samples
            # rather than at the peak. Kept unchanged so the set of accepted runs
            # stays the same; confirm the intent before changing it.
            if value == min(interped[int(max_def_final):]):
                min_def = idx/step_second

    # Reject when the last minimum comes less than 0.5 after the last peak,
    # or when the curve gets within 1% of its peak before x = 1
    return not (min_def-max_def_final < 0.5 or max_def < 1)


def filterData(signalFolder="D:/SamplingResults2", sampleFile="../Data/newSamples.pkl", steps=128):
    """
    Filters the signals based on the gradient of each phase.

    Every sub-folder of ``signalFolder`` is treated as one run. A run is kept
    when it contains exactly ``Disp1.csv`` and ``Disp2.csv`` and both curves,
    after cubic interpolation onto ``steps`` points over [0, 7]:
      * end exactly at x = 7.0,
      * do not exceed a maximum of 1 (Disp1) or 2 (Disp2),
      * first come within 1% of their peak no earlier than x = 1,
      * have at least 0.5 between the last peak sample and the last minimum.

    Parameters:
        signalFolder: folder holding one sub-folder per run; the sub-folder
            names must be convertible to int.
        sampleFile: kept for backward compatibility only. The sample file was
            previously loaded but never used, so it is no longer read.
        steps: number of interpolation points per curve.

    Returns:
        List of accepted run ids as int, in directory-listing order.
    """
    expected_files = ['Disp1.csv', 'Disp2.csv']
    # Upper bound on the interpolated displacement of each file
    max_displacement = {'Disp1.csv': 1, 'Disp2.csv': 2}

    runs = []
    for run in tqdm(os.listdir(f"{signalFolder}/")):
        # os.listdir gives no ordering guarantee, so sort before comparing;
        # otherwise valid runs can be dropped depending on the file system
        files = sorted(os.listdir(f"{signalFolder}/{run}/"))

        if files != expected_files:
            continue

        # all() stops at the first failing file, like the original early break
        if all(
            _signalPasses(f"{signalFolder}/{run}/{file}", max_displacement[file], steps)
            for file in files
        ):
            runs.append(run)

    return [int(run) for run in runs]

In [18]:
# Re-run the filter with its default folders. The recorded progress bar shows
# 65536 run folders taking roughly five minutes.
runs = filterData()

100%|███████████████████████████████████████████████████████████████████████████| 65536/65536 [05:17<00:00, 206.43it/s]


In [51]:
# Load the previously saved list of accepted runs so it can be compared with the fresh filter result.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
with open("../Data/filtered.pkl", "rb") as f:
    old_runs = pickle.load(f)

In [52]:
# Sanity check on the counts only; equal lengths do not prove that the run ids are the same.
print(len(runs), len(old_runs))

2241 2241


In [34]:
class SkinDataset(Dataset):
    """
    Dataset pairing interpolated displacement signals with scaled sample parameters.

    For every run in ``runs`` the folder ``{signal_folder}/{run}/`` must contain
    exactly ``Disp1.csv`` and ``Disp2.csv``. Each curve is resampled by cubic
    interpolation onto ``steps`` points over [0, 7) and stored as float32, so
    ``self.input`` has shape (n_runs, 2, steps) with Disp1 first. The matching
    row ``samples[int(run)]`` from ``sample_file`` becomes the target.

    Runs with an unexpected file set, or where either curve does not end
    exactly at x = 7.0, are skipped.

    Parameters:
        scaler: object with a ``fit_transform`` method. It is fitted on ALL
            targets here, i.e. before any train/test split is made.
        signal_folder: folder holding one sub-folder per run.
        sample_file: pickle file with the per-run parameters, indexable by run id.
        runs: iterable of run ids to load.
            NOTE(review): the default stops at 65534 while the filter cell's
            progress bar reports 65536 folders — confirm whether the last run
            is excluded on purpose.
        steps: number of interpolation points per curve.

    NOTE(review): the grid here uses ``endpoint=False`` whereas ``filterData``
    interpolates with ``endpoint=True``; confirm the difference is intended.
    """
    def __init__(
        self,
        scaler,
        signal_folder="D:/SamplingResults2",
        sample_file="../Data/newSamples.pkl",
        runs=range(65535),
        steps=128
        ):
        # Load both disp1 and disp2 from each folder
        # Folders ordered according to index of sample
        self.input = []
        self.output = []

        # NOTE: unpickling can execute arbitrary code; only load trusted files
        with open(f"{sample_file}", "rb") as f:
            samples = pickle.load(f)

        expected_files = ['Disp1.csv', 'Disp2.csv']
        # Same grid for every curve, so build it once
        xNew = np.linspace(0, 7, num=steps, endpoint=False)

        for run in tqdm(runs):
            # os.listdir gives no ordering guarantee: sort so that valid runs
            # are not dropped and Disp1 always precedes Disp2 in the input
            files = sorted(os.listdir(f"{signal_folder}/{run}/"))

            if files != expected_files:
                continue

            inp = []
            for file in files:
                a = pd.read_csv(f"{signal_folder}/{run}/{file}")
                a.rename(columns = {'0':'x', '0.1': 'y'}, inplace = True)

                # Skip if unconverged
                if a['x'].max() != 7.0:
                    break

                # Interpolate curve for consistent x values
                interped = interp1d(a['x'], a['y'], kind='cubic', fill_value="extrapolate")(xNew)
                inp.append(interped.astype("float32"))
            else:
                # Reached only when neither file triggered the break above,
                # so inp holds exactly the two curves
                self.input.append(inp)
                self.output.append(samples[int(run)])

        # Normalise output variables
        self.output = scaler.fit_transform(self.output)

        # NOTE(review): assumes DEVICE compares equal to the string 'cpu' when
        # running on the CPU — confirm against its definition in Helpers
        self.output = tensor(self.output).type(
            FloatTensor if DEVICE == 'cpu' else GPUFloatTensor
        )
        self.input = tensor(np.array(self.input)).type(
            FloatTensor if DEVICE == 'cpu' else GPUFloatTensor
        )

    def __len__(self):
        """Number of runs that were loaded."""
        return len(self.output)

    def __getitem__(self, idx):
        """Returns ``{"input": signals, "output": scaled parameters}`` for ``idx``."""
        sample = {"input": self.input[idx], "output": self.output[idx]}
        return sample

In [35]:
def getDataset(**kwargs):
    """
    Creates the data set from filtered samples.
    Returns the dataset and the scaler.

    All keyword arguments are forwarded to ``SkinDataset``.

    When the current working directory contains a ``Data`` entry, the paths
    relative to that directory are used as defaults for ``sample_file`` and
    ``signal_folder``; values passed explicitly by the caller are kept.

    When ``runs`` is not given, the run list is loaded from the filtered-runs
    pickle (``Data/filtered.pkl`` or ``../Data/filtered.pkl``).

    Returns:
        (dataset, scaler): the ``SkinDataset`` and the ``MinMaxScaler`` that
        was handed to it.
    """
    # Get filtered data
    if "Data" in os.listdir():
        filtered_file = "Data/filtered.pkl"
        # Only fill in locations the caller did not choose; previously an
        # explicit sample_file/signal_folder was silently overwritten here
        kwargs.setdefault('sample_file', "Data/newSamples.pkl")
        kwargs.setdefault('signal_folder', "../SamplingResults2/")
    else:
        filtered_file = "../Data/filtered.pkl"

    if 'runs' not in kwargs:
        # NOTE: unpickling can execute arbitrary code; only load trusted files
        with open(filtered_file, "rb") as f:
            kwargs['runs'] = pickle.load(f)

    scaler = MinMaxScaler()
    dataset = SkinDataset(scaler=scaler, **kwargs)

    return dataset, scaler

def getSplit(dataset, p1=0.8, batch_size=32):
    """
    Creates a train/test split from the given data.
    Returns train and test data loaders.

    ``p1`` is the fraction of ``dataset`` assigned to the training part
    (rounded down); the remainder forms the test part. Both loaders shuffle
    and use ``batch_size``.
    """
    total = len(dataset)
    n_train = int(p1 * total)
    subsets = random_split(dataset, [n_train, total - n_train])

    # One shuffling loader per subset, in (train, test) order
    return tuple(
        DataLoader(subset, batch_size=batch_size, shuffle=True)
        for subset in subsets
    )

In [44]:
# Build the dataset and its target scaler with default arguments.
# NOTE(review): getDataset is both imported from SkinLearning.Utils.Dataset in the
# first cell and redefined in this notebook, so the version that runs here depends
# on the order in which the cells were executed.
dataset, scaler = getDataset()

100%|█████████████████████████████████████████████████████████████████████████████| 2241/2241 [00:10<00:00, 212.19it/s]


In [45]:
# Split into train/test loaders using getSplit's defaults (with the notebook-local
# definition: 80/20 random split, batch size 32).
# NOTE(review): no seed is set in the visible cells before this call, so the split
# presumably differs between kernel sessions — confirm.
train_loader, test_loader = getSplit(dataset)

In [48]:
# Instantiate the project's MultiTemporal model with temporal_type="LSTM".
# The meaning of out="f_output" and single_fc=False is defined in
# SkinLearning.NN.Models — TODO document it here once confirmed.
lstm = MultiTemporal(out="f_output", single_fc=False, temporal_type="LSTM")

In [49]:
# Train for up to 1500 epochs with early stopping enabled.
# NOTE(review): test_loader is passed as val_loader; if train() uses it to decide
# when to stop, the test split is no longer unseen data — confirm and consider a
# separate validation split.
train_loss, val_loss = train(train_loader, lstm, early_stopping=True, epochs=1500, val_loader=test_loader)

Using: cuda


100%|███████████████████████████████████████████████████████████████████████████████| 56/56 [00:01<00:00, 40.43batch/s]
100%|██████████████████████████████| 56/56 [00:01<00:00, 41.66batch/s, counter=0, epoch=1, lastLoss=1.84, valLoss=0.21]
100%|████████████████████████████| 56/56 [00:01<00:00, 42.18batch/s, counter=0, epoch=2, lastLoss=0.186, valLoss=0.191]
100%|████████████████████████████| 56/56 [00:01<00:00, 42.12batch/s, counter=0, epoch=3, lastLoss=0.188, valLoss=0.186]
100%|████████████████████████████| 56/56 [00:01<00:00, 41.82batch/s, counter=1, epoch=4, lastLoss=0.188, valLoss=0.187]
100%|████████████████████████████| 56/56 [00:01<00:00, 41.93batch/s, counter=2, epoch=5, lastLoss=0.187, valLoss=0.194]
100%|█████████████████████████████| 56/56 [00:01<00:00, 41.61batch/s, counter=3, epoch=6, lastLoss=0.189, valLoss=0.19]
100%|████████████████████████████| 56/56 [00:01<00:00, 41.70batch/s, counter=4, epoch=7, lastLoss=0.187, valLoss=0.192]
100%|████████████████████████████| 56/56

KeyboardInterrupt: 

In [53]:
# Number of samples in the dataset; the recorded output (2241) equals the number of filtered runs.
len(dataset)

2241