Preprocessing

In [1]:
# Mount Google Drive so files stored there are reachable from this runtime.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
# List the project folder on Drive to confirm that data_preparer.py and the data directory are visible.
!ls "/content/drive/My Drive/Colab Notebooks"

 data   data_preparer.py  'H&M.ipynb'   __pycache__   train.ipynb


In [3]:
#imports
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import svm
import sys
sys.path.insert(0,"/content/drive/My Drive/Colab Notebooks")
import data_preparer
from pathlib import Path
from keras.utils import to_categorical
import torch
import tensorflow as tf

In [4]:
# Build the sampling vectors (relative, then absolute positions) and the mask derived from them.
# NOTE(review): the meaning of the arguments (7, 180, 20) is defined in data_preparer and is
# not visible here. The recorded cell output "Spacing: 30.0 degrees" and the (7, 20, n) array
# shape shown further down suggest 7 vectors spread over 180 degrees with 20 samples each — confirm.
rel = data_preparer.get_vectors_relative_position(7, 180, 20)
# `abss` is passed to every data_preparer.get_pixel_data call below.
abss = data_preparer.get_vectors_absolute_position(rel)
mask = data_preparer.get_mask(abss)

Spacing: 30.0 degrees


In [5]:
# Load the reconstruction file of one recording (subject sub001) from Drive.
# `recon` is the data source for every data_preparer.get_pixel_data call below.
file = "sub001_2drt_07_grandfather1_r1_recon.h5"
filepath = f"/content/drive/My Drive/Colab Notebooks/data/sub001/2drt/recon/{file}"
recon = data_preparer.get_recon(filepath)

In [6]:
# Load the timestamps table; later cells read its 'first_frame', 'last_frame' and
# 'Buchstabe' columns. Note that `filepath` is overwritten here.
# NOTE(review): this file name refers to recording "01_vcv1", while the recon file loaded
# above is recording "07_grandfather1". If the frame ranges are meant to index into `recon`,
# the two files should presumably belong to the same recording — confirm which one is intended.
filepath = "/content/drive/My Drive/Colab Notebooks/data/sub001/timestamps/sub001_2drt_01_vcv1_r1_recon.csv"
timestamps = data_preparer.read_timestamps(filepath)

In [7]:
# Extract the pixel data for the first row of timestamps
# (the same call that produces `vector1` in the next cell).
vectors = data_preparer.get_pixel_data(recon, abss, timestamps.iloc[0])

In [26]:
# Pixel data for the first two timestamp rows; used below to prototype padding and stacking.
vector1, vector2 = (
    data_preparer.get_pixel_data(recon, abss, timestamps.iloc[row])
    for row in range(2)
)


In [27]:
# Inspect the array layout of the first sample.
vector1.shape

(7, 20, 52)

In [8]:
# Per-row frame count (last_frame - first_frame); its maximum is used later as the padded length.
timestamps['total frames'] = timestamps['last_frame'].sub(timestamps['first_frame'])

In [None]:
# Largest per-row frame count, rounded to an integer; displayed as the cell result.
highest_framenr = round(timestamps['total frames'].max())
highest_framenr

80

In [None]:
# Wrap copies of the two sample arrays as PyTorch tensors.
tensor1, tensor2 = (torch.from_numpy(arr.copy()) for arr in (vector1, vector2))

In [None]:
# Zero-pad both tensors at the end of the last (frame) axis to a common length, then stack them.
# The pad width is derived from the tensor shapes instead of a hard-coded (7, 20, 12) block,
# so the cell works for any pair of rows and is safe to re-run (padding by 0 is a no-op).
# Unlike concatenating with torch.zeros, padding keeps each tensor's own dtype.
target_len = max(tensor1.shape[2], tensor2.shape[2])
tensor1 = torch.nn.functional.pad(tensor1, (0, target_len - tensor1.shape[2]))
tensor2 = torch.nn.functional.pad(tensor2, (0, target_len - tensor2.shape[2]))
# stack the tensors along a new first dimension
stacked_tensor = torch.stack([tensor1, tensor2], dim=0)

In [None]:
# Check the stacked layout: a new leading axis of size 2 in front of the per-sample axes.
stacked_tensor.shape

torch.Size([2, 7, 20, 52])

In [None]:
# Number of frames by which the first row falls short of the longest row (its required padding).
highest_framenr - round(timestamps.iloc[0]['total frames'])

28

In [9]:
# Build one tensor per timestamps row and stack them into a single batch.
# The last axis of each sample is its number of frames, which differs between rows, so
# shorter samples are zero-padded at the end of that axis up to the longest sample before
# stacking. The pad width is taken from the actual tensor shapes rather than from the rounded
# 'total frames' column and hard-coded (7, 20) sizes, so a mismatch between the metadata and
# the extracted data can no longer make torch.stack fail.
tensor_list = []
for i in range(len(timestamps)):
  vectors = data_preparer.get_pixel_data(recon, abss, timestamps.iloc[i])
  # copy() hands torch a fresh array instead of whatever buffer the helper returned
  tensor_list.append(torch.from_numpy(vectors.copy()))
if not tensor_list:
  raise ValueError("timestamps is empty - there are no samples to stack")
# longest frame axis among all samples = common padded length
highest_framenr = max(t.shape[2] for t in tensor_list)
tensor_list = [
  torch.nn.functional.pad(t, (0, highest_framenr - t.shape[2]))  # pad only the end of the last axis
  for t in tensor_list
]
final_tensor = torch.stack(tensor_list, dim=0)

In [10]:
# Check the batch layout: one entry per timestamps row, all padded to the same frame length.
final_tensor.shape

torch.Size([36, 7, 20, 80])

In [11]:
# Convert the stacked PyTorch tensor to NumPy (reshaped into X for the train/test split below)
# and additionally to a TensorFlow tensor.
np_tensor = final_tensor.numpy()
# NOTE(review): tf_tensor is not referenced again in the visible cells — confirm it is still needed.
tf_tensor = tf.convert_to_tensor(np_tensor)

In [12]:
#and the label/letter of the vectors
timestamps['Buchstabe'] = timestamps['Buchstabe'].astype('category') #convert to catergory type
timestamps['letter_cat'] = timestamps["Buchstabe"].cat.codes # get new col with numeric category

In [18]:
# Flatten everything after the first two axes into one feature axis and split into train/test.
# The leading sizes are read from the array instead of being hard-coded (36, 7), so the cell
# keeps working when the number of timestamp rows or vectors changes.
# NOTE(review): with this layout the LSTM treats axis 1 (the vectors) as the sequence axis and
# the flattened remaining axes as features; if the frames are meant to be the time axis, the
# array would need to be transposed first — confirm the intended layout.
n_samples, n_steps = np_tensor.shape[:2]
X = np_tensor.reshape((n_samples, n_steps, -1))
y = timestamps['letter_cat']  # integer class code per sample

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)

In [20]:
# Check the model input layout: (samples, sequence steps, flattened features).
X.shape

(36, 7, 1600)

Start the training...

References used: https://medium.com/@dclengacher/keras-lstm-recurrent-neural-networks-c1f5febde03d and https://shap.readthedocs.io/en/latest/example_notebooks/text_examples/sentiment_analysis/Keras%20LSTM%20for%20IMDB%20Sentiment%20Classification.html

In [14]:
from keras.models import Model
from keras.models import Sequential
from keras.layers import Input, Dense, TimeDistributed
from keras.layers import LSTM, Bidirectional, Conv1D, concatenate, Permute, Dropout

In [34]:
#set up the model
# The targets are integer category codes (timestamps['letter_cat']), i.e. a multi-class
# problem. A single sigmoid unit trained with binary cross-entropy can only separate two
# classes, so the output layer has one softmax unit per class and is trained with sparse
# categorical cross-entropy, which accepts integer labels directly.
num_classes = int(y.max()) + 1  # category codes run from 0 to K-1
model = Sequential()
model.add(LSTM(10, return_sequences=False))  # only the last step's output feeds the classifier
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# NOTE(review): the test split is also used as validation data, so the evaluation below is
# not on data the training loop never saw; consider a separate validation split.
model.fit(X_train, y_train,
          batch_size=36,
          epochs=50,
          validation_data=(X_test, y_test))
# evaluate returns the loss followed by the compiled metrics (here: accuracy)
score, acc = model.evaluate(X_test, y_test,
                            batch_size=36)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
