In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler

2022-01-18 17:34:33.574866: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-01-18 17:34:33.574887: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Load the pickled training frame and carve out a stratified 25% validation split.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
# use k fold cross validation later :)
data = pd.read_pickle("../data/train/train_set_128px.pkl")
train_data, val_data = train_test_split(
    data,
    test_size=0.25,
    stratify=data["label_proc_frozen"],
    random_state=7655,  # fixed seed so the split is reproducible
)
# NOTE(review): the split is stratified on "label_proc_frozen", while the target used
# further down in this notebook is "label_proc_aggregate" — confirm this is intended.
del data  # the full frame is no longer needed once it has been split

In [3]:
def normalize(images):
    """Min-max scale every image of a stack to the range [0, 1].

    The minimum and maximum are taken separately for each entry along the
    first axis, over axes 1 and 2 (so the input needs at least three
    dimensions), and each image is rescaled with its own extrema.

    Images whose values are all identical have a zero value range; they are
    mapped to all zeros instead of producing NaN from a 0/0 division.

    Parameters
    ----------
    images : array-like
        Stack of images; axes 1 and 2 are the ones reduced over.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``images`` with per-image values in [0, 1].
    """
    minimum = np.min(images, axis=(1, 2), keepdims=True)
    maximum = np.max(images, axis=(1, 2), keepdims=True)
    value_range = maximum - minimum
    # Replace a zero range by one: the numerator is zero there as well, so the
    # constant image becomes all zeros rather than NaN.
    safe_range = np.where(value_range == 0, np.ones_like(value_range), value_range)
    return (images - minimum) / safe_range

In [4]:
label = "label_proc_aggregate"


def _frame_to_arrays(frame, target):
    """Build the (X, Y) arrays for one split of the data frame.

    The "img_abs" and "img_ang" columns are stacked into arrays, normalized
    independently and joined as two channels along a new last axis; the
    ``target`` column is returned as a NumPy array.
    """
    abs_channel = normalize(np.stack(frame["img_abs"]))
    ang_channel = normalize(np.stack(frame["img_ang"]))
    targets = frame[target].to_numpy()
    return np.stack((abs_channel, ang_channel), axis=-1), targets


# Training split; the frame is dropped as soon as the arrays exist to free memory.
X_train, Y_train = _frame_to_arrays(train_data, label)
del train_data

# Validation split, handled the same way.
X_val, Y_val = _frame_to_arrays(val_data, label)
del val_data

In [5]:
# Class counts before balancing (labels are treated as 0 = negative, 1 = positive).
n_tot = len(Y_train)
n_pos = int(np.sum(Y_train))
n_neg = n_tot - n_pos
print(n_tot, n_neg, n_pos)

# Undersample Data -> Balance it
# A seeded generator (same seed as the train/validation split) makes the set of
# dropped negatives identical on every run instead of depending on global RNG state.
rng = np.random.default_rng(7655)
neg_idx = np.where(Y_train == 0)[0]
# Never request a negative number of samples: if negatives are not the majority
# class nothing is removed.
n_drop = max(len(neg_idx) - n_pos, 0)
idx_del = rng.choice(len(neg_idx), size=n_drop, replace=False)
Y_train = np.delete(Y_train, neg_idx[idx_del])
X_train = np.delete(X_train, neg_idx[idx_del], axis=0)

# Class counts after balancing.
n_tot = len(Y_train)
n_pos = int(np.sum(Y_train))
n_neg = n_tot - n_pos
print(n_tot, n_neg, n_pos)

12577 10695 1882
3764 1882 1882


In [6]:
BATCH_SIZE = 64

# Wrap the in-memory arrays as tf.data datasets of (X, Y) element pairs.
train_ds = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
val_ds = tf.data.Dataset.from_tensor_slices((X_val, Y_val))

# Training input: cache -> shuffle (1024-element buffer) -> batch -> prefetch.
# Two further stages are currently disabled between batch and prefetch:
#   .repeat()
#   .map(Augment())
train_batches = train_ds.cache()
train_batches = train_batches.shuffle(buffer_size=1024)
train_batches = train_batches.batch(BATCH_SIZE)
train_batches = train_batches.prefetch(buffer_size=tf.data.AUTOTUNE)

# Validation input: same pipeline without shuffling.
val_batches = val_ds.cache()
val_batches = val_batches.batch(BATCH_SIZE)
val_batches = val_batches.prefetch(buffer_size=tf.data.AUTOTUNE)

2022-01-18 17:34:39.695010: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-01-18 17:34:39.695042: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-01-18 17:34:39.695068: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (t480s): /proc/driver/nvidia/version does not exist
2022-01-18 17:34:39.695381: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-01-18 17:34:39.910486: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 549584896 exceeds 10%

In [7]:
def getModel1():
    """Build the (uncompiled) CNN used for binary classification.

    Architecture, in order:
      * Conv2D(32, 3x3, relu) on a (128, 128, 2) input, MaxPooling2D(2x2), Dropout(0.5)
      * three times: Conv2D(64, 3x3, relu), MaxPooling2D(2x2), Dropout(0.5)
      * Conv2D(64, 3x3, relu), Flatten
      * Dense(256, relu), Dropout(0.25), Dense(64, relu), Dense(10, relu)
      * Dense(1, sigmoid) output

    Returns
    -------
    tf.keras.Sequential
        The assembled model; compiling is left to the caller.
    """
    # Entry stage: the first layer also declares the input shape.
    stack = [
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 2)),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.5),
    ]

    # Three identical convolution / pooling / dropout stages.
    for _ in range(3):
        stack.extend([
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.5),
        ])

    # Last convolution followed by the dense classifier head.
    stack.extend([
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.25),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])
    return tf.keras.Sequential(stack)

In [8]:
# Instantiate the network and print its layer overview.
model = getModel1()
model.summary()

# Adam optimizer with binary cross-entropy loss; accuracy, recall and precision
# are tracked so they show up in the training history under these names.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.Precision(name='precision'),
    ],
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 126, 126, 32)      608       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 63, 63, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 63, 63, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 61, 61, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 30, 30, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 30, 30, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 64)        3

In [9]:
# Train for 15 epochs on the training batches, passing the validation batches
# for evaluation; the returned History object is used by the plots below.
history = model.fit(
    train_batches,
    validation_data=val_batches,
    epochs=15,
)

Epoch 1/15


2022-01-18 17:36:20.238549: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)




2022-01-18 17:36:56.639025: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 549584896 exceeds 10% of free system memory.
2022-01-18 17:36:57.116770: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 549584896 exceeds 10% of free system memory.


Epoch 2/15

2022-01-18 17:37:47.826901: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 549584896 exceeds 10% of free system memory.
2022-01-18 17:37:48.279183: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 549584896 exceeds 10% of free system memory.


Epoch 3/15
Epoch 4/15

KeyboardInterrupt: 

In [None]:
# Accuracy and loss curves (training and validation) on one shared axis.
# NOTE(review): the y-axis is labelled 'loss' although accuracy is plotted too.
curve_keys = ['accuracy', 'val_accuracy', 'loss', 'val_loss']
for key in curve_keys:
    plt.plot(history.history[key])
plt.title('model loss & accuracy')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(curve_keys, loc='upper left')
plt.show()

In [None]:
# Recall and precision curves (training and validation) per epoch.
metric_keys = ['recall', 'val_recall', 'precision', 'val_precision']
for key in metric_keys:
    plt.plot(history.history[key])
plt.title('recall & precision')
# The curves are recall/precision values, not a loss.
plt.ylabel('score')
plt.xlabel('epoch')
plt.legend(metric_keys, loc='upper left')
plt.show()