In [18]:
import pandas as pd
import numpy as np

Loading input into numpy ndarray

In [19]:
def clean_cell(cell: str):
    """Convert one raw CSV field into a class label or a numeric value.

    Parameters
    ----------
    cell : str
        A single comma-separated field; it may carry surrounding whitespace
        or the trailing newline of its line.

    Returns
    -------
    str or float
        ``"R"`` or ``"M"`` when the field is a class label, otherwise the
        field parsed as a float.

    Raises
    ------
    ValueError
        If the field is neither a known label nor a valid number.
    """
    value = cell.strip()
    # A condition of the form `cell == "R\n" or "M\n"` is always truthy
    # (a non-empty string literal is True), which would make the float
    # conversion below unreachable. Test membership explicitly instead, and
    # do it after stripping so a label on a final line without a newline is
    # recognised as well.
    if value in ("R", "M"):
        return value
    return float(value)

In [20]:
file = "data/sonar.all-data"
M = []
# A context manager guarantees the file handle is closed, even if parsing
# one of the lines raises.
with open(file) as f:
    for line in f:
        if not line.strip():
            # Skip blank lines (e.g. a trailing empty line); they would
            # otherwise yield a one-element row and a ragged matrix.
            continue
        M.append([clean_cell(x) for x in line.split(',')])
# Stack the parsed rows into one 2-D array (one row per line of the file).
M = np.array([np.array(i) for i in M])

Splitting matrix into features and classes

In [21]:
# The first 60 columns are the numeric features (converted to float);
# column 60 is the class-label column and is kept as-is.
x, y = M[:, :60].astype(float), M[:, 60]

Normalising features (each sample is standardised to zero mean and unit variance across its 60 features)

In [22]:
def normalise_2darray(d2array):
    """Standardise every row of a 2-D array independently.

    Each row has its own mean subtracted and is then divided by its own
    standard deviation, so the statistics are computed per row (per sample),
    not per column.

    Parameters
    ----------
    d2array : iterable of 1-D array-like
        The rows to standardise.

    Returns
    -------
    numpy.ndarray
        Array holding the standardised rows, in the original order.
    """
    return np.asarray([(row - np.mean(row)) / np.std(row) for row in d2array])

# Standardise each sample (row) of the feature matrix.
x_norm = normalise_2darray(x)

Encoding classes and displaying counts

In [23]:
def encode_classes(target):
    """Map the class labels 'R' and 'M' to the integers 0 and 1.

    Parameters
    ----------
    target : iterable of str
        Class labels; every element must be 'R' or 'M'.

    Returns
    -------
    tuple
        ``(encoded, encoding_dict)`` where ``encoded`` is a numpy array with
        one integer per input label and ``encoding_dict`` is the
        label -> integer mapping that was applied.

    Raises
    ------
    ValueError
        If a label is not present in the mapping.
    """
    encoding_dict = {'R': 0, 'M': 1}

    encoded_target = []
    for label in target:
        # A plain dict.get() would silently produce None for an unexpected
        # label and turn the result into an object array; fail loudly here
        # instead of much later inside model training.
        if label not in encoding_dict:
            raise ValueError(f"Unknown class label: {label!r}")
        encoded_target.append(encoding_dict[label])

    return np.asarray(encoded_target), encoding_dict

y_enc, encoding = encode_classes(y)
# One row per class: [encoded label, number of samples].
np.column_stack(np.unique(y_enc, return_counts=True))

array([[  0,  97],
       [  1, 111]], dtype=int64)

Removing features with low impact on target class

In [24]:
CORR_TRESHOLD = 0.1

# Append the encoded target as the last column so that a single correlation
# matrix contains every feature's correlation with the class label.
# NOTE(review): this runs on the full data set, before the train/val/test
# split, so the labels of the held-out samples influence which features are
# kept. Consider computing the correlations on the training split only.
corr_arr = np.column_stack((x_norm, y_enc))
corr_map = np.corrcoef(corr_arr, rowvar=False).round(2)
# Keep only the final column (correlation to the target class). Indexing from
# the end instead of hard-coding 60/61 means re-running this cell after
# x_norm has already been reduced no longer raises an IndexError.
corr_map = corr_map[:, -1].reshape((-1, 1))

# The threshold is applied to the correlations rounded to two decimals above.
# The target's own entry (correlation 1.0) can never fall inside the band, so
# every index collected here is a valid feature column.
to_drop = [
    idx for idx, value in enumerate(corr_map)
    if -CORR_TRESHOLD < value < CORR_TRESHOLD
]

x_norm = np.delete(x_norm, to_drop, axis=1)
x_norm.shape

(208, 37)

Splitting into train, val and test sets

In [25]:
from sklearn.model_selection import train_test_split

# Fixed seed so the train/val/test partition (and therefore every metric
# reported further down) is reproducible after a kernel restart.
SPLIT_SEED = 42

# First hold out 10% of all samples as the test set ...
x_train, x_test, y_train, y_test = train_test_split(
    x_norm, y_enc, test_size=0.1, random_state=SPLIT_SEED
)
# ... then take 30% of the remainder as the validation set.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.3, random_state=SPLIT_SEED
)

In [26]:
# Feature-matrix shapes on the first line, matching label-vector shapes on
# the second (order: train, val, test).
for split_group in ((x_train, x_val, x_test), (y_train, y_val, y_test)):
    print(*(part.shape for part in split_group))

(130, 37) (57, 37) (21, 37)
(130,) (57,) (21,)


In [27]:
# Class balance of the training split: one row per class, [label, count].
np.column_stack(np.unique(y_train, return_counts=True))

array([[ 0, 65],
       [ 1, 65]], dtype=int64)

In [28]:
# Class balance of the validation split: one row per class, [label, count].
np.column_stack(np.unique(y_val, return_counts=True))

array([[ 0, 23],
       [ 1, 34]], dtype=int64)

In [29]:
# Class balance of the test split: one row per class, [label, count].
np.column_stack(np.unique(y_test, return_counts=True))

array([[ 0,  9],
       [ 1, 12]], dtype=int64)

Creating model in Keras (tensorflow)

In [30]:
from keras.models import Model
from keras.layers import Input, Dense
from keras.activations import sigmoid, relu, hard_sigmoid, tanh, softmax

# Negative-side slope of the leaky ReLU used in every hidden layer.
LEAKY_RELU_ALPHA = 0.3
# Width of each hidden Dense layer, in order from input to output.
HIDDEN_UNITS = (128, 128, 128, 64, 64, 64)


def leaky_relu(x):
    """Leaky ReLU activation with slope ``LEAKY_RELU_ALPHA`` for negative inputs.

    A single named function replaces six identical anonymous lambdas, so the
    activation is defined once and shows up under a readable name.
    """
    return relu(x, alpha=LEAKY_RELU_ALPHA)


# Named `inputs` rather than `input` so the Python builtin is not shadowed
# for the rest of the notebook.
inputs = Input(shape=(x_norm.shape[1], ), name="input")

# Chain the hidden layers: each Dense layer consumes the previous output.
hidden = inputs
for units in HIDDEN_UNITS:
    hidden = Dense(units, activation=leaky_relu)(hidden)

# A single sigmoid unit produces the probability of the class encoded as 1.
dense_out = Dense(1, activation=sigmoid, name="dense_out")(hidden)
model = Model(inputs, dense_out, name='test_model')

The network consists of dense layers using a leaky ReLU activation function. The alpha is set to 0.3 because it is the default value for the Keras LeakyReLU layer and is also broadly consistent with the solutions and literature I found online. I am using leaky ReLU instead of standard ReLU to avoid dead (zero) gradients.

We also tried alternating tanh and sigmoid activations, as well as standard ReLU, but the results were significantly worse: 0.4–0.6 accuracy on the test set.

In [31]:
from keras.optimizers import Adam
from keras.losses import BinaryCrossentropy
from keras.metrics import BinaryAccuracy, AUC

# Adam with an explicit learning rate and moment-decay coefficients.
optimizer = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.99)

# The output layer already applies a sigmoid, so the loss receives
# probabilities rather than logits.
model.compile(
    optimizer=optimizer,
    loss=BinaryCrossentropy(from_logits=False),
    metrics=[BinaryAccuracy(), AUC()],
)

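We are using the Adam optimizer with the default Keras learning rate (0.001) and beta_1 (0.9), but with beta_2 set to 0.99 instead of the Keras default of 0.999. The model is evaluated using the BinaryAccuracy and AUC metrics.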

Other optimizers, such as RMSProp, didn't make much difference; they only behaved differently with regard to the batch sizes and number of epochs required.

Changing the Adam parameters caused some difference in the results with the batch size and epoch settings we used most frequently, and also occasional wild jumps in val_loss.

In [32]:
import wandb
from wandb.keras import WandbMetricsLogger

# Single source of truth for the training hyperparameters: the W&B group
# name is derived from them, so it cannot drift from what is actually run.
BATCH_SIZE = 25
EPOCHS = 50

# The project name is kept verbatim; changing it would log the run to a
# different W&B project.
wandb.init(project="nn-assignemt-1-tf-keras", group=f"batch_{BATCH_SIZE}_epoch_{EPOCHS}")

val_data = (x_val, y_val)
# Keep the returned History object so the per-epoch metrics stay available
# for later inspection instead of only being echoed as an object repr.
history = model.fit(
    x=x_train,
    y=y_train,
    validation_data=val_data,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
    callbacks=[WandbMetricsLogger()],
)

0,1
epoch/auc,▁▅▅▆▇▇▇▇▇▇████▇███████████████▇█████████
epoch/binary_accuracy,▁▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇████████▇▇▇▇████████
epoch/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
epoch/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/loss,█▇▆▆▅▄▅▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▃▃▅▃▂▂▂▁▁▁▁▁
epoch/val_auc,▁▂▂▃▆▇▅▆▆▆▆▇▇▇█▇▇▇▇▇▇▇▇▆▆▇▇▇▇▅█▇████████
epoch/val_binary_accuracy,▁▃▃▄▅▆▅▅▆▅▆▆▇▆▇▅▇█▆▇▆▇▇▆▇██▇▆▆▆▇▆▇▇▇▇▇▇▇
epoch/val_loss,▂▂▂▂▁▁▂▁▁▂▂▂▂▂▁▂▂▂▂▂▄▃▄▃▅▄▃▃▄█▂▂▂▁▂▃▃▃▃▃

0,1
epoch/auc,1.0
epoch/binary_accuracy,0.99231
epoch/epoch,49.0
epoch/learning_rate,0.001
epoch/loss,0.02392
epoch/val_auc,0.85546
epoch/val_binary_accuracy,0.78947
epoch/val_loss,0.92729


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x14ca72d85e0>

In [33]:
# Score the trained model on the held-out test split. The result lists the
# loss first, followed by the metrics passed to compile (BinaryAccuracy, AUC).
model.evaluate(x=x_test, y=y_test)



[2.6008682250976562, 0.6666666865348816, 0.75]

In [34]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "test_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 37)]              0         
                                                                 
 dense_6 (Dense)             (None, 128)               4864      
                                                                 
 dense_7 (Dense)             (None, 128)               16512     
                                                                 
 dense_8 (Dense)             (None, 128)               16512     
                                                                 
 dense_9 (Dense)             (None, 64)                8256      
                                                                 
 dense_10 (Dense)            (None, 64)                4160      
                                                                 
 dense_11 (Dense)            (None, 64)                4