In [18]:
import pandas as pd
import numpy as np

Loading input into numpy ndarray

In [19]:
def clean_cell(cell: str):
    if cell == "R\n" or "M\n":
        return cell.strip("\n")
    return float(cell)

In [20]:
file = "data/sonar.all-data"
f = open(file)
M = []
for line in f:
    M.append([clean_cell(x) for x in line.split(',')])
M = np.array([np.array(i) for i in M])

Preparing train and val splits

In [21]:
np.random.shuffle(M)
x = M[:, :60].astype(float)
y = M[:, 60]

In [22]:
def normalise_2darray(d2array):
    output_array = []
    for array in d2array:
        x = (array - np.mean(array)) / np.std(array)
        # x[x < 0] *= -1
        # x = (x-np.min(x))/(np.max(x)-np.min(x))
        output_array.append(x)
    return np.asarray(output_array)

x_norm = normalise_2darray(x)
x_norm

array([[-0.90105381, -0.91308249, -0.85327323, ..., -0.93313028,
        -0.94081527, -0.95618525],
       [-1.02742384, -0.90864724, -0.89553311, ..., -1.12072155,
        -1.06414343, -1.111729  ],
       [-0.82561428, -0.7693425 , -0.83233858, ..., -0.85109583,
        -0.82349081, -0.83623159],
       ...,
       [-0.79760933, -0.80481718, -0.81399082, ..., -0.82545786,
        -0.82447497, -0.82414734],
       [-0.63204503, -0.66039757, -0.74612234, ..., -0.76013184,
        -0.78047896, -0.79582269],
       [-0.90872899, -0.8960953 , -0.90114878, ..., -0.93688406,
        -0.9249723 , -0.93544021]])

In [23]:
def encode_classes(target):
    encoded_target = []
    encoding_dict = {'R': 0, 'M': 1}
    
    for x in target:
        encoded_target.append(encoding_dict.get(x))
            
    return np.asarray(encoded_target), encoding_dict

y_enc, encoding = encode_classes(y)
print(y_enc, '\n', encoding)

[1 0 1 1 1 1 1 0 0 0 1 0 1 0 1 1 1 0 1 0 1 1 0 0 1 1 0 1 0 1 1 1 0 1 0 0 1
 0 1 0 0 1 0 1 1 0 1 0 0 0 1 1 1 1 1 0 0 0 0 0 0 1 0 1 1 1 0 0 1 0 0 0 0 0
 0 1 1 0 0 1 1 1 1 1 0 0 0 0 1 1 0 1 1 0 1 1 1 1 0 0 0 1 1 0 1 1 0 0 1 1 0
 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 1 1 1 1 1 1 0 1 1
 0 1 0 1 0 1 1 0 1 1 1 0 0 1 1 1 1 1 0 1 0 1 1 1 1 1 1 0 1 1 1 0 0 0 1 1 1
 0 1 1 1 1 0 0 1 0 0 1 0 0 1 1 0 1 1 1 1 0 1 0] 
 {'R': 0, 'M': 1}


Removing features with low impact on target class

In [24]:
corr_arr = []
for idx, row in enumerate(x_norm):
    arr = np.asarray(list(row) + [y_enc[idx]])
    corr_arr.append(arr)
corr_arr = np.asarray(corr_arr)
corr_map = np.corrcoef(corr_arr, rowvar=False).round(2)
corr_map = corr_map[:, 60]  # keep only final column of the heatmap | correlation to target class
corr_map = corr_map.reshape((61, 1))

to_drop = []
for idx, value in enumerate(corr_map):
    if value > -0.1 and value < 0.1:
        to_drop.append(idx)
to_drop

x_norm = np.delete(x_norm, to_drop, axis=1)
x_norm.shape

(208, 37)

Splitting into train, val and test sets

In [35]:
np.array(np.unique(y_enc, return_counts=True)).T

array([[  0,  97],
       [  1, 111]], dtype=int64)

In [25]:
from sklearn.model_selection import train_test_split
x_train, x_test = train_test_split(x_norm, test_size=0.1)
y_train, y_test = train_test_split(y_enc, test_size=0.1)
x_train, x_val =  train_test_split(x_train, test_size=0.2)
y_train, y_val = train_test_split(y_train, test_size=0.2)

In [26]:
print(x_train.shape, x_val.shape, x_test.shape)
print(y_train.shape, y_val.shape, y_test.shape)

(149, 37) (38, 37) (21, 37)
(149,) (38,) (21,)


In [27]:
np.array(np.unique(y_train, return_counts=True)).T

array([[ 0, 70],
       [ 1, 79]], dtype=int64)

In [28]:
np.array(np.unique(y_val, return_counts=True)).T

array([[ 0, 17],
       [ 1, 21]], dtype=int64)

In [29]:
np.array(np.unique(y_test, return_counts=True)).T

array([[ 0, 10],
       [ 1, 11]], dtype=int64)

Creating model in Keras (tensorflow)

In [30]:
from keras.models import Model
from keras.layers import Input, Dense
from keras.activations import sigmoid, relu, hard_sigmoid, tanh, softmax

input = Input(shape=(x_norm.shape[1], ), name="input")
dense1 = Dense(128, activation=relu)(input)
dense2 = Dense(128, activation=relu)(dense1)
dense3 = Dense(128, activation=relu)(dense2)
dense4 = Dense(128, activation=relu)(dense3)
dense5 = Dense(64, activation=relu)(dense4)
dense6 = Dense(64, activation=relu)(dense5)
dense7 = Dense(64, activation=relu)(dense6)
dense8 = Dense(64, activation=relu)(dense7)
dense_out = Dense(1, activation=sigmoid, name="dense_out")(dense8)
model = Model(input, dense_out, name='test_model')

In [31]:
from keras.optimizers import Adam, SGD, RMSprop
from keras.losses import BinaryCrossentropy
from keras.metrics import BinaryAccuracy, Recall, AUC

# optimizer = Adam(learning_rate=0.001, beta_1=0.8, beta_2=0.9)
optimizer = RMSprop(learning_rate=0.001, rho=0.9)
model.compile(loss=BinaryCrossentropy(from_logits=False), optimizer=optimizer, metrics=[BinaryAccuracy(), Recall(), AUC()])

In [32]:
val_data = (x_val, y_val)
model.fit(x=x_train, y=y_train, validation_data=val_data, batch_size=25, epochs=50, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x2cd7e710610>

In [33]:
model.evaluate(x=x_test, y=y_test)



[1.312403917312622, 0.6666666865348816, 0.5454545617103577, 0.7954545617103577]

In [34]:
model.summary()

Model: "test_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 37)]              0         
                                                                 
 dense_8 (Dense)             (None, 128)               4864      
                                                                 
 dense_9 (Dense)             (None, 128)               16512     
                                                                 
 dense_10 (Dense)            (None, 128)               16512     
                                                                 
 dense_11 (Dense)            (None, 128)               16512     
                                                                 
 dense_12 (Dense)            (None, 64)                8256      
                                                                 
 dense_13 (Dense)            (None, 64)                4