In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
np.set_printoptions(threshold=np.inf, suppress=True)

Loading data

In [2]:
sonar_data = pd.read_csv('dataset/sonar.all-data', header=None)
print(sonar_data.head())
print(sonar_data.shape)
print(sonar_data[60].value_counts())

       0       1       2       3       4       5       6       7       8   \
0  0.0200  0.0371  0.0428  0.0207  0.0954  0.0986  0.1539  0.1601  0.3109   
1  0.0453  0.0523  0.0843  0.0689  0.1183  0.2583  0.2156  0.3481  0.3337   
2  0.0262  0.0582  0.1099  0.1083  0.0974  0.2280  0.2431  0.3771  0.5598   
3  0.0100  0.0171  0.0623  0.0205  0.0205  0.0368  0.1098  0.1276  0.0598   
4  0.0762  0.0666  0.0481  0.0394  0.0590  0.0649  0.1209  0.2467  0.3564   

       9   ...      51      52      53      54      55      56      57  \
0  0.2111  ...  0.0027  0.0065  0.0159  0.0072  0.0167  0.0180  0.0084   
1  0.2872  ...  0.0084  0.0089  0.0048  0.0094  0.0191  0.0140  0.0049   
2  0.6194  ...  0.0232  0.0166  0.0095  0.0180  0.0244  0.0316  0.0164   
3  0.1264  ...  0.0121  0.0036  0.0150  0.0085  0.0073  0.0050  0.0044   
4  0.4459  ...  0.0031  0.0054  0.0105  0.0110  0.0015  0.0072  0.0048   

       58      59  60  
0  0.0090  0.0032   R  
1  0.0052  0.0044   R  
2  0.0095  0.0078   

In [3]:
sonar_data.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
count,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,...,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0
mean,0.029164,0.038437,0.043832,0.053892,0.075202,0.10457,0.121747,0.134799,0.178003,0.208259,...,0.016069,0.01342,0.010709,0.010941,0.00929,0.008222,0.00782,0.007949,0.007941,0.006507
std,0.022991,0.03296,0.038428,0.046528,0.055552,0.059105,0.061788,0.085152,0.118387,0.134416,...,0.012008,0.009634,0.00706,0.007301,0.007088,0.005736,0.005785,0.00647,0.006181,0.005031
min,0.0015,0.0006,0.0015,0.0058,0.0067,0.0102,0.0033,0.0055,0.0075,0.0113,...,0.0,0.0008,0.0005,0.001,0.0006,0.0004,0.0003,0.0003,0.0001,0.0006
25%,0.01335,0.01645,0.01895,0.024375,0.03805,0.067025,0.0809,0.080425,0.097025,0.111275,...,0.008425,0.007275,0.005075,0.005375,0.00415,0.0044,0.0037,0.0036,0.003675,0.0031
50%,0.0228,0.0308,0.0343,0.04405,0.0625,0.09215,0.10695,0.1121,0.15225,0.1824,...,0.0139,0.0114,0.00955,0.0093,0.0075,0.00685,0.00595,0.0058,0.0064,0.0053
75%,0.03555,0.04795,0.05795,0.0645,0.100275,0.134125,0.154,0.1696,0.233425,0.2687,...,0.020825,0.016725,0.0149,0.0145,0.0121,0.010575,0.010425,0.01035,0.010325,0.008525
max,0.1371,0.2339,0.3059,0.4264,0.401,0.3823,0.3729,0.459,0.6828,0.7106,...,0.1004,0.0709,0.039,0.0352,0.0447,0.0394,0.0355,0.044,0.0364,0.0439


In [4]:
# replacnutie R za 0 a M za 1
# R - Rock M - Mina
sonar_data[60] = sonar_data[60].replace(['R', 'M'], [0, 1])
sonar_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,0
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,0
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,0
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,0
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,0


Data normalization

In [5]:
normalized_df = sonar_data.copy()
for x in range(60):
    normalized_df[x] = MinMaxScaler().fit_transform(np.array(normalized_df[x]).reshape(-1,1))

normalized_df


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.136431,0.156451,0.135677,0.035426,0.224956,0.237571,0.407468,0.340904,0.449282,0.285714,...,0.027104,0.155844,0.435673,0.149660,0.417949,0.502841,0.185355,0.245179,0.060046,0
1,0.323009,0.221603,0.272011,0.150024,0.283033,0.666756,0.574405,0.755458,0.483045,0.394537,...,0.108417,0.218182,0.111111,0.199546,0.479487,0.389205,0.105263,0.140496,0.087760,0
2,0.182153,0.246892,0.356110,0.243699,0.230028,0.585327,0.648810,0.819405,0.817859,0.869584,...,0.319544,0.418182,0.248538,0.394558,0.615385,0.889205,0.368421,0.258953,0.166282,0
3,0.062684,0.070724,0.199737,0.034950,0.034999,0.071486,0.288149,0.269239,0.077447,0.164593,...,0.161198,0.080519,0.409357,0.179138,0.176923,0.133523,0.093822,0.107438,0.256351,0
4,0.550885,0.282898,0.153088,0.079886,0.132640,0.147003,0.318182,0.531863,0.516659,0.621479,...,0.032810,0.127273,0.277778,0.235828,0.028205,0.196023,0.102975,0.292011,0.203233,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,0.126844,0.145735,0.050263,0.028293,0.082678,0.410642,0.539773,0.361411,0.333629,0.367653,...,0.154066,0.241558,0.552632,0.061224,0.248718,0.176136,0.256293,0.528926,0.348730,1
204,0.227139,0.040720,0.092970,0.120304,0.175755,0.230046,0.258929,0.212348,0.141419,0.291863,...,0.075606,0.228571,0.365497,0.129252,0.151282,0.088068,0.066362,0.168044,0.140878,1
205,0.373894,0.184741,0.054205,0.055635,0.072026,0.287288,0.331169,0.247630,0.175181,0.345488,...,0.216833,0.062338,0.119883,0.126984,0.217949,0.389205,0.308924,0.209366,0.057737,1
206,0.212389,0.148736,0.156045,0.130766,0.025361,0.336469,0.387446,0.235502,0.276914,0.320463,...,0.111270,0.106494,0.339181,0.068027,0.079487,0.088068,0.173913,0.096419,0.096998,1


In [6]:
normalized_df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
count,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,...,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0
mean,0.204011,0.16218,0.139068,0.114342,0.173732,0.253615,0.320472,0.285114,0.252485,0.281652,...,0.180031,0.265172,0.290669,0.197061,0.200555,0.213642,0.175035,0.216015,0.136425,0.533654
std,0.16955,0.141277,0.126242,0.110623,0.140888,0.158843,0.167175,0.187767,0.175311,0.192215,...,0.137432,0.183385,0.213474,0.160717,0.14708,0.164361,0.148051,0.170286,0.11619,0.50007
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.087389,0.067938,0.057326,0.044163,0.079508,0.152714,0.209957,0.165215,0.132571,0.142964,...,0.092368,0.118831,0.127924,0.080499,0.102564,0.096591,0.075515,0.098485,0.057737,0.0
50%,0.15708,0.129447,0.107753,0.090942,0.141517,0.220236,0.280438,0.235061,0.214349,0.244673,...,0.151213,0.235065,0.24269,0.156463,0.165385,0.160511,0.125858,0.173554,0.108545,1.0
75%,0.251106,0.202958,0.185447,0.139563,0.237319,0.333042,0.407738,0.361852,0.334555,0.368082,...,0.227175,0.374026,0.394737,0.260771,0.260897,0.287642,0.229977,0.28168,0.183025,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Train test val split
80/10/10

In [7]:
X = sonar_data.drop(columns=60, axis=1)
y = sonar_data[60]

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=1)

In [9]:
print(X_train.shape)
print(y_train.value_counts())
print('******')
print(X_test.shape)
print(y_test.value_counts())
print('******')
print(X_val.shape)
print(y_val.value_counts())

(166, 60)
1    91
0    75
Name: 60, dtype: int64
******
(21, 60)
1    11
0    10
Name: 60, dtype: int64
******
(21, 60)
0    12
1     9
Name: 60, dtype: int64


Tensorflow

In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from wandb.keras import WandbCallback

Vytvorenie modelu

In [11]:
tf_model = Sequential()
tf_model.add(Dense(16, activation=tf.keras.activations.relu, input_dim=len(X_train.columns)))
tf_model.add(Dense(16, activation=tf.keras.activations.relu))
tf_model.add(Dense(16, activation=tf.keras.activations.relu))
tf_model.add(Dense(1, activation=tf.keras.activations.sigmoid))

In [12]:
tf_model.compile(loss=tf.keras.losses.binary_crossentropy, optimizer=Adam(), metrics=['accuracy'])

In [13]:
tf_model.fit(
    X_train,
    y_train,
    batch_size=16,
    epochs=100,
    validation_data=(X_val,y_val)
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x196f37cab80>

In [14]:
tf_model.save('./model')



INFO:tensorflow:Assets written to: ./model\assets


INFO:tensorflow:Assets written to: ./model\assets


In [15]:
#pomocna funkcia
def predictEval(tf_model, XX, yy):
    # vykonanie predikcie
    y_pred = tf_model.predict(XX)
    # uprava outputu na boolean
    y_pred_bool = np.copy(y_pred)
    for x in y_pred_bool:
        x[0] = round(x[0])
    y_pred_bool

    #vratenie y a accuaracy
    return [y_pred, y_pred_bool, accuracy_score(y_pred_bool, yy)]

Eval

In [16]:
train = predictEval(tf_model, X_train, y_train)
val = predictEval(tf_model, X_val, y_val)
test = predictEval(tf_model, X_test, y_test)

print('Accuracy score')
print(f'Train: {train[2]*100:.2f}%')
print(f'Val: {val[2]*100:.2f}%')
print(f'Test: {test[2]*100:.2f}%')

Accuracy score
Train: 98.80%
Val: 76.19%
Test: 76.19%


In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score

In [18]:
# Define the PyTorch model
class PyTorchModel(nn.Module):
    def __init__(self, input_dim):
        super(PyTorchModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, 16)
        self.fc4 = nn.Linear(16, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = torch.sigmoid(self.fc4(x))
        return x

In [19]:
# Create a custom dataset
class SonarDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X.values, dtype=torch.float32)
        self.y = torch.tensor(y.values, dtype=torch.float32).view(-1, 1)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [20]:
train_dataset = SonarDataset(X_train, y_train)
val_dataset = SonarDataset(X_val, y_val)
test_dataset = SonarDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=len(val_dataset), shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

In [21]:
# Instantiate the model
input_dim = len(X_train.columns)
model = PyTorchModel(input_dim)


In [22]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters())


In [23]:
# Train the model
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
    with torch.no_grad():
        model.eval()
        for X_val_batch, y_val_batch in val_loader:
            y_val_pred = model(X_val_batch)
            val_loss = criterion(y_val_pred, y_val_batch)
    print(f"Epoch: {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}")

Epoch: 1/100, Loss: 0.6938, Val Loss: 0.6985
Epoch: 2/100, Loss: 0.6939, Val Loss: 0.6993
Epoch: 3/100, Loss: 0.6931, Val Loss: 0.7001
Epoch: 4/100, Loss: 0.6759, Val Loss: 0.6992
Epoch: 5/100, Loss: 0.6705, Val Loss: 0.6991
Epoch: 6/100, Loss: 0.6622, Val Loss: 0.6976
Epoch: 7/100, Loss: 0.7124, Val Loss: 0.6966
Epoch: 8/100, Loss: 0.6405, Val Loss: 0.6928
Epoch: 9/100, Loss: 0.7340, Val Loss: 0.6968
Epoch: 10/100, Loss: 0.6303, Val Loss: 0.6795
Epoch: 11/100, Loss: 0.6216, Val Loss: 0.6760
Epoch: 12/100, Loss: 0.6344, Val Loss: 0.6567
Epoch: 13/100, Loss: 0.5790, Val Loss: 0.6380
Epoch: 14/100, Loss: 0.6828, Val Loss: 0.6345
Epoch: 15/100, Loss: 0.5233, Val Loss: 0.6132
Epoch: 16/100, Loss: 0.4052, Val Loss: 0.6398
Epoch: 17/100, Loss: 0.3434, Val Loss: 0.5785
Epoch: 18/100, Loss: 0.4802, Val Loss: 0.6134
Epoch: 19/100, Loss: 0.5932, Val Loss: 0.6040
Epoch: 20/100, Loss: 0.4175, Val Loss: 0.5921
Epoch: 21/100, Loss: 0.5626, Val Loss: 0.5845
Epoch: 22/100, Loss: 0.6027, Val Loss: 0.59

In [24]:
def predict_eval(model, loader):
    with torch.no_grad():
        model.eval()
        for X_batch, y_batch in loader:
            y_pred = model(X_batch)
            y_pred_bool = torch.round(y_pred)
            accuracy = accuracy_score(y_pred_bool, y_batch)
    return accuracy

In [25]:
train_accuracy = predict_eval(model, train_loader)
val_accuracy = predict_eval(model, val_loader)
test_accuracy = predict_eval(model, test_loader)

In [26]:
print('Accuracy score')
print(f'Train: {train_accuracy * 100:.2f}%')
print(f'Val: {val_accuracy * 100:.2f}%')
print(f'Test: {test_accuracy * 100:.2f}%')

Accuracy score
Train: 83.33%
Val: 71.43%
Test: 76.19%
