In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
np.set_printoptions(threshold=np.inf, suppress=True)
import matplotlib.pyplot as plt

Loading data

Dataset obsahuje 60 spojitých príznakov (continuous features) a jeden cieľový atribút (R/M), ktorý hovorí o tom, či ide o kameň (R – rock) alebo mínu (M – mine).

In [2]:
# Load the raw sonar dataset; the file has no header row, so columns get integer labels.
sonar_data = pd.read_csv('dataset/sonar.all-data', header=None)
# Quick sanity checks: first rows, overall shape, and class balance of column 60 (the label).
print(sonar_data.head())
print(sonar_data.shape)
print(sonar_data[60].value_counts())

       0       1       2       3       4       5       6       7       8   \
0  0.0200  0.0371  0.0428  0.0207  0.0954  0.0986  0.1539  0.1601  0.3109   
1  0.0453  0.0523  0.0843  0.0689  0.1183  0.2583  0.2156  0.3481  0.3337   
2  0.0262  0.0582  0.1099  0.1083  0.0974  0.2280  0.2431  0.3771  0.5598   
3  0.0100  0.0171  0.0623  0.0205  0.0205  0.0368  0.1098  0.1276  0.0598   
4  0.0762  0.0666  0.0481  0.0394  0.0590  0.0649  0.1209  0.2467  0.3564   

       9   ...      51      52      53      54      55      56      57  \
0  0.2111  ...  0.0027  0.0065  0.0159  0.0072  0.0167  0.0180  0.0084   
1  0.2872  ...  0.0084  0.0089  0.0048  0.0094  0.0191  0.0140  0.0049   
2  0.6194  ...  0.0232  0.0166  0.0095  0.0180  0.0244  0.0316  0.0164   
3  0.1264  ...  0.0121  0.0036  0.0150  0.0085  0.0073  0.0050  0.0044   
4  0.4459  ...  0.0031  0.0054  0.0105  0.0110  0.0015  0.0072  0.0048   

       58      59  60  
0  0.0090  0.0032   R  
1  0.0052  0.0044   R  
2  0.0095  0.0078   

In [3]:
# Summary statistics (count, mean, std, quartiles) of the numeric columns.
sonar_data.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
count,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,...,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0
mean,0.029164,0.038437,0.043832,0.053892,0.075202,0.10457,0.121747,0.134799,0.178003,0.208259,...,0.016069,0.01342,0.010709,0.010941,0.00929,0.008222,0.00782,0.007949,0.007941,0.006507
std,0.022991,0.03296,0.038428,0.046528,0.055552,0.059105,0.061788,0.085152,0.118387,0.134416,...,0.012008,0.009634,0.00706,0.007301,0.007088,0.005736,0.005785,0.00647,0.006181,0.005031
min,0.0015,0.0006,0.0015,0.0058,0.0067,0.0102,0.0033,0.0055,0.0075,0.0113,...,0.0,0.0008,0.0005,0.001,0.0006,0.0004,0.0003,0.0003,0.0001,0.0006
25%,0.01335,0.01645,0.01895,0.024375,0.03805,0.067025,0.0809,0.080425,0.097025,0.111275,...,0.008425,0.007275,0.005075,0.005375,0.00415,0.0044,0.0037,0.0036,0.003675,0.0031
50%,0.0228,0.0308,0.0343,0.04405,0.0625,0.09215,0.10695,0.1121,0.15225,0.1824,...,0.0139,0.0114,0.00955,0.0093,0.0075,0.00685,0.00595,0.0058,0.0064,0.0053
75%,0.03555,0.04795,0.05795,0.0645,0.100275,0.134125,0.154,0.1696,0.233425,0.2687,...,0.020825,0.016725,0.0149,0.0145,0.0121,0.010575,0.010425,0.01035,0.010325,0.008525
max,0.1371,0.2339,0.3059,0.4264,0.401,0.3823,0.3729,0.459,0.6828,0.7106,...,0.1004,0.0709,0.039,0.0352,0.0447,0.0394,0.0355,0.044,0.0364,0.0439


Vykonanie encodingu, kde mínu zakódujeme ako 1 a kameň ako 0

In [4]:
# Encode the label column (60) numerically: 'R' (rock) -> 0, 'M' (mine) -> 1.
label_codes = {'R': 0, 'M': 1}
sonar_data[60] = sonar_data[60].replace(label_codes)
sonar_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,0
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,0
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,0
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,0
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,0


Data normalization

Dáta, ktoré používame, normalizujeme pomocou StandardScaleru.

In [5]:
def _scale_feature_columns(frame, scaler_cls, n_features=60):
    """Return a copy of ``frame`` whose columns 0..n_features-1 are rescaled one by one.

    A fresh ``scaler_cls()`` is fitted on each column independently; every other
    column is copied through unchanged.
    """
    scaled = frame.copy()
    for col in range(n_features):
        column_2d = np.array(scaled[col]).reshape(-1, 1)
        scaled[col] = scaler_cls().fit_transform(column_2d)
    return scaled


# Min-max scaled variant (kept for reference).
normalized_dfMinMax = _scale_feature_columns(sonar_data, MinMaxScaler)

# Standardised variant, displayed below.
# NOTE(review): both scalers are fitted on the full dataset, i.e. before any
# train/validation/test split, so statistics of held-out rows influence the
# features — consider fitting the scaler on the training split only.
normalized_df = _scale_feature_columns(sonar_data, StandardScaler)

normalized_df


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,-0.399551,-0.040648,-0.026926,-0.715105,0.364456,-0.101253,0.521638,0.297843,1.125272,0.021186,...,-1.115432,-0.597604,0.680897,-0.295646,1.481635,1.763784,0.069870,0.171678,-0.658947,0
1,0.703538,0.421630,1.055618,0.323330,0.777676,2.607217,1.522625,2.510982,1.318325,0.588706,...,-0.522349,-0.256857,-0.843151,0.015503,1.901046,1.070732,-0.472406,-0.444554,-0.419852,0
2,-0.129229,0.601067,1.723404,1.172176,0.400545,2.093337,1.968770,2.852370,3.232767,3.066105,...,1.017585,0.836373,-0.197833,1.231812,2.827246,4.120162,1.309360,0.252761,0.257582,0
3,-0.835555,-0.648910,0.481740,-0.719414,-0.987079,-1.149364,-0.193816,-0.084747,-1.000852,-0.610469,...,-0.137365,-1.009341,0.557326,-0.111785,-0.161060,-0.488635,-0.549875,-0.639154,1.034640,0
4,2.050790,0.856537,0.111327,-0.312227,-0.292365,-0.672796,-0.013735,1.317299,1.510531,1.772220,...,-1.073812,-0.753780,-0.060532,0.241793,-1.174638,-0.107456,-0.487900,0.447361,0.576375,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,-0.456232,-0.116681,-0.705146,-0.779738,-0.647842,0.990954,1.314965,0.407323,0.463980,0.448504,...,-0.189390,-0.129077,1.230104,-0.847228,0.328253,-0.228741,0.550172,1.841992,1.831621,1
204,0.136733,-0.861801,-0.366036,0.054026,0.014392,-0.148740,-0.369029,-0.388465,-0.635067,0.053253,...,-0.761663,-0.200066,0.351373,-0.422934,-0.335815,-0.765856,-0.735798,-0.282388,0.038412,1
205,1.004381,0.160078,-0.673843,-0.531979,-0.723629,0.212502,0.064137,-0.200113,-0.442014,0.332912,...,0.268428,-1.108725,-0.801960,-0.437077,0.118548,1.070732,0.906526,-0.039138,-0.678871,1
206,0.049533,-0.095392,0.134804,0.148821,-1.055648,0.522865,0.401585,-0.264859,0.139685,0.202404,...,-0.501539,-0.867363,0.227802,-0.804798,-0.825128,-0.765856,-0.007598,-0.704020,-0.340154,1


In [6]:
# Summary statistics of the standardised frame, to inspect the per-column mean and std.
normalized_df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
count,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,...,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0
mean,1.708035e-17,6.832142000000001e-17,-1.195625e-16,1.622634e-16,-1.793437e-16,2.049643e-16,1.024821e-16,3.4160710000000005e-17,-3.757678e-16,3.4160710000000005e-17,...,1.024821e-16,3.4160710000000005e-17,-1.45183e-16,2.775558e-17,-2.39125e-16,3.4160710000000005e-17,-1.110223e-16,1.345078e-16,7.686159e-17,0.533654
std,1.002413,1.002413,1.002413,1.002413,1.002413,1.002413,1.002413,1.002413,1.002413,1.002413,...,1.002413,1.002413,1.002413,1.002413,1.002413,1.002413,1.002413,1.002413,1.002413,0.50007
min,-1.206158,-1.150725,-1.104253,-1.036115,-1.236093,-1.600493,-1.921613,-1.52211,-1.443689,-1.468833,...,-1.313126,-1.449472,-1.364897,-1.229092,-1.366868,-1.302971,-1.185113,-1.271603,-1.176985,0.0
25%,-0.6894939,-0.6686781,-0.6490624,-0.6359298,-0.6703975,-0.6367565,-0.6626732,-0.6400918,-0.685659,-0.7232644,...,-0.6394049,-0.7999231,-0.7642025,-0.7270112,-0.6678488,-0.7138771,-0.6738235,-0.691858,-0.6788714,0.0
50%,-0.2774703,-0.2322506,-0.2486515,-0.2120457,-0.2292089,-0.2106432,-0.2400524,-0.2672134,-0.2180558,-0.1928459,...,-0.2102002,-0.1645716,-0.2252935,-0.2532164,-0.2396997,-0.3240352,-0.3329639,-0.2499546,-0.2405314,1.0
75%,0.2784345,0.2893335,0.3682681,0.2285353,0.4524231,0.5012417,0.5232608,0.4096773,0.4692723,0.450741,...,0.343864,0.5950106,0.4886751,0.3973675,0.4112618,0.4513169,0.3719959,0.3865486,0.4020352,1.0
max,4.706053,5.944643,6.836142,8.025419,5.878863,4.710224,4.074573,3.816498,4.274237,3.746234,...,5.980752,4.01668,3.330819,5.008027,5.448568,4.795888,5.585599,4.615037,7.450343,1.0


Train test val split 80/10/10

Dáta delíme takto kvôli menšiemu počtu dostupných vzoriek

In [7]:
# Separate the feature columns from the label column (60).
LABEL_COLUMN = 60
X = normalized_df.drop(columns=[LABEL_COLUMN])
y = normalized_df[LABEL_COLUMN]

In [8]:
# Stage 1: keep 80% of the rows for training and hold out the remaining 20%.
X_train, X_holdout, y_train, y_holdout = train_test_split(X, y, test_size=0.2, random_state=1)
# Stage 2: cut the hold-out in half -> test and validation (10% / 10% of all rows).
X_test, X_val, y_test, y_val = train_test_split(X_holdout, y_holdout, test_size=0.5, random_state=1)

In [9]:
# Report the size and class balance of each split, separated by a row of asterisks.
split_pairs = ((X_train, y_train), (X_test, y_test), (X_val, y_val))
for position, (features, labels) in enumerate(split_pairs):
    if position > 0:
        print('******')
    print(features.shape)
    print(labels.value_counts())

(166, 60)
1    91
0    75
Name: 60, dtype: int64
******
(21, 60)
1    11
0    10
Name: 60, dtype: int64
******
(21, 60)
0    12
1     9
Name: 60, dtype: int64


Tensorflow

In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dropout
import wandb
from wandb.keras import WandbCallback

2023-03-29 15:34:25.490127: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Vytvorenie modelu

V rámci tensorflow časti vytvárame 5 rôznych modelov

Všetky majú rovnakú architektúru, len sa líšia v metódach použitých pri riešení problému overfittingu. Máme vstupnú vrstvu so 16 neuronmi, dve skryté vrstvy s 16 neurónmi a nakoniec výstupnú vrstvu s jedným neurónom.

Pri všetkých vrstvách používame aktivačnú funkciu ReLu až na poslednú, kde používame sigmoid.

Model 1 nepoužíva žiadnu metódu prevencie overfittingu

Model 2 používa metódu dropout, kde sme ako šancu vypadnutia neurónu dali 50%, dropout sa vykonáva pri prvej a druhej skrytej vrstve.

Model 3 používa early stopping, riadi sa podľa val_loss a parameter patience sme nastavili na 10 epoch.

Model 4 používa regularizáciu, ako hodnotu parametrov l1 a l2 sme zvolili hodnotu 0,005. Skúsili sme použiť aj vyššie hodnoty ale to viedlo k oveľa horším výsledkom, kde napríklad acc padlo približne na 55%.

Model 5 sme skúsili otestovať ako sa bude NN správať ak použijeme naraz dropout a early stopping, parametre sme použili rovnaké ako pri predchádzajúcich modeloch.





In [11]:
#nothing
def _build_classifier(dropout_rate=None, make_regularizer=None):
    """Assemble the shared Dense(16)-Dense(16)-Dense(16)-Dense(1) Keras network.

    dropout_rate: when given, a Dropout layer with this rate follows the first
        and the second Dense layer.
    make_regularizer: optional zero-argument factory; a fresh regularizer from it
        is attached as ``kernel_regularizer`` to each of the three ReLU layers
        (the sigmoid output layer is left unregularised).
    """
    relu = tf.keras.activations.relu
    net = Sequential()
    for position in range(3):
        layer_kwargs = {}
        if position == 0:
            # Only the first layer declares the input width.
            layer_kwargs['input_dim'] = len(X_train.columns)
        if make_regularizer is not None:
            layer_kwargs['kernel_regularizer'] = make_regularizer()
        net.add(Dense(16, activation=relu, **layer_kwargs))
        if dropout_rate is not None and position < 2:
            net.add(Dropout(dropout_rate))
    net.add(Dense(1, activation=tf.keras.activations.sigmoid))
    return net


# no overfitting countermeasure
tf_model1 = _build_classifier()
# dropout
tf_model2 = _build_classifier(dropout_rate=0.5)
# early stopping: same network as model 1, the callback below does the work
tf_model3 = _build_classifier()
earlyStop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
# L1 + L2 kernel regularization
tf_model4 = _build_classifier(
    make_regularizer=lambda: tf.keras.regularizers.l1_l2(l1=0.005, l2=0.005)
)
# dropout + early stopping
tf_model5 = _build_classifier(dropout_rate=0.5)



2023-03-29 15:35:06.495299: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Ako loss funkciu sme zvolili binary crossentropy a ako optimizer Adam.

Na sledovanie NN sme vybrali 4 metriky Acc, MSE, precision a recall

In [12]:
# Compile all five models with the same loss, optimizer type and metric set.
# Keras metric objects are stateful, so every model gets its OWN Precision/Recall
# instance instead of sharing a single pair across all models. The explicit names
# keep the reported metric keys ('precision', 'recall') the same for every model.
for compiled_model in (tf_model1, tf_model2, tf_model3, tf_model4, tf_model5):
    precision = tf.keras.metrics.Precision(name='precision')
    recall = tf.keras.metrics.Recall(name='recall')
    compiled_model.compile(
        loss=tf.keras.losses.binary_crossentropy,
        optimizer=Adam(),  # a separate optimizer instance per model
        metrics=['accuracy', 'mse', precision, recall],
    )

Batch size sme zvolili na 16.

Počet epoch trénovania sme zvolili 100, pri tomto zadaní by mal postačovať aj menší počet epoch ale kvôli krátkemu času behu sme nechali 100 epoch.

In [13]:
def fitModel(model, name, callbackArg, batch_size=16, epochs=100):
    """Train a Keras model on the training split and save it to disk.

    Args:
        model: Keras model to train (must already be compiled).
        name: the trained model is saved to ``'./' + name``.
        callbackArg: list of Keras callbacks forwarded to ``model.fit``.
        batch_size: mini-batch size; defaults to 16, the previously hard-coded value.
        epochs: maximum number of epochs; defaults to 100, the previously
            hard-coded value.

    Reads the module-level ``X_train``, ``y_train``, ``X_val`` and ``y_val``.
    Returns nothing.
    """
    model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(X_val, y_val),
        callbacks=callbackArg,
    )

    model.save('./' + name)
    

In [None]:

# Open a W&B run and record the training hyperparameters in its config.
w = wandb.init(project='zadanie1',reinit=True)
w.config.epochs = 100
w.config.batch_size = 16



# Train model 1 (no overfitting countermeasure) with the W&B Keras callback attached.
fitModel(tf_model1, 'model1', [WandbCallback()])

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

In [None]:
# Train model 2 (dropout).
# NOTE(review): no w.finish()/wandb.init() separates this cell from the model 1
# training, so both appear to log into the same W&B run — confirm this is intended.
fitModel(tf_model2, 'model2', [WandbCallback()])

In [None]:
# Close the current W&B run and open a new one in the same project.
w.finish()
w = wandb.init(project='zadanie1',reinit=True)

In [None]:
# Train model 3 with the EarlyStopping callback (monitors val_loss, patience 10).
fitModel(tf_model3, 'model3', [earlyStop,WandbCallback()])

In [None]:
# Close the current W&B run and open a new one in the same project.
w.finish()
w = wandb.init(project='zadanie1',reinit=True)

In [None]:
# Train model 4 (L1 + L2 kernel regularization).
fitModel(tf_model4, 'model4', [WandbCallback()])

In [None]:
# Close the current W&B run and open a new one in the same project.
w.finish()
w = wandb.init(project='zadanie1',reinit=True)

In [None]:
# Train model 5 (dropout + early stopping); reuses the same earlyStop callback object as model 3.
fitModel(tf_model5, 'model5', [earlyStop,WandbCallback()])

In [None]:
# Close the current W&B run and open a new one in the same project.
w.finish()
w = wandb.init(project='zadanie1',reinit=True)

In [None]:
# helper function
def predictEval(tf_model, XX, yy):
    """Predict with ``tf_model`` on ``XX`` and score the result against ``yy``.

    Args:
        tf_model: object exposing ``predict(XX)`` that returns an (n, 1) array of
            probabilities.
        XX: feature matrix to predict on.
        yy: ground-truth 0/1 labels for ``XX``.

    Returns:
        A list ``[y_pred, y_pred_bool, accuracy, mse, precision, recall]`` where
        ``y_pred`` is the raw model output and ``y_pred_bool`` holds the same
        values rounded to 0/1 (same shape and dtype as ``y_pred``).
    """
    # Raw probabilities from the model.
    y_pred = tf_model.predict(XX)
    # Round to the nearest class label; np.rint rounds halves to even exactly
    # like the built-in round(), so a probability of exactly 0.5 maps to 0.
    y_pred_bool = np.rint(y_pred)
    y_hat = y_pred_bool.ravel()

    # scikit-learn metrics take (y_true, y_pred). Passing them the other way round
    # is harmless for accuracy and MSE but swaps precision and recall.
    return [
        y_pred,
        y_pred_bool,
        accuracy_score(yy, y_hat),
        mean_squared_error(yy, y_hat),
        precision_score(yy, y_hat),
        recall_score(yy, y_hat),
    ]

Eval

In [None]:
def predictEvalWrap(model, name):
    """Evaluate ``model`` on the train, validation and test splits and print a report.

    The report starts with ``name`` and then lists accuracy, mean squared error,
    precision and recall, each as a percentage with two decimals for the three
    splits. Relies on ``predictEval`` and on the module-level split variables.
    """
    # Evaluation order is train -> val -> test; dicts preserve insertion order.
    results = {
        'Train': predictEval(model, X_train, y_train),
        'Val': predictEval(model, X_val, y_val),
        'Test': predictEval(model, X_test, y_test),
    }

    # (section title, index into the list returned by predictEval)
    sections = (
        ('Accuracy score', 2),
        ('Mean squared error', 3),
        ('Precision', 4),
        ('Recall', 5),
    )

    print(name)
    for title, slot in sections:
        print(title)
        for split_name, scores in results.items():
            print(f'{split_name}: {scores[slot]*100:.2f}%')
    print('------------------------------------------')

In [None]:
# Print the evaluation report of every TensorFlow model, in training order.
tf_reports = (
    (tf_model1, 'No overfit prevention'),
    (tf_model2, 'Dropout'),
    (tf_model3, 'Early stopping'),
    (tf_model4, 'Regularization'),
    (tf_model5, 'Dropout + Early stopping'),
)
for evaluated_model, report_title in tf_reports:
    predictEvalWrap(evaluated_model, report_title)

In [None]:
V rámci pytorch časti vytvárame 3 rôzne modely

Všetky modely majú rovnakú architektúru, ktorá sa líši iba v metódach použitých pri riešení problému overfittingu. Vstupná vrstva má 16 neurónov, nasledovaná dvoma skrytými vrstvami s 16 neurónmi a nakoniec výstupná vrstva s jedným neurónom.

Všetky vrstvy používajú aktivačnú funkciu ReLu okrem poslednej, kde sa používa sigmoid.

Model 1 nepoužíva žiadnu metódu prevencie overfittingu.

Model 2 používa metódu dropout, kde sme ako pravdepodobnosť vypadnutia neurónov zvolili 50%. Dropout sa aplikuje na prvú a druhú skrytú vrstvu.

Model 3 používa metódu early stopping, ktorá sa riadi podľa val_loss a parameter patience je nastavený na 10 epoch.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score

In [None]:
PyTorchModel nepoužíva žiadnu metódu prevencie overfittingu.

In [None]:
# Baseline PyTorch network
class PyTorchModel(nn.Module):
    """Fully connected binary classifier: input_dim -> 16 -> 16 -> 16 -> 1.

    The three 16-unit layers use ReLU; the single output unit uses a sigmoid,
    so the forward pass returns values in (0, 1) with shape (batch, 1).
    """

    def __init__(self, input_dim):
        super().__init__()
        width = 16
        self.fc1 = nn.Linear(input_dim, width)
        self.fc2 = nn.Linear(width, width)
        self.fc3 = nn.Linear(width, width)
        self.fc4 = nn.Linear(width, 1)

    def forward(self, x):
        # ReLU after each of the three 16-unit layers, sigmoid on the output unit.
        for layer in (self.fc1, self.fc2, self.fc3):
            x = torch.relu(layer(x))
        return torch.sigmoid(self.fc4(x))

PyTorchModelDropout používa metódu dropout, kde sme ako pravdepodobnosť vypadnutia neurónov zvolili 50%. Dropout sa aplikuje na prvú a druhú skrytú vrstvu.

In [None]:
class PyTorchModelDropout(nn.Module):
    """Same 16-16-16-1 network as the baseline, with dropout for regularisation.

    Dropout is applied after the first and the second 16-unit layer only, which
    matches the notebook's description and the Keras dropout model. The third
    dropout layer that previously followed ``fc3`` was removed because it
    contradicted both.

    Args:
        input_dim: number of input features.
        dropout_rate: probability of zeroing an activation during training.
    """

    def __init__(self, input_dim, dropout_rate=0.5):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 16)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(16, 16)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.fc3 = nn.Linear(16, 16)
        self.fc4 = nn.Linear(16, 1)

    def forward(self, x):
        x = self.dropout1(torch.relu(self.fc1(x)))
        x = self.dropout2(torch.relu(self.fc2(x)))
        # No dropout directly in front of the output layer.
        x = torch.relu(self.fc3(x))
        return torch.sigmoid(self.fc4(x))

Táto trieda datasetu prijíma dva argumenty, X a y, ktoré predstavujú príznaky a labely (značky) datasetu.
Metóda init() inicializuje dataset tým, že konvertuje príznaky a značky na PyTorch tenzory pomocou funkcie torch.tensor().

In [None]:
class SonarDataset(Dataset):
    """Tensor-backed dataset built from pandas features ``X`` and labels ``y``.

    Both inputs are read through ``.values`` and copied into float32 tensors;
    the labels are reshaped into a column, so each item is a pair
    ``(feature_row, label)`` where ``label`` has shape (1,).
    """

    def __init__(self, X, y):
        features = torch.tensor(X.values, dtype=torch.float32)
        labels = torch.tensor(y.values, dtype=torch.float32)
        self.X = features
        self.y = labels.view(-1, 1)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        sample = (self.X[idx], self.y[idx])
        return sample

Táto časť vytvára PyTorch datasety a dataloadery pre trénovacie, validačné a testovacie dáta pomocou vlastnej triedy datasetu "SonarDataset".

In [None]:
# Wrap each split in the custom tensor dataset.
train_dataset = SonarDataset(X_train, y_train)
val_dataset = SonarDataset(X_val, y_val)
test_dataset = SonarDataset(X_test, y_test)

# Training data is served in shuffled mini-batches of 16 samples;
# validation and test data are each served as one single, unshuffled batch.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=len(val_dataset), shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

V tejto časti inicializujeme vyššie spomínané modely.

In [None]:
# Input width equals the number of feature columns in the training split.
input_dim = len(X_train.columns)
# Baseline network.
model = PyTorchModel(input_dim)
# Network with dropout (rate 0.5).
model_dropout = PyTorchModelDropout(input_dim, dropout_rate=0.5)
# Same architecture class as the baseline; early stopping is implemented in its training loop.
model_early_stopping = PyTorchModel(input_dim)

Ako Loss funkcia sa používa trieda nn.BCELoss() a bude použitá ako stratová funkcia pre všetky tri modely. 
Ako optimizer sa používa Adam, ktorý sa použije na aktualizáciu váh a biasov počas trénovania modelov; každý model má vlastnú inštanciu optimizera.

In [None]:
# Binary cross-entropy on the sigmoid outputs, shared by all three models.
criterion = nn.BCELoss()
# One Adam optimizer per model, each bound to that model's own parameters.
optimizer = optim.Adam(model.parameters())
optimizer_dropout = optim.Adam(model_dropout.parameters())
optimizer_early_stopping = optim.Adam(model_early_stopping.parameters())

In [None]:
# Upper bound on the number of training epochs used by the PyTorch training loops.
num_epochs = 100

Táto časť trénuje model neurónovej siete pomocou trénovacích dát a vyhodnocuje výkon modelu na validačných dátach v každej epoch.

In [None]:
Trénovanie modelu bez prevencie overfittingu.

In [None]:

# Train the baseline model: one pass over the training loader per epoch,
# followed by a validation pass without gradient tracking.
for epoch in range(num_epochs):
    model.train()
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        # Log the loss of every training mini-batch to the active W&B run.
        wandb.log({"loss": loss})
    with torch.no_grad():
        model.eval()
        # val_loss keeps the value of the last batch; val_loader is built with a
        # single full-size batch, so this is the loss on the whole validation set.
        for X_val_batch, y_val_batch in val_loader:
            y_val_pred = model(X_val_batch)
            val_loss = criterion(y_val_pred, y_val_batch)

    # `loss` here is the loss of the LAST training mini-batch of the epoch, not an epoch average.
    print(f"Epoch: {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}")

Trénovanie modelu s pridaným dropoutom.

In [None]:
# Train the dropout model: one pass over the training loader per epoch,
# followed by a validation pass with dropout disabled (eval mode).
for epoch in range(num_epochs):
    model_dropout.train()
    for X_batch, y_batch in train_loader:
        optimizer_dropout.zero_grad()
        y_pred = model_dropout(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        # Step the optimizer that owns model_dropout's parameters. Stepping the
        # baseline `optimizer` here left this model untrained and kept updating
        # the baseline model with its stale gradients.
        optimizer_dropout.step()

    with torch.no_grad():
        model_dropout.eval()
        val_loss_sum = 0
        val_batch_count = 0
        for X_val_batch, y_val_batch in val_loader:
            y_val_pred = model_dropout(X_val_batch)
            val_loss = criterion(y_val_pred, y_val_batch)
            val_loss_sum += val_loss.item()
            val_batch_count += 1
        # Mean validation loss over the validation batches.
        val_loss_avg = val_loss_sum / val_batch_count
    # `loss` is the loss of the last training mini-batch of the epoch.
    print(f"Epoch: {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss_avg:.4f}")

Trénovanie modelu s early stoppingom.

In [None]:
# Number of consecutive epochs without validation improvement tolerated before stopping.
patience = 10

best_val_loss = float("inf")
counter = 0  # epochs since the validation loss last improved

for epoch in range(num_epochs):
    model_early_stopping.train()
    for X_batch, y_batch in train_loader:
        optimizer_early_stopping.zero_grad()
        y_pred = model_early_stopping(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        # Step the optimizer that owns model_early_stopping's parameters. Stepping
        # the baseline `optimizer` here left this model untrained and kept
        # updating the baseline model with its stale gradients.
        optimizer_early_stopping.step()

    with torch.no_grad():
        model_early_stopping.eval()
        val_loss_sum = 0
        val_batch_count = 0
        for X_val_batch, y_val_batch in val_loader:
            y_val_pred = model_early_stopping(X_val_batch)
            val_loss = criterion(y_val_pred, y_val_batch)
            val_loss_sum += val_loss.item()
            val_batch_count += 1
        # Mean validation loss over the validation batches.
        val_loss_avg = val_loss_sum / val_batch_count

    # `loss` is the loss of the last training mini-batch of the epoch.
    print(f"Epoch: {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss_avg:.4f}")

    # Early stopping logic: reset the counter on improvement, otherwise count up
    # and stop after `patience` epochs without a new best validation loss.
    # No weights are checkpointed, so the model keeps the weights of the last
    # epoch that ran rather than those of the best epoch.
    if val_loss_avg < best_val_loss:
        best_val_loss = val_loss_avg
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping!")
            break

In [None]:
Pomocná funkcia na vyhodnotenie úspešnosti.

In [None]:
def predict_eval(model, loader):
    """Return the classification accuracy of ``model`` over ALL batches of ``loader``.

    The previous implementation overwrote the accuracy on every batch and so
    returned the score of the last batch only; for a shuffled, mini-batched
    loader that is a handful of random samples.

    Args:
        model: ``nn.Module`` producing probabilities in [0, 1].
        loader: iterable of ``(features, labels)`` tensor pairs with 0/1 labels.

    Returns:
        Fraction of samples whose rounded prediction equals the label (float).

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        model.eval()
        for X_batch, y_batch in loader:
            # torch.round rounds halves to even, so exactly 0.5 maps to class 0.
            y_pred_bool = torch.round(model(X_batch))
            # Flatten both sides so an (n, 1) vs (n,) mismatch cannot broadcast.
            matches = y_pred_bool.reshape(-1) == y_batch.reshape(-1)
            n_correct += int(matches.sum().item())
            n_seen += matches.numel()
    if n_seen == 0:
        raise ValueError("predict_eval: loader yielded no samples")
    return n_correct / n_seen

In [None]:
# Accuracy of the baseline model, evaluated with the train, validation and test loaders.
train_accuracy = predict_eval(model, train_loader)
val_accuracy = predict_eval(model, val_loader)
test_accuracy = predict_eval(model, test_loader)

In [None]:
# Accuracy of the dropout model, evaluated with the train, validation and test loaders.
train_accuracy_dropout = predict_eval(model_dropout, train_loader)
val_accuracy_dropout = predict_eval(model_dropout, val_loader)
test_accuracy_dropout = predict_eval(model_dropout, test_loader)

In [None]:
# Accuracy of the early-stopping model, evaluated with the train, validation and test loaders.
train_accuracy_early_stopping = predict_eval(model_early_stopping, train_loader)
val_accuracy_early_stopping = predict_eval(model_early_stopping, val_loader)
test_accuracy_early_stopping = predict_eval(model_early_stopping, test_loader)

In [None]:
# Accuracy summary of the baseline PyTorch model, as percentages.
print('No overfit prevention')
baseline_scores = (('Train', train_accuracy), ('Val', val_accuracy), ('Test', test_accuracy))
for split_label, split_accuracy in baseline_scores:
    print(f'{split_label}: {split_accuracy * 100:.2f}%')

In [None]:
# Accuracy summary of the dropout PyTorch model, as percentages.
print('Droupout')
dropout_scores = (
    ('Train', train_accuracy_dropout),
    ('Val', val_accuracy_dropout),
    ('Test', test_accuracy_dropout),
)
for split_label, split_accuracy in dropout_scores:
    print(f'{split_label}: {split_accuracy * 100:.2f}%')

In [None]:
# Accuracy summary of the early-stopping PyTorch model, as percentages.
print('Early stopping')
early_stopping_scores = (
    ('Train', train_accuracy_early_stopping),
    ('Val', val_accuracy_early_stopping),
    ('Test', test_accuracy_early_stopping),
)
for split_label, split_accuracy in early_stopping_scores:
    print(f'{split_label}: {split_accuracy * 100:.2f}%')