In [None]:
# Created by: Adam Fabo
# Date: 22.5.2022
# Created at HMU Crete
# Class: Neural Networks
# This file contains a script that trains a neural network on differently split datasets (Chapter 7 in the documentation)
 

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import neurolab as nl
import pandas as pd
import time
import os
from sklearn import preprocessing
from sklearn.model_selection import train_test_split



In [None]:
# Read the banknote measurements; the file is comma-separated and has no header row
data = pd.read_csv("data_banknote_auth_trimmed.txt", header=None, sep=",")

# Label the five columns in file order
data.columns = [
    "Variance",  # 1. variance of Wavelet Transformed image (continuous)
    "Skewness",  # 2. skewness of Wavelet Transformed image (continuous)
    "Curtosis",  # 3. curtosis of Wavelet Transformed image (continuous)
    "Entropy",   # 4. entropy of image (continuous)
    "Class",     # 5. class (integer)
]

# Show the first rows as a quick sanity check of the load
data.head()

In [None]:
# Extract the class labels as a NumPy vector
class_labels = data["Class"].to_numpy()

# Two categories -> two output neurons: class 0 becomes [0, 1] and class 1 becomes [1, 0]
banknotes = {0: [0, 1], 1: [1, 0]}
target = np.array([banknotes[label] for label in class_labels])

# What remains after removing the label column is the feature matrix
data = data.drop(columns=["Class"]).to_numpy()


In [None]:
# Result files are written below "data/"; create the directory up front so the
# first to_csv call does not fail on a fresh checkout.
os.makedirs("data", exist_ok=True)

# Sweep the training share from 1 % to 98 % of the dataset and repeat every
# share 10 times with a fresh random split.
for training_size in range(1,99):
    for round_num in range(10):
        # training_size is already a percentage, so print it directly
        print( f"Dataset is split {training_size}%, Round: {round_num}" )

        # split the dataset (test_size is the complementary fraction)
        X_train, X_test, y_train, y_test = train_test_split(data,target,test_size=(1-(training_size/100)))

        # scale values to range (0,1): the scaler is fitted on the training
        # split only and then re-used for the test split, so both are mapped
        # with the same min/max. Test values may fall slightly outside (0,1).
        min_max_scaler = preprocessing.MinMaxScaler()
        X_train = min_max_scaler.fit_transform(X_train)
        X_test  = min_max_scaler.transform(X_test)

        # create NN with one hidden layer of 6 neurons and 2 output neurons,
        # using the log-sigmoid transfer function in both layers
        net = nl.net.newff(nl.tool.minmax(X_train),[6,2])
        net.layers[-1].transf = nl.trans.LogSig()
        net.layers[ 0].transf = nl.trans.LogSig()

        # set train function
        net.trainf = nl.train.train_rprop

        # train for at most 3000 epochs and measure the wall-clock time;
        # presumably training stops earlier once `goal` is reached, which is
        # why len(error) is stored as the epoch count below
        start_time = time.time()
        error = net.train(X_train,y_train,epochs = 3000, show = 100, lr = 0.05, goal=1e-5)
        train_time = time.time() - start_time

        # accuracy on training data: a sample counts as correct only when
        # both rounded outputs match the expected pair
        out = net.sim(X_train)
        out = np.around(out)
        correct = (out == y_train).all(axis = 1)
        accuracy_training = (np.sum(correct)/len(out))*100

        # accuracy on test data, computed the same way
        out = net.sim(X_test)
        out = np.around(out)
        correct = (out == y_test).all(axis = 1)
        accuracy_test = (np.sum(correct)/len(out))*100

        # one result row per (training size, round)
        df = pd.DataFrame([[round_num,training_size,accuracy_training,accuracy_test,train_time,len(error)]],
                          columns = ["Round","Training size","Accuracy on training","Accuracy on test","Training time", "Training epochs"])

        filename = f"data/training_size_{training_size}.csv"

        # append to the per-size file; the header is written only when the
        # file does not exist yet (mode "a" creates it in that case)
        df.to_csv(filename, mode="a", header=not os.path.isfile(filename))

            
        