In [1]:
# Import necessary libraries and classes


# Common Imports

import numpy as np
import sys
import os

from Code.utils.dataset import Dataset
import Code.utils.store_model as store_model

# DNN imports

from Code.DNN.layers.sigmoid import SigmoidActivation
from Code.DNN.functions.metrics import mse, accuracy
from Code.DNN.networks.neuralnet import NeuralNetwork
from Code.DNN.functions.mse import MeanSquaredError
from Code.DNN.layers.dense import DenseLayer
from Code.DNN.layers.dropout import DropOutLayer
from Code.DNN.optimizations.retained_gradient import RetGradient
from Code.DNN.optimizations.l1_reg import L1Reg
from Code.DNN.optimizations.l2_reg import L2Reg
from Code.DNN.functions.bce import BinaryCrossEntropy

In [2]:
# Seed NumPy's global random number generator so that code drawing from
# np.random produces the same numbers on every run

np.random.seed(seed=42)

In [3]:
# Load the training, validation and test splits from their CSV files.
# Skip this cell when the model and Dataset are loaded from file instead.

train_input_csv = '../Dataset/DatasetsGerados/dataset_training_input.csv'
train_output_csv = '../Dataset/DatasetsGerados/dataset_training_output.csv'
validation_input_csv = '../Dataset/DatasetsGerados/dataset_validation_input.csv'
validation_output_csv = '../Dataset/DatasetsGerados/dataset_validation_output.csv'
test_input_csv = '../Dataset/dataset2_inputs.csv'
# presumably the test-label file slot; None because no label file is supplied
test_output_csv = None

dataset = Dataset(train_input_csv, train_output_csv,
                  validation_input_csv, validation_output_csv,
                  test_input_csv, test_output_csv)

# 'Text' and 'Label' are passed positionally, the files are read tab-separated
# and punctuation removal is switched off.
(X_train, y_train,
 X_validation, y_validation,
 X_test, y_test, ids) = dataset.get_datasets('Text', 'Label', sep='\t', rem_punctuation=False)

In [4]:
# Build model
#
# The optimizer, loss and L2 regularizer are created first and handed to the
# NeuralNetwork constructor; layers are then appended in forward order.

optimizer = RetGradient(learning_rate=0.0034, momentum=0.65)
loss = BinaryCrossEntropy()

regulator = L2Reg(l2_val=0.05)
# NOTE(review): patience=50 with epochs=60 — if patience counts epochs without
# improvement, early stopping can hardly ever trigger; confirm this is intended.
model = NeuralNetwork(epochs=60, batch_size=30, optimizer=optimizer, regulator=regulator, verbose=True, loss=loss,
                    metric=accuracy, patience=50, min_delta=0.001)

# Number of input columns in the training matrix
n_features = X_train.shape[1]
model.add(DenseLayer(32, (n_features,)))
model.add(SigmoidActivation())
# The shape argument must be a tuple like on the other layers: the trailing
# comma matters, since `(n_features)` without it is just a parenthesised int.
# NOTE(review): this layer actually receives the 32 outputs of the previous
# dense layer; presumably the network re-derives the input shape of non-first
# layers when they are added — verify against NeuralNetwork.add.
model.add(DenseLayer(16, (n_features,)))
model.add(SigmoidActivation())
# NOTE(review): the meaning of DropOutLayer's positional arguments (8, 0.8) is
# not visible here; if 0.8 is the drop probability it is unusually high — confirm.
model.add(DropOutLayer(8, 0.8, (n_features,)))
model.add(DenseLayer(1))
model.add(SigmoidActivation())

# Fit the network on the training split, handing over the validation split
# through the X_val / y_val keyword arguments

model.fit(
    X_train,
    y_train,
    X_val=X_validation,
    y_val=y_validation,
)

# Plot the training curves of the fitted model

model.plot_train_curves()

Epoch 1/60 - loss: 2162.4801 - accuracy: 0.5160
Epoch 2/60 - loss: 2070.4819 - accuracy: 0.5220
Epoch 3/60 - loss: 2076.9348 - accuracy: 0.5243
Epoch 4/60 - loss: 2073.4513 - accuracy: 0.5213
Epoch 5/60 - loss: 2062.4530 - accuracy: 0.5353
Epoch 6/60 - loss: 2075.2192 - accuracy: 0.5120
Epoch 7/60 - loss: 2089.4843 - accuracy: 0.4987
Epoch 8/60 - loss: 2069.3447 - accuracy: 0.5310
Epoch 9/60 - loss: 2073.5352 - accuracy: 0.5110
Epoch 10/60 - loss: 2066.6801 - accuracy: 0.5367
Epoch 11/60 - loss: 2062.6952 - accuracy: 0.5507
Epoch 12/60 - loss: 2074.7530 - accuracy: 0.5267
Epoch 13/60 - loss: 2070.8847 - accuracy: 0.5227
Epoch 14/60 - loss: 2073.1197 - accuracy: 0.5257
Epoch 15/60 - loss: 2063.8659 - accuracy: 0.5367
Epoch 16/60 - loss: 2057.2843 - accuracy: 0.5410
Epoch 17/60 - loss: 2017.9758 - accuracy: 0.5820
Epoch 18/60 - loss: 1896.6750 - accuracy: 0.6647
Epoch 19/60 - loss: 1642.4253 - accuracy: 0.7743
Epoch 20/60 - loss: 1302.7809 - accuracy: 0.8413
Epoch 21/60 - loss: 937.9390 

In [5]:
# Evaluate the model on the test split

# Run the network on the test inputs

out = model.predict(X_test)

# A score is only printed when test labels were loaded
if y_test is not None:
    test_score = model.score(y_test, out)
    print(test_score)

# Store results

results_filepath = './submissao1-grupo007-s1.csv'

# Ensure the target directory exists. os.path.dirname() returns '' for a path
# without a directory component and os.makedirs('') raises FileNotFoundError,
# so the directory is only created when there is one to create.
results_dir = os.path.dirname(results_filepath)
if results_dir:
    os.makedirs(results_dir, exist_ok=True)

# Combine the ids with the predictions and write them as a tab-separated file
# without the index column
results = dataset.merge_results(ids, out)
results.to_csv(results_filepath, sep='\t', index=False)

In [6]:
# Store model and Dataset class

dataset_filepath = './Model/dataset-s1'
dataset_key = 'dataset-s1'

model_filepath = './Model/model-s1'
model_key = 'model-s1'

# Make sure the output directories exist before anything is written to them.
# NOTE(review): store_model's implementation is not visible here; if it already
# creates missing directories this loop is a harmless no-op.
for target_path in (dataset_filepath, model_filepath):
    target_dir = os.path.dirname(target_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

# Store the Dataset object

store_model.store_model(dataset_filepath, dataset_key, dataset)

# Store network model

store_model.store_model(model_filepath, model_key, model)

In [7]:
# Restore a previously stored Dataset object and network model, then load the
# test split through the restored Dataset.

# Dataset object

dataset_filepath = './Model/dataset-s1'
dataset_key = 'dataset-s1'
dataset = store_model.retrieve_model(dataset_filepath, dataset_key)

# Network model

model_filepath = './Model/model-s1'
model_key = 'model-s1'
model = store_model.retrieve_model(model_filepath, model_key)

# Register the test file on the Dataset. The second argument is for the test
# labels (y_test); it should be None when they don't exist.

test_input_csv = '../Dataset/dataset2_inputs.csv'
dataset.set_dataset_test(test_input_csv, None)

# Same column names, separator and punctuation setting as used for the
# full dataset load
X_test, y_test, ids = dataset.get_test_dataset('Text', 'Label', sep='\t', rem_punctuation=False)