In [1]:
# Import necessary libraries and classes


# Common Imports

import numpy as np
import sys
import os

from Code.utils.dataset import Dataset
import Code.utils.store_model as store_model

# DNN imports

from Code.DNN.layers.sigmoid import SigmoidActivation
from Code.DNN.functions.metrics import mse, accuracy
from Code.DNN.networks.neuralnet import NeuralNetwork
from Code.DNN.functions.mse import MeanSquaredError
from Code.DNN.layers.dense import DenseLayer as DNN_DenseLayer
from Code.DNN.layers.dropout import DropOutLayer
from Code.DNN.optimizations.retained_gradient import RetGradient
from Code.DNN.optimizations.l1_reg import L1Reg
from Code.DNN.optimizations.l2_reg import L2Reg
from Code.DNN.functions.bce import BinaryCrossEntropy

# Explicit DNN_-prefixed aliases (same convention as DNN_DenseLayer).
# The RNN imports further down rebind the bare names SigmoidActivation,
# accuracy, RetGradient and BinaryCrossEntropy, so code that needs the DNN
# implementations should refer to these aliases instead of the bare names.
from Code.DNN.layers.sigmoid import SigmoidActivation as DNN_SigmoidActivation
from Code.DNN.functions.metrics import accuracy as DNN_accuracy
from Code.DNN.optimizations.retained_gradient import RetGradient as DNN_RetGradient
from Code.DNN.functions.bce import BinaryCrossEntropy as DNN_BinaryCrossEntropy


# Logistic Regression imports

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score

from Code.LogisticRegression.logisticReg.logisticReg import LogisticRegression


# RNN imports

from Code.RNN.layers.sigmoid import SigmoidActivation
from Code.RNN.functions.metrics import mse, accuracy
from Code.RNN.networks.recorrent_neural_network import RecorrentNeuralNetwork
from Code.RNN.functions.mse import MeanSquaredError
from Code.RNN.layers.rnn import RNN
from Code.RNN.layers.dense import DenseLayer as RNN_DenseLayer
from Code.RNN.optimizations.retained_gradient import RetGradient
from Code.RNN.functions.bce import BinaryCrossEntropy
from Code.RNN.layers.relu import ReLUActivation

In [2]:
# Seed NumPy's legacy global random generator so that anything drawing from
# np.random produces the same sequence on every fresh run of the notebook

np.random.seed(seed=42)

In [3]:
# Load the training, validation and test CSV files into a Dataset object
# Skip this cell when the model is going to be restored from file instead

dataset = Dataset(
    '../Dataset/DatasetsGerados/dataset_training_input.csv',
    '../Dataset/DatasetsGerados/dataset_training_output.csv',
    '../Dataset/DatasetsGerados/dataset_validation_input.csv',
    '../Dataset/DatasetsGerados/dataset_validation_output.csv',
    '../Dataset/DatasetsGerados/dataset_test_input.csv',
    '../Dataset/DatasetsGerados/dataset_test_output.csv',
)

# 'Text' / 'Label' are presumably the input and target column names of the
# tab-separated files (confirm against Dataset.get_datasets); punctuation
# removal is explicitly disabled.
splits = dataset.get_datasets('Text', 'Label', sep='\t', rem_punctuation=False)
X_train, y_train, X_validation, y_validation, X_test, y_test, ids = splits

In [4]:
# Build model 1

# TODO: replace with the final model construction and training code (example below)
# Create network topology

# Use the DNN_-prefixed aliases here. The bare names RetGradient,
# BinaryCrossEntropy, accuracy and SigmoidActivation are rebound by the RNN
# imports that come later in the import cell, so the unprefixed names would
# hand the RNN package's implementations to this DNN network.
optimizer = DNN_RetGradient(learning_rate=0.01, momentum=0.90)
loss = DNN_BinaryCrossEntropy()

regulator = L2Reg(l2_val=0.001)
model1 = NeuralNetwork(epochs=15, batch_size=16, optimizer=optimizer, regulator=regulator, verbose=True, loss=loss,
                       metric=DNN_accuracy, patience=2, min_delta=0.001)

# The number of input features is taken from the second axis of the training matrix
n_features = X_train.shape[1]
model1.add(DNN_DenseLayer(6, (n_features,)))
model1.add(DNN_SigmoidActivation())
# NOTE(review): this layer is given (n_features,) as its shape although the
# preceding dense layer is created with 6 units - confirm DropOutLayer's
# expected arguments.
model1.add(DropOutLayer(3, 0.5, (n_features,)))
model1.add(DNN_DenseLayer(1))
model1.add(DNN_SigmoidActivation())

# Train network (the validation split is passed alongside the training data)

model1.fit(X_train, y_train, X_val=X_validation, y_val=y_validation)

# Plot learning curves

model1.plot_train_curves()

Epoch 1/15 - loss: 2240.1456 - accuracy: 0.7050
Epoch 2/15 - loss: 1028.2941 - accuracy: 0.9113
Epoch 3/15 - loss: 861.8316 - accuracy: 0.9290
Epoch 4/15 - loss: 565.8196 - accuracy: 0.9553
Epoch 5/15 - loss: 569.4495 - accuracy: 0.9555
Epoch 6/15 - loss: 356.9146 - accuracy: 0.9677
Epoch 7/15 - loss: 306.6392 - accuracy: 0.9772
Epoch 8/15 - loss: 258.1463 - accuracy: 0.9805
Epoch 9/15 - loss: 238.1251 - accuracy: 0.9818
Epoch 10/15 - loss: 244.8227 - accuracy: 0.9830
Epoch 11/15 - loss: 218.1983 - accuracy: 0.9802
Epoch 12/15 - loss: 154.5045 - accuracy: 0.9920
Epoch 13/15 - loss: 268.0864 - accuracy: 0.9782
Epoch 14/15 - loss: 194.0049 - accuracy: 0.9900
Early stopping at epoch 14


In [5]:
# Build model 2

# TODO: replace with the final model construction and training code (example below)
# NOTE(review): the hyper-parameters and topology below are a verbatim copy of
# model 1; any difference in results presumably comes only from the random
# state at the time this cell runs - confirm this is intended.
# Create network topology

# Use the DNN_-prefixed aliases here. The bare names RetGradient,
# BinaryCrossEntropy, accuracy and SigmoidActivation are rebound by the RNN
# imports that come later in the import cell, so the unprefixed names would
# hand the RNN package's implementations to this DNN network.
optimizer = DNN_RetGradient(learning_rate=0.01, momentum=0.90)
loss = DNN_BinaryCrossEntropy()

regulator = L2Reg(l2_val=0.001)
model2 = NeuralNetwork(epochs=15, batch_size=16, optimizer=optimizer, regulator=regulator, verbose=True, loss=loss,
                       metric=DNN_accuracy, patience=2, min_delta=0.001)

# The number of input features is taken from the second axis of the training matrix
n_features = X_train.shape[1]
model2.add(DNN_DenseLayer(6, (n_features,)))
model2.add(DNN_SigmoidActivation())
# NOTE(review): this layer is given (n_features,) as its shape although the
# preceding dense layer is created with 6 units - confirm DropOutLayer's
# expected arguments.
model2.add(DropOutLayer(3, 0.5, (n_features,)))
model2.add(DNN_DenseLayer(1))
model2.add(DNN_SigmoidActivation())

# Train network (the validation split is passed alongside the training data)

model2.fit(X_train, y_train, X_val=X_validation, y_val=y_validation)

# Plot learning curves

model2.plot_train_curves()

Epoch 1/15 - loss: 1964.9048 - accuracy: 0.7508
Epoch 2/15 - loss: 836.0686 - accuracy: 0.9303
Epoch 3/15 - loss: 519.1306 - accuracy: 0.9615
Epoch 4/15 - loss: 458.9785 - accuracy: 0.9597
Epoch 5/15 - loss: 433.6421 - accuracy: 0.9660
Epoch 6/15 - loss: 368.9321 - accuracy: 0.9695
Epoch 7/15 - loss: 301.6790 - accuracy: 0.9802
Epoch 8/15 - loss: 298.2515 - accuracy: 0.9765
Epoch 9/15 - loss: 268.9679 - accuracy: 0.9805
Epoch 10/15 - loss: 222.6463 - accuracy: 0.9832
Epoch 11/15 - loss: 287.8381 - accuracy: 0.9805
Epoch 12/15 - loss: 221.1917 - accuracy: 0.9872
Epoch 13/15 - loss: 168.1024 - accuracy: 0.9888
Epoch 14/15 - loss: 337.2214 - accuracy: 0.9815
Epoch 15/15 - loss: 226.1162 - accuracy: 0.9835
Early stopping at epoch 15


In [6]:
# Evaluate model 1 on the test set and save its predictions

# Run the model on the test inputs

out = model1.predict(X_test)

# y_test is None when no test labels were loaded; only score when labels exist
if y_test is not None:
    test_score = model1.score(y_test, out)
    print(test_score)

# Write the predictions to disk

# TODO: change to the correct file name
results_filepath = './Results/model1_results.csv'

# Create the target directory first if it is not there yet
results_dir = os.path.dirname(results_filepath)
os.makedirs(results_dir, exist_ok=True)

# Combine the ids with the predictions and save them as a tab-separated file
results = dataset.merge_results(ids, out)
results.to_csv(results_filepath, sep='\t', index=False)

0.982


In [4]:
# Evaluate model 2 on the test set and save its predictions

# Run the model on the test inputs

out = model2.predict(X_test)

# y_test is None when no test labels were loaded; only score when labels exist
if y_test is not None:
    test_score = model2.score(y_test, out)
    print(test_score)

# Write the predictions to disk

# TODO: change to the correct file name
results_filepath = './Results/model2_results.csv'

# Create the target directory first if it is not there yet
results_dir = os.path.dirname(results_filepath)
os.makedirs(results_dir, exist_ok=True)

# Combine the ids with the predictions and save them as a tab-separated file
results = dataset.merge_results(ids, out)
results.to_csv(results_filepath, sep='\t', index=False)

0.988


In [8]:
# Persist the Dataset object together with model 1

# Dataset object: stored under './Model/dataset' with key 'dataset'
dataset_filepath, dataset_key = './Model/dataset', 'dataset'
store_model.store_model(dataset_filepath, dataset_key, dataset)

# Network: stored under './Model/model1' with key 'model1'
model1_filepath, model1_key = './Model/model1', 'model1'
store_model.store_model(model1_filepath, model1_key, model1)

In [9]:
# Persist the Dataset object together with model 2

# Dataset object: stored under './Model/dataset' with key 'dataset'
# (same path and key as used when storing model 1)
dataset_filepath, dataset_key = './Model/dataset', 'dataset'
store_model.store_model(dataset_filepath, dataset_key, dataset)

# Network: stored under './Model/model2' with key 'model2'
model2_filepath, model2_key = './Model/model2', 'model2'
store_model.store_model(model2_filepath, model2_key, model2)

In [5]:
# Restore the stored Dataset object and model 1, then load the test split

# Dataset object

dataset_filepath, dataset_key = './Model/dataset', 'dataset'
dataset = store_model.retrieve_model(dataset_filepath, dataset_key)

# Model 1

model1_filepath, model1_key = './Model/model1', 'model1'
model1 = store_model.retrieve_model(model1_filepath, model1_key)

# Point the restored Dataset at the test files.
# When there is no file with test labels, pass None as the second argument.

test_input_csv = '../Dataset/DatasetsGerados/dataset_test_input.csv'
test_output_csv = '../Dataset/DatasetsGerados/dataset_test_output.csv'
dataset.set_dataset_test(test_input_csv, test_output_csv)

test_split = dataset.get_test_dataset('Text', 'Label', sep='\t', rem_punctuation=False)
X_test, y_test, ids = test_split

In [3]:
# Restore the stored Dataset object and model 2, then load the test split

# Dataset object

dataset_filepath, dataset_key = './Model/dataset', 'dataset'
dataset = store_model.retrieve_model(dataset_filepath, dataset_key)

# Model 2

model2_filepath, model2_key = './Model/model2', 'model2'
model2 = store_model.retrieve_model(model2_filepath, model2_key)

# Point the restored Dataset at the test files.
# When there is no file with test labels, pass None as the second argument.

test_input_csv = '../Dataset/DatasetsGerados/dataset_test_input.csv'
test_output_csv = '../Dataset/DatasetsGerados/dataset_test_output.csv'
dataset.set_dataset_test(test_input_csv, test_output_csv)

test_split = dataset.get_test_dataset('Text', 'Label', sep='\t', rem_punctuation=False)
X_test, y_test, ids = test_split