In [1]:
# Import necessary libraries and classes


# Common Imports

import numpy as np
import sys
import os

from Code.utils.dataset import Dataset
import Code.utils.store_model as store_model

# DNN imports

from Code.DNN.layers.sigmoid import SigmoidActivation
from Code.DNN.functions.metrics import mse, accuracy
from Code.DNN.networks.neuralnet import NeuralNetwork
from Code.DNN.functions.mse import MeanSquaredError
# from Code.DNN.layers.dense import DenseLayer
from Code.DNN.layers.dropout import DropOutLayer
from Code.DNN.optimizations.retained_gradient import RetGradient
from Code.DNN.optimizations.l1_reg import L1Reg
from Code.DNN.optimizations.l2_reg import L2Reg
from Code.DNN.functions.bce import BinaryCrossEntropy


# Logistic Regression imports

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score

from Code.LogisticRegression.logisticReg.logisticReg import LogisticRegression


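# RNN imports
# NOTE: these rebind SigmoidActivation, mse, accuracy, MeanSquaredError,
# RetGradient and BinaryCrossEntropy, shadowing the Code.DNN versions imported above.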

from Code.RNN.layers.sigmoid import SigmoidActivation
from Code.RNN.functions.metrics import mse, accuracy
from Code.RNN.networks.recorrent_neural_network import RecorrentNeuralNetwork
from Code.RNN.functions.mse import MeanSquaredError
from Code.RNN.layers.rnn import RNN
from Code.RNN.layers.dense import DenseLayer
from Code.RNN.optimizations.retained_gradient import RetGradient
from Code.RNN.functions.bce import BinaryCrossEntropy
from Code.RNN.layers.relu import ReLUActivation

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\mrjoa\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


In [2]:
# Seed NumPy's global random generator so repeated runs draw the same numbers

np.random.seed(seed=42)

In [3]:
# Load the dataset files and produce the embedded train / validation / test splits.
# Skip this cell when the model and Dataset are restored from file instead.

# Arguments in order: training input/output, validation input/output, test input,
# and a final None (presumably the test-output file, which is absent here — confirm).
dataset = Dataset(
    '../Dataset/DatasetsGerados/dataset_training_input.csv',
    '../Dataset/DatasetsGerados/dataset_training_output.csv',
    '../Dataset/DatasetsGerados/dataset_validation_input.csv',
    '../Dataset/DatasetsGerados/dataset_validation_output.csv',
    '../Dataset/dataset2_inputs.csv',
    None,
)

# 'Text' and 'Label' are presumably the input and target column names — confirm
# against Dataset.get_dataset_embedding.
(
    X_train, y_train,
    X_validation, y_validation,
    X_test, y_test,
    ids,
) = dataset.get_dataset_embedding('Text', 'Label', sep='\t', rem_punctuation=False)

In [4]:
# Build, train and plot the recurrent network

batch_size = 8

# Training components: optimizer, loss function and regularizer
optimizer = RetGradient(learning_rate=0.001, momentum=0.90)
loss = BinaryCrossEntropy()

regulator = L2Reg(l2_val=0.01)

# NOTE(review): patience=-1 presumably disables early stopping — confirm in
# RecorrentNeuralNetwork.
model = RecorrentNeuralNetwork(
    epochs=10,
    batch_size=batch_size,
    optimizer=optimizer,
    regulator=regulator,
    verbose=True,
    loss=loss,
    metric=accuracy,
    patience=-1,
    min_delta=0.001,
)

# Layer stack, added in order: RNN(10) -> ReLU -> Dense(1) -> Sigmoid.
# The RNN input_shape is read from axes 1 and 2 of X_train.
model.add(
    RNN(
        10,
        input_shape=(X_train.shape[1], X_train.shape[2]),
    )
)
model.add(ReLUActivation())

model.add(DenseLayer(1))
model.add(SigmoidActivation())

# Fit on the training split, passing the validation split alongside.
# NOTE(review): in the recorded run the reported loss rises from ~5.7k (epoch 1)
# to ~22.7k (epoch 4) and a warning points at `np.exp(-input)` in the sigmoid;
# the learning rate / regularization may need tuning — confirm.
model.fit(X_train, y_train, X_val=X_validation, y_val=y_validation)

# Learning curves for the run above
model.plot_train_curves()

Epoch 1/10 - loss: 5732.5180 - accuracy: 0.8540
Epoch 2/10 - loss: 9350.2387 - accuracy: 0.8673
Epoch 3/10 - loss: 18636.8414 - accuracy: 0.7767


  return 1 / (1 + np.exp(-input))


Epoch 4/10 - loss: 22665.6592 - accuracy: 0.7533
Epoch 5/10 - loss: 17657.8700 - accuracy: 0.8007
Epoch 6/10 - loss: 22555.5300 - accuracy: 0.7663
Epoch 7/10 - loss: 18084.4018 - accuracy: 0.8117
Epoch 8/10 - loss: 13231.1960 - accuracy: 0.8610
Epoch 9/10 - loss: 15021.6551 - accuracy: 0.8430
Epoch 10/10 - loss: 10623.7952 - accuracy: 0.8903


In [5]:
# Test model: predict the test split, optionally score it, and write the results file

# Predict test dataset

out = model.predict(X_test)

# Test labels are optional; only score when they are available
if y_test is not None:
    print(model.score(y_test, out))

# Store results

results_filepath = './submissao1-grupo007-s2-v2.csv'

# Ensure the directory exists.
# os.path.dirname() returns '' for a path with no directory part, and
# os.makedirs('') raises FileNotFoundError, so fall back to the current directory.
results_dir = os.path.dirname(results_filepath) or '.'
os.makedirs(results_dir, exist_ok=True)

# Combine the ids with the predictions and write them tab-separated, without the index
results = dataset.merge_results(ids, out)
results.to_csv(results_filepath, sep='\t', index=False)

In [6]:
# Persist the Dataset object and the trained network

# File path and key under which each object is stored
dataset_filepath, dataset_key = './Model/dataset-s2', 'dataset-s2'
model_filepath, model_key = './Model/model-s2', 'model-s2'

# Dataset object first ...
store_model.store_model(dataset_filepath, dataset_key, dataset)

# ... then the network model
store_model.store_model(model_filepath, model_key, model)

In [7]:
# Restore the Dataset object and the network from file, then rebuild the test split

# Same file paths and keys that the store cell uses
dataset_filepath, dataset_key = './Model/dataset-s2', 'dataset-s2'
model_filepath, model_key = './Model/model-s2', 'model-s2'

# Both names are rebound to the retrieved objects
dataset = store_model.retrieve_model(dataset_filepath, dataset_key)
model = store_model.retrieve_model(model_filepath, model_key)

# Load test dataset. If y_test doesn't exist, second parameter should be None
dataset.set_dataset_test(
    '../Dataset/dataset2_inputs.csv',
    None,
)

X_test, y_test, ids = dataset.get_test_dataset_embedding(
    'Text', 'Label', sep='\t', rem_punctuation=False
)