# Feedforward Neural Networks (FNN)

In [2]:
import math
import sys
import warnings
from pathlib import Path

import pandas as pd
import numpy as np 
import tensorflow as tf
import spacy

from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

from tensorflow.keras.utils import to_categorical

import matplotlib.pyplot as plt
import seaborn as sns


# Notebook-wide settings: silence library warnings and lift the pandas limits
# on how many columns / how much of each cell is shown when rendering frames.
warnings.filterwarnings("ignore")
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

## Load Data

In [3]:
# Both files are header-less and ';'-separated; the two positional columns
# are given readable names right after loading.
column_names = {0: 'text', 1: 'feeling'}

df_train = pd.read_csv('data/train_data.txt', sep=';', header=None).rename(columns=column_names)
df_test = pd.read_csv('data/test_data.txt', sep=';', header=None).rename(columns=column_names)

# Row/column counts of each split
for frame in (df_train, df_test):
    print(frame.shape)

(16000, 2)
(2000, 2)


In [None]:
# Peek at the first three rows of each split
display(df_train.head(3), df_test.head(3))

> The column **text** is going to be the input feature and **feeling** is going to be the output target.

In [None]:
# Number of training rows per label
train_label_counts = df_train['feeling'].value_counts()
train_label_counts

In [None]:
# Number of test rows per label
test_label_counts = df_test['feeling'].value_counts()
test_label_counts

## Data preprocessing using Spacy

[Official site](https://spacy.io/).

In [None]:
!python -m spacy download en_core_web_md -q

In [13]:
# Load the spaCy English pipeline that data_preprocessing relies on
spacy_model_name = 'en_core_web_md'
spacy_nlp = spacy.load(spacy_model_name)

In [16]:
def data_preprocessing(text):
    """Normalise one raw sentence before vectorisation.

    The text is run through the ``spacy_nlp`` pipeline; every token flagged as
    a stop word is discarded, and the lemma of each remaining token is
    lower-cased and stripped of surrounding whitespace.

    Args:
        text: Raw input string.

    Returns:
        The kept lemmas joined into one string with single spaces.
    """
    kept_lemmas = []
    for token in spacy_nlp(text):
        if token.is_stop:
            continue
        kept_lemmas.append(token.lemma_.lower().strip())

    return ' '.join(kept_lemmas)

In [19]:
# Add the preprocessed sentence as a new column on both splits
for frame in (df_train, df_test):
    frame['transformed_text'] = frame['text'].apply(data_preprocessing)

In [None]:
# Check the raw and transformed text side by side
display(df_train.head(), df_test.head())

## Building the model

### 1) TF-IDF Vectorizer

The code below creates an instance of the **TfidfVectorizer** from the scikit-learn library, which is a tool used to convert a collection of raw documents into a TF-IDF (Term Frequency-Inverse Document Frequency) feature matrix. TF-IDF is a statistical technique used to quantify the importance of a word in a set of documents, commonly employed in natural language processing tasks and information retrieval.

**Parameter max_df=0.95**: This parameter defines the maximum document frequency threshold for the terms to be considered. Here, it's set to 0.95, meaning that words appearing in more than 95% of the documents will be ignored. This helps eliminate common words that do not contribute much to the meaning of the text.

**Parameter min_df=2**: This parameter establishes the minimum document frequency for the terms. In this case, terms that appear in fewer than two documents will be ignored. This helps filter out rare terms that may occur in only a few samples and are, therefore, less relevant to the overall analysis.

**Parameter stop_words='english'**: This parameter instructs the vectorizer to remove all English stop words from the analysis. Stop words are common words (such as "and," "the," "in") that are usually filtered out in natural language processing because they are very frequent and do not carry significant information for text analysis.

In [60]:
# Vectorizer settings: ignore terms found in more than 95% of the documents or
# in fewer than 2 documents, and drop English stop words.
tfidf_options = {'max_df': 0.95, 'min_df': 2, 'stop_words': 'english'}
tf_idf = TfidfVectorizer(**tfidf_options)

# The vocabulary and IDF weights are learned on the training text only;
# the test text is projected onto that same vocabulary.
df_train_tfidf = tf_idf.fit_transform(df_train['transformed_text'])
df_test_tfidf = tf_idf.transform(df_test['transformed_text'])

In [61]:
# Converting the sparse TF-IDF matrices to dense arrays
X_train_array, X_test_array = (
    sparse_matrix.toarray() for sparse_matrix in (df_train_tfidf, df_test_tfidf)
)

### 2) Data preparation

We need to convert the target variable to a numerical representation. We will use Label Encoding.

In [32]:
# Fit the label encoder on the training labels only
le = LabelEncoder().fit(df_train['feeling'])

# Encode both splits with that single, shared mapping
y_train_le = le.transform(df_train['feeling'])
y_test_le = le.transform(df_test['feeling'])

Let's handle class imbalance using the function `compute_class_weight`.

**compute_class_weight**: This is a function from scikit-learn that calculates the weights for the classes. These weights can be used in classification models to give more importance to underrepresented classes in the dataset.

**'balanced'**: This parameter indicates that the class weights should be computed in a way that balances the dataset. This is done inversely proportional to the frequency of the classes in the dataset. More frequent classes receive a lower weight, while less frequent classes receive a higher weight.

**classes = np.unique(y_train_le)**: Here, np.unique(y_train_le) finds all the unique classes in the training dataset. The parameter `classes` informs the `compute_class_weight` function about these unique classes.

**y = y_train_le**: This is the label vector of the training dataset. The function will use these labels to calculate the frequency of each class.

The result, stored in `class_weight`, is an array where each class has an associated weight. These weights can be used in classification models (such as a decision tree, logistic regression, SVM, etc.) to compensate for class imbalance.

In [34]:
# Balanced class weights, one entry per distinct encoded training label
observed_classes = np.unique(y_train_le)
class_weight = compute_class_weight(class_weight='balanced',
                                    classes=observed_classes,
                                    y=y_train_le)

In [None]:
# Data splitting

# Hold out 20% of the training rows for validation. The split is stratified on
# the label so each class keeps its proportion, and seeded so that re-running
# the notebook reproduces the same train/validation partition.
X_train, X_val, y_train, y_val = train_test_split(X_train_array,
                                                  y_train_le,
                                                  test_size=0.2,
                                                  stratify=y_train_le,
                                                  random_state=42)

# Converting the targets to one-hot vectors. The width is pinned to the number
# of classes known to the label encoder so all three arrays share it, even if
# one split happens to lack the highest-numbered class.
num_classes = len(le.classes_)
y_train_encoded = to_categorical(y_train, num_classes=num_classes)
y_test_encoded = to_categorical(y_test_le, num_classes=num_classes)
y_val_encoded = to_categorical(y_val, num_classes=num_classes)

# Shape
y_train_encoded.shape, y_test_encoded.shape, y_val_encoded.shape

---

## 3) Building the model

In [38]:
# Building the model

# A Sequential model is a linear stack of layers. Four SELU-activated hidden
# layers are followed by a 6-way softmax output; the three widest hidden
# layers use LeCun-normal initialisation and an L2 kernel penalty.
n_features = X_train.shape[1]

model = tf.keras.Sequential([
    # First dense (fully-connected) layer; it also declares the input shape.
    tf.keras.layers.Dense(
        4096,
        activation='selu',
        kernel_initializer='lecun_normal',
        input_shape=(n_features,),
        kernel_regularizer=tf.keras.regularizers.l2(0.01),
    ),
    # Second dense layer
    tf.keras.layers.Dense(
        2048,
        activation='selu',
        kernel_initializer='lecun_normal',
        kernel_regularizer=tf.keras.regularizers.l2(0.01),
    ),
    # Third dense layer
    tf.keras.layers.Dense(
        1024,
        activation='selu',
        kernel_initializer='lecun_normal',
        kernel_regularizer=tf.keras.regularizers.l2(0.1),
    ),
    # Fourth dense layer: 64 neurons, SELU activation
    tf.keras.layers.Dense(64, activation='selu'),
    # Output layer: 6 neurons with softmax for multiclass classification
    tf.keras.layers.Dense(6, activation='softmax'),
])


In [None]:
# Overwrite the bias vector of the model's last layer with the class-weight values.
# NOTE(review): `class_weight` is the output of compute_class_weight('balanced', ...);
# such weights are normally handed to `fit(class_weight=...)` to rescale the loss.
# Confirm that using them as initial output biases is intended.
output_layer = model.layers[-1]
output_layer.bias.assign(class_weight)

In [None]:

# Compiling the model
#
# - Optimizer: Adam, an alternative to classical stochastic gradient descent
#   for iteratively updating the network weights from the training data.
# - Loss: categorical cross-entropy, suited to multiclass problems whose labels
#   are provided in one-hot encoded format.
# - Metric: accuracy, a common measure of classification performance.
compile_settings = {
    'optimizer': 'Adam',
    'loss': tf.losses.categorical_crossentropy,
    'metrics': ['accuracy'],
}
model.compile(**compile_settings)

model.summary()

## 5) Callbacks and Early Stopping

In [43]:
def step_decay(epoch):
    """Step-wise learning-rate schedule.

    Starts at 0.001 and multiplies the rate by 0.5 each time ``epoch + 1``
    reaches another multiple of 10.

    Args:
        epoch: Epoch index supplied by the learning-rate scheduler callback.

    Returns:
        The learning rate to use for that epoch.
    """
    base_lr, decay_factor, epochs_per_step = 0.001, 0.5, 10.0
    completed_steps = math.floor((epoch + 1) / epochs_per_step)
    return base_lr * math.pow(decay_factor, completed_steps)

In [44]:
# Learning-rate scheduler: asks step_decay for the rate of each epoch
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(schedule=step_decay)

# Early stopping: watch the validation loss, allow 5 epochs of patience and
# restore the best weights seen when training stops.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

## 6) Training the model

In [None]:
%%time

# Hyperparameters
epochs_len = 20
batch_size = 256

history = model.fit(
    X_train,
    y_train_encoded,
    validation_data = (X_val, y_val_encoded),
    epochs = epochs_len,
    batch_size = batch_size,
    callbacks = [early_stopping, lr_scheduler]
)

## 7) Model evaluation

In [48]:
# Extracting the per-epoch training and validation losses recorded by fit()
loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
# Learning curves: training vs. validation loss, one point per epoch.
# Title and axis labels are set so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(loss, label = 'loss')
ax.plot(val_loss, label = 'val_loss')
ax.set(title = 'Training vs. validation loss', xlabel = 'Epoch', ylabel = 'Loss')
ax.legend()
plt.show()

In [None]:
# Predictions with test data (one row of class probabilities per sentence)
predictions = model.predict(X_test_array)

# Extracting the labels: index of the most probable class in each row
predictions_labels = predictions.argmax(axis = 1)

# Encoded class ids and their original names, so the report rows are labelled
# with the feeling names instead of bare integers.
class_ids = list(range(len(le.classes_)))

# Metrics
print('Metrics')
print(classification_report(y_test_le,
                            predictions_labels,
                            labels = class_ids,
                            target_names = le.classes_))

# Confusion matrix (rows: true class, columns: predicted class, in class_ids order)
print('Confusion Matrix')
print(confusion_matrix(y_test_le, predictions_labels, labels = class_ids))

# Accuracy
print('Accuracy')
print(accuracy_score(y_test_le, predictions_labels))

In [70]:
# Saving the model. The target folder is created first so that saving does
# not depend on `models/` already existing in the working directory.
Path('models').mkdir(parents=True, exist_ok=True)
model.save('models/model_v1.keras')

---

## 8) Loading model

In [None]:
# Reload the model that was saved to disk
loaded_model = tf.keras.models.load_model('models/model_v1.keras')

# New sentence to classify
sentence = 'I am distressed because my girlfriend is sick'

# One-row dataframe holding the raw sentence and its preprocessed version
df_new = pd.DataFrame({'text': [sentence]}).assign(
    transformed_text=lambda frame: frame['text'].apply(data_preprocessing)
)

df_new.head()

In [None]:
# Vectorizing with the TF-IDF vectorizer that was fitted on the training text
df_new_tfidf = tf_idf.transform(df_new['transformed_text'])

# To array
df_new_array = df_new_tfidf.toarray()

# Predictions: one row of class probabilities per input sentence
prediction = loaded_model.predict(df_new_array)

# Select the class with the highest probability
class_prob = np.argmax(prediction, axis = 1)

# Get the class name back from the encoded id
class_label = le.inverse_transform(class_prob)

# Class predicted (index 0: there is a single sentence, and printing the whole
# array would show its repr, e.g. "['label']", instead of the label itself)
print(f'The feeling is {class_label[0]}')

## The End