# Neural Network Model - Aggressivity Score Prediction

In [1]:
import os
import numpy as np
import pandas as pd


# For viz
import matplotlib.pyplot as plt
import seaborn as sns

# Local imports
from utils.training_set import TrainingSet
from xgb_model import XGBoost
from utils.torch_utils import DataPreprocessor
from neural_network import NeuralTrainer

#### CONSTANTS

In [2]:
# Root data directory. Defaults to the original local checkout location; set the
# AMLO_DATA_PATH environment variable to run the notebook on another machine
# without editing this cell. The value is normalised to end with exactly one "/"
# because the paths below (and later cells) append names to it directly.
_DEFAULT_DATA_PATH = "C:/Users/fdmol/Desktop/AMLO-NLP/src/data/"
DATA_PATH = (os.environ.get("AMLO_DATA_PATH") or _DEFAULT_DATA_PATH).rstrip("/\\") + "/"

# Folder of text files (listed with os.listdir in the next cell).
PATH = f"{DATA_PATH}text_files/"
# Excel workbook with the labels (per its file name).
LABELED_PATH = f"{DATA_PATH}amlo_labeling.xlsx"

### Pipeline

Create the training set, along with its corresponding txt files

In [3]:
# List every entry in the text-files folder (PATH comes from the constants cell).
# NOTE(review): `all_files` is not referenced again in the cells visible here — confirm it is still needed.
all_files = os.listdir(PATH)

# Build the training set through the project's TrainingSet helper, with
# remove_stopwords enabled. The progress bar and "Conference ... is not agressive"
# lines recorded below are this cell's output.
training_set = TrainingSet(remove_stopwords=True)
training_set.create_training_set()

 39%|███▉      | 490/1246 [00:00<00:00, 4646.41it/s]

Conference 20181207 is not agressive
Conference 20190102 is not agressive
Conference 20190111 is not agressive
Conference 20190227 is not agressive
Conference 20200128 is not agressive
Conference 20210510 is not agressive


100%|██████████| 1246/1246 [00:00<00:00, 5142.98it/s]

Conference 20221125 is not agressive





In [4]:
# Folder holding the training data, derived from DATA_PATH so the data root is
# defined in one place (the constants cell) instead of being repeated here.
folder_path = f"{DATA_PATH}training_data/"

# Parameters handed to the project's XGBoost wrapper as `xgb_params`:
# squared-error regression objective, evaluated with RMSE.
param = {
    "max_depth": 8,
    "eta": 0.15,
    "objective": "reg:squarederror",
    "eval_metric": "rmse",
}

xgb_model = XGBoost(
    folder_path=folder_path,
    dialogues_path=training_set.DIALOGUES_PATH,
    xgb_params=param,
)

# Presumably these populate xgb_model.training_df and xgb_model.unseen_df,
# which the next cell reads — verify against the XGBoost wrapper.
xgb_model.create_regression_training_df()
xgb_model.create_unseen_df()

In [5]:
# Pull both frames off the XGBoost wrapper for use by the neural-network cells.
training_df, unseen_df = xgb_model.training_df, xgb_model.unseen_df

### Trying to implement a NNet model

In [8]:
# Hand both frames to the project's DataPreprocessor.
data_preprocessor = DataPreprocessor(training_df, unseen_df)

# prepare_data() yields the two loaders, unpacked in (train, unseen) order.
(train_loader, unseen_loader) = data_preprocessor.prepare_data()

# Read the vocabulary attribute only after prepare_data() has run, as in the original cell.
vocab = data_preprocessor.vocab


In [None]:
# Construct the project's NeuralTrainer from the two loaders produced by the previous cell.
# NOTE(review): no cell visible in this notebook calls a method on `neural_trainer` or assigns
# `nnet_agressivity_scores`, which later cells read — confirm where training and scoring happen.
neural_trainer = NeuralTrainer(train_loader, unseen_loader)

In [7]:
# Distribution of the neural-net scores, drawn on an explicit Axes.
# NOTE(review): `nnet_agressivity_scores` is not assigned in any cell visible here;
# the recorded run of this cell raised NameError.
sns.set()
score_values = nnet_agressivity_scores["score"]  # looked up first, before any figure is created
fig, ax = plt.subplots()
ax.hist(score_values, bins=25)
ax.set_xlabel("Score")
ax.set_ylabel("Frequency")
ax.set_title("Distribution of Aggressivity Scores - Neural Net Model")

NameError: name 'nnet_agressivity_scores' is not defined

### Add this score to the complete dataset

In [None]:
# Load the complete dataset CSV from the data folder.
complete_dataset_BDD = pd.read_csv(f"{DATA_PATH}complete_dataset_BDD.csv")

# Relabel the score column (in place, as before) under the name the merge cell selects.
nnet_agressivity_scores.rename(columns=dict(score="nnet_score"), inplace=True)

In [None]:
# Left-join the neural-net score onto the complete dataset by "id", keeping every
# row of the complete dataset whether or not it has a score.
#
# The result is written back to the same CSV it was read from, so on a second run
# the loaded frame already has an "nnet_score" column and the merge would emit
# "nnet_score_x"/"nnet_score_y" instead. Dropping any stale column first keeps the
# cell re-runnable; errors="ignore" makes the drop a no-op on the first run.
complete_dataset_BDD = pd.merge(
    complete_dataset_BDD.drop(columns=["nnet_score"], errors="ignore"),
    nnet_agressivity_scores[["id", "nnet_score"]],
    on="id",
    how="left",
)

complete_dataset_BDD.to_csv(f"{DATA_PATH}complete_dataset_BDD.csv", index=False)