In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import numpy as np

In [None]:
from utils.utils import load_data

# Dataset folder, given as a relative path.
FOLDER_PATH = "sentiment_datasets/projekt2_data"

# Load the dataset through the project helper, passing "," as the delimiter.
data_df = load_data(FOLDER_PATH, df_delimiter=",")
# Preview the first 10 rows as the cell output.
data_df.head(10)

In [None]:
# Whitespace-split every phrase and count the resulting tokens per row.
tokens_per_phrase = data_df["Phrase"].str.split().str.len()

# Histogram of those counts; the trailing semicolon keeps the plot call's
# return value out of the cell output.
tokens_per_phrase.plot(
    kind="hist",
    title="number of tokens in line distribution",
    grid=True,
    figsize=(10, 10),
    bins=50,
);

# BERT

In [None]:
from preprocess_data.bert_input_converter import get_proper_input_to_bert
from utils.utils_result import save_report, update_fit_params, evaluation

In [None]:
# Sequence length handed to the BERT input converter (and reused below as
# the model input shape).
MAX_LEN_SEQ = 50

# The converter's result is unpacked as three (x, y) pairs: train,
# validation and test.
train_pair, val_pair, test_pair = get_proper_input_to_bert(
    data_df,
    x_label="Phrase",
    y_label="Sentiment",
    max_len_seq=MAX_LEN_SEQ,
)
x_train, y_train = train_pair
x_val, y_val = val_pair
x_test, y_test = test_pair

In [None]:
# Settings shared by every model trained in this notebook.

# Passed as-is to update_fit_params for each model.
tensorboard_params = None

# Keyword arguments later expanded into each model's fit(**fit_params).
fit_params = dict(
    x=x_train,
    y=y_train,
    epochs=50,
    batch_size=64,
    validation_data=(x_val, y_val),
)

# Input specification reused by all three model parameter dicts.
input_params = dict(shape=(MAX_LEN_SEQ,), dtype="int32")

# Passed into, and rebound from, every evaluation() call below.
test_result_dict = dict()

## Word Level

### RNN

In [None]:
from architecture.models import model_rnn

In [None]:
# Parameters for the word-level BERT + RNN model.

# Settings handed to the model builder under "emb_params".
bert_params_rnn = {
    "trainable": True,
    "output_dim": 768,
    # "sequence_output" here; the phrase-level FFNN section uses "pooled_output".
    "output_type": "sequence_output",
    "signature": "tokens",
    "n_fine_tune_layers": 3
}

model_params_bert_rnn = {
    "input_layer": "bert_input",
    "emb_layer": "Bert",
    "input_params": input_params,
    "emb_params": bert_params_rnn,
    "out_activation": "softmax",
    # One output unit per distinct label value found in y_train.
    "out_units": np.unique(y_train).shape[0],
    "loss": "sparse_categorical_crossentropy",
    "optimizer": "adam"
}

# This name is passed to update_fit_params, save_report and evaluation, so it
# must identify the BERT run, matching "model_bert_cnn" / "model_bert_ffnn".
# It previously read "model_elmo_rnn", which labelled this model as ELMo.
model_name = "model_bert_rnn"
model_bert_rnn = model_rnn(model_params_bert_rnn)

# NOTE(review): fit_params is rebound to the helper's return value, so each
# later model receives the already-updated dict — confirm update_fit_params
# does not accumulate per-model state across these calls.
fit_params, markered_path = update_fit_params(fit_params, model_name=model_name, tensorboard_params=tensorboard_params)

In [None]:
# training
# Fit using the keyword arguments prepared in the parameter cell.
model_bert_rnn.fit(**fit_params);

# The return value of save_report is rebound to model_bert_rnn, so the
# evaluation cell works with whatever the helper hands back.
model_bert_rnn = save_report(
    model=model_bert_rnn,
    model_name=model_name,
    markered_path=markered_path,
)

In [None]:
# evaluation
# Run the project evaluation helper on the test split; test_result_dict is
# passed in and rebound to the helper's return value.
test_result_dict = evaluation(
    model=model_bert_rnn,
    x_test=x_test,
    y_test=y_test,
    model_name=model_name,
    test_result_dict=test_result_dict,
)

### CNN

In [None]:
from architecture.models import model_cnn

In [None]:
# Parameters for the word-level BERT + CNN model.

# Settings handed to the model builder under "emb_params".
bert_params_cnn = dict(
    trainable=True,
    output_dim=768,
    output_type="sequence_output",
    signature="tokens",
    n_fine_tune_layers=3,
)

# One output unit per distinct label value found in y_train.
n_classes_cnn = np.unique(y_train).shape[0]

model_params_bert_cnn = dict(
    input_layer="bert_input",
    emb_layer="Bert",
    input_params=input_params,
    emb_params=bert_params_cnn,
    out_activation="softmax",
    out_units=n_classes_cnn,
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
)

# Name passed to update_fit_params and evaluation for this model.
model_name = "model_bert_cnn"
model_bert_cnn = model_cnn(model_params_bert_cnn)

# Rebinds the shared fit_params and yields the path stored in markered_path.
fit_params, markered_path = update_fit_params(
    fit_params,
    model_name=model_name,
    tensorboard_params=tensorboard_params,
)

In [None]:
# training
# Fit using the keyword arguments prepared in the parameter cell.
model_bert_cnn.fit(**fit_params);

# Re-enabled so the CNN goes through the same save_report step as the RNN and
# FFNN models before evaluation; the helper's return value replaces the model.
# NOTE(review): this call was commented out — confirm it was not disabled on
# purpose (e.g. because save_report failed for this architecture).
model_bert_cnn = save_report(model=model_bert_cnn, model_name=model_name, markered_path=markered_path)

In [None]:
# evaluation
# Run the project evaluation helper on the test split; test_result_dict is
# passed in and rebound to the helper's return value.
test_result_dict = evaluation(
    model=model_bert_cnn,
    x_test=x_test,
    y_test=y_test,
    model_name=model_name,
    test_result_dict=test_result_dict,
)

## Phrase Level FFNN

In [None]:
from architecture.models import model_ffnn

In [None]:
# Parameters for the phrase-level BERT + FFNN model.

# Settings handed to the model builder under "emb_params". Unlike the
# word-level sections, output_type is "pooled_output" here.
bert_params_ffnn = dict(
    trainable=True,
    output_dim=768,
    output_type="pooled_output",
    signature="tokens",
    n_fine_tune_layers=3,
)

# One output unit per distinct label value found in y_train.
n_classes_ffnn = np.unique(y_train).shape[0]

model_params_bert_ffnn = dict(
    input_layer="bert_input",
    emb_layer="Bert",
    input_params=input_params,
    emb_params=bert_params_ffnn,
    out_activation="softmax",
    out_units=n_classes_ffnn,
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
)

# Name passed to update_fit_params, save_report and evaluation for this model.
model_name = "model_bert_ffnn"
model_bert_ffnn = model_ffnn(model_params_bert_ffnn)

# Rebinds the shared fit_params and yields the path stored in markered_path.
fit_params, markered_path = update_fit_params(
    fit_params,
    model_name=model_name,
    tensorboard_params=tensorboard_params,
)

In [None]:
# training
# Fit using the keyword arguments prepared in the parameter cell.
model_bert_ffnn.fit(**fit_params);

# The return value of save_report is rebound to model_bert_ffnn, so the
# evaluation cell works with whatever the helper hands back.
model_bert_ffnn = save_report(
    model=model_bert_ffnn,
    model_name=model_name,
    markered_path=markered_path,
)

In [None]:
# evaluation
# Run the project evaluation helper on the test split; test_result_dict is
# passed in and rebound to the helper's return value.
test_result_dict = evaluation(
    model=model_bert_ffnn,
    x_test=x_test,
    y_test=y_test,
    model_name=model_name,
    test_result_dict=test_result_dict,
)

# Results

In [None]:
# Display the results object returned by the evaluation() calls above.
test_result_dict

In [None]:
from utils.utils_result import update_common_history_folder, display_results

In [None]:
# NOTE(review): this call uses "about_model" while the next one reads from
# "about_models/..." — confirm the singular/plural difference is intentional.
update_common_history_folder("about_model")
# NOTE(review): "comparision" looks like a misspelling of "comparison"; it is
# left untouched because it has to match the path the helpers actually use.
display_results("about_models/comparision")