In [None]:
"""!git clone https://github.com/FabriDeCastelli/HLT-News-Category.git
! pip install keras-tuner

import os
os.chdir("/content/HLT-News-Category/src/test")"""

In [1]:
import warnings
import os
import sys

# Blanket filter: every warning category is silenced for the rest of the session,
# including deprecation notices raised by the libraries imported below.
warnings.filterwarnings('ignore')

# os.getcwd() instead of the IPython-only `%pwd` magic, so the cell is plain
# Python and still runs when the notebook is exported to a script.
current_dir = os.getcwd()

# Directory two levels above the working directory; presumably the repository
# root that holds the `src` and `config` packages imported in the next cell.
parent_dir = os.path.abspath(os.path.join(current_dir, '../..'))

# Guarded append so that re-running this cell does not pile up duplicate entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [2]:
from src.main.models.bidirLSMT import BidirectionalLSTM
from src.main.pipeline.functions import clean_text, stop_words_removal, remove_contractions, tokenize
from src.main.utilities.utils import get_dataset, create_embedding_matrix, load_pretrained_embedddings, split_train_val_test
from sklearn.model_selection import train_test_split
import keras as K
from config.config import (
    TENSORBOARD_LOGS, 
    EMBEDDINGS_PATH, 
    glove_file, 
    google_file, 
    fastText_file, 
    VOCAB_SIZE
    )


# NOTE(review): `warnings` is already imported and the same blanket "ignore"
# filter is already installed in the first setup cell; this repeat is harmless
# but redundant when the notebook is run top to bottom.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Build the model wrapper and attach the text preprocessing steps, in the order
# they are listed here.
bidirLSTM = BidirectionalLSTM()
preprocessing_steps = [clean_text, remove_contractions, stop_words_removal, tokenize]
bidirLSTM.pipeline = preprocessing_steps

# Load the dataset with one-hot targets, then run the inputs through the
# pipeline configured above (save=False is forwarded to run_pipeline).
inputs, targets = get_dataset(one_hot=True)
inputs = bidirLSTM.run_pipeline(inputs, save=False)

# Load the GloVe vectors and collect the coverage statistics that the next cell
# prints (a ratio plus the collection of unmatched words).
glove_path = os.path.join(EMBEDDINGS_PATH, glove_file)
pretrained_embeddings = load_pretrained_embedddings(glove_path, "glove")
find_ratio, unmached_words = create_embedding_matrix(pretrained_embeddings)

# Train / validation / test split. The original author describes it as
# stratified; that logic lives inside split_train_val_test and is not visible
# from this notebook.
x_train, x_val, x_test, y_train, y_val, y_test = split_train_val_test(
    inputs,
    targets,
    validation_size=0.2,
    test_size=0.1,
)

In [None]:
import random

# Coverage statistics produced by create_embedding_matrix in the previous cell.
print("Find ratio:", find_ratio)
print("Unmached tokens count:", len(unmached_words))

# random.sample raises ValueError when asked for more items than the population
# holds, so the sample size is capped. list() keeps the call valid if the
# collection is a set (random.sample only accepts sequences on Python 3.11+).
unmatched_pool = list(unmached_words)
sample_size = min(10, len(unmatched_pool))
print("Unmached words examples:\n" + str(random.sample(unmatched_pool, sample_size)))

In [None]:
# Stop training once the validation loss has gone 2 epochs without improving.
early_stopping = K.callbacks.EarlyStopping(monitor='val_loss', patience=2)

# TensorBoard logs go to the directory obtained by formatting TENSORBOARD_LOGS
# with the model's repr.
tensorboard_dir = TENSORBOARD_LOGS.format(repr(bidirLSTM))
tensorboard_logger = K.callbacks.TensorBoard(tensorboard_dir)

callbacks = [early_stopping, tensorboard_logger]

# n_iter and refit are passed straight to the project's grid_search.
# NOTE(review): with refit=False, confirm the wrapper still holds a fitted model
# for the predict call made later in the notebook.
bidirLSTM.grid_search(
    x_train,
    y_train,
    x_val,
    y_val,
    callbacks=callbacks,
    n_iter=1,
    refit=False,
)

In [None]:
# Load the TensorBoard notebook extension, then open TensorBoard on localhost
# pointed at the same log directory expression that was handed to the
# TensorBoard callback in the grid-search cell.
%load_ext tensorboard
%tensorboard --logdir "{TENSORBOARD_LOGS.format(repr(bidirLSTM))}" --host localhost

In [None]:
#bidirLSTM.evaluate(x_test, y_test)

In [None]:
import numpy as np
from sklearn.metrics import classification_report
from src.main.utilities import plotting


# Run the model on the test inputs.
# NOTE(review): the next cell takes argmax over axis 1 of this result, so it
# presumably holds one score per class for each sample — confirm.
y_pred = bidirLSTM.predict(x_test)


In [None]:
# Collapse the model output and the one-hot targets to integer class indices.
y_pred2 = np.argmax(y_pred, axis=1)
y_test2 = np.argmax(y_test.to_numpy(), axis=1)

# NOTE(review): argument order kept as in the original because the signature of
# the project's plot_confusion_matrix is not visible here — confirm that it
# expects predictions first; otherwise the matrix is transposed.
plotting.plot_confusion_matrix(y_pred2, y_test2)

# classification_report takes (y_true, y_pred). Passing them the other way
# round exchanges the precision and recall columns and makes "support" count
# predicted rather than true samples per class.
print(classification_report(y_test2, y_pred2))

In [None]:
# Persist the model through the project's save_model(); the destination and
# format are decided inside that method and are not visible from this notebook.
bidirLSTM.save_model()