# Load packages


In [None]:
import os
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torchtext
import transformers
import umap
from nltk.tokenize import TweetTokenizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader
from torchtext.vocab import build_vocab_from_iterator
from tqdm import tqdm

from scripts.data_loading_utils import load_embedding, read_tweet_data
from scripts.model_training_utils import (
    get_labels_and_predictions,
    plot_confusion_matrix,
    plot_metrics,
    training_loop,
)
from scripts.models import LSTM, BERTClassifier, LSTMWithAttention, count_parameters
from scripts.plotting_utilities import (
    generate_ngram_frequencies,
    generate_wordcloud_with_ngrams,
    plot_top_common_ngrams,
)
from scripts.text_preprocessing_utils import preprocess_tweet
from scripts.tweet_data_set import BERTTweetsDataset, TweetsDataset

print(f"PyTorch version: {torch.__version__}")
print(f"torchtext version: {torchtext.__version__}")

In [None]:
nltk.download("stopwords")

In [None]:
pd.set_option("display.max_colwidth", None)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
print(f"Using device: {device}")

# Setup seeds


Seed the random number generators so that results are reproducible across runs:


In [None]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's built-in ``random`` module, NumPy and PyTorch (CPU and all
    CUDA devices), and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Local import keeps this cell self-contained; `random` is not imported
    # at the top of the notebook.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers the current device as well as every other visible CUDA device.
    torch.cuda.manual_seed_all(seed)
    # Trade cuDNN auto-tuning speed for repeatable kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_everything(42)

# Setup Data Path


In [None]:
# Data files live in the `data` folder of the current working directory.
data_dir_path = Path(os.getcwd()) / "data"

In [None]:
# Trained models are saved to / loaded from `models_weights` in the working directory.
models_weights_dir_path = Path(os.getcwd()) / "models_weights"

In [None]:
print(f"Data directory: {data_dir_path}")
print(f"Models weights directory: {models_weights_dir_path}")

# Read data


In [None]:
training_data = read_tweet_data(data_dir_path / "twitter-training-data.txt")

In [None]:
development_data = read_tweet_data(data_dir_path / "twitter-dev-data.txt")

In [None]:
test1_data = read_tweet_data(data_dir_path / "twitter-test1.txt")

In [None]:
test2_data = read_tweet_data(data_dir_path / "twitter-test2.txt")

In [None]:
test3_data = read_tweet_data(data_dir_path / "twitter-test3.txt")

In [None]:
training_data.head()

In [None]:
# Absolute number of tweets per sentiment class in each split.
for split_name, split_frame in (
    ("Training data", training_data),
    ("Development data", development_data),
):
    print(f"{split_name}: {split_frame['tweet_sentiment'].value_counts().to_dict()}")

In [None]:
# Relative share of each sentiment class in each split.
for split_name, split_frame in (
    ("Training data", training_data),
    ("Development data", development_data),
):
    class_shares = split_frame["tweet_sentiment"].value_counts(normalize=True)
    print(f"{split_name}: {class_shares.to_dict()}")

# Data Cleaning & Exploratory Data Analysis


In [None]:
tokenizer = TweetTokenizer()

In [None]:
# Clean every training tweet (takes about 20 seconds).
# `args` forwards the shared tokenizer as the second positional argument.
training_data["tweet_text_cleaned"] = training_data["tweet_text"].apply(
    preprocess_tweet, args=(tokenizer,)
)

In [None]:
# Apply the same cleaning to the development tweets.
development_data["tweet_text_cleaned"] = development_data["tweet_text"].apply(
    preprocess_tweet, args=(tokenizer,)
)

In [None]:
# Apply the same cleaning to the first test set.
test1_data["tweet_text_cleaned"] = test1_data["tweet_text"].apply(
    preprocess_tweet, args=(tokenizer,)
)

In [None]:
# Apply the same cleaning to the second test set.
test2_data["tweet_text_cleaned"] = test2_data["tweet_text"].apply(
    preprocess_tweet, args=(tokenizer,)
)

In [None]:
# Apply the same cleaning to the third test set.
test3_data["tweet_text_cleaned"] = test3_data["tweet_text"].apply(
    preprocess_tweet, args=(tokenizer,)
)

In [None]:
# One training subset per sentiment class, used by the n-gram analysis.
positive_tweets, negative_tweets, neutral_tweets = (
    training_data[training_data["tweet_sentiment"] == sentiment]
    for sentiment in ("positive", "negative", "neutral")
)

## Tweet length:


In [None]:
# Length (via `.str.len()`) of each tweet before and after cleaning; these
# columns feed the distribution plots of this section.
training_data["tweet_length"] = training_data["tweet_text"].str.len()
training_data["tweet_cleaned_length"] = training_data["tweet_text_cleaned"].str.len()

In [None]:
# Distribution of the raw tweet length, one coloured panel per sentiment class.
sns.displot(
    data=training_data,
    x="tweet_length",
    hue="tweet_sentiment",
    col="tweet_sentiment",
)

In [None]:
# Distribution of the cleaned tweet length, one coloured panel per sentiment class.
sns.displot(
    data=training_data,
    x="tweet_cleaned_length",
    hue="tweet_sentiment",
    col="tweet_sentiment",
)

## Generate n-grams frequencies


In [None]:
# Uni-, bi- and tri-gram frequencies of the positive tweets (up to 1000 features each).
positive_unigram_freq, positive_bigram_freq, positive_trigram_freq = (
    generate_ngram_frequencies(
        corpus=positive_tweets["tweet_text_cleaned"],
        n_grams=ngram_size,
        max_features=1000,
    )
    for ngram_size in (1, 2, 3)
)

In [None]:
# Uni-, bi- and tri-gram frequencies of the negative tweets (up to 1000 features each).
negative_unigram_freq, negative_bigram_freq, negative_trigram_freq = (
    generate_ngram_frequencies(
        corpus=negative_tweets["tweet_text_cleaned"],
        n_grams=ngram_size,
        max_features=1000,
    )
    for ngram_size in (1, 2, 3)
)

In [None]:
# Uni-, bi- and tri-gram frequencies of the neutral tweets (up to 1000 features each).
neutral_unigram_freq, neutral_bigram_freq, neutral_trigram_freq = (
    generate_ngram_frequencies(
        corpus=neutral_tweets["tweet_text_cleaned"],
        n_grams=ngram_size,
        max_features=1000,
    )
    for ngram_size in (1, 2, 3)
)

In [None]:
# Plot the most common uni-, bi- and tri-grams of the positive tweets.
plot_top_common_ngrams(
    [positive_unigram_freq, positive_bigram_freq, positive_trigram_freq]
)

In [None]:
# Plot the most common uni-, bi- and tri-grams of the negative tweets.
plot_top_common_ngrams(
    [negative_unigram_freq, negative_bigram_freq, negative_trigram_freq]
)

In [None]:
# Plot the most common uni-, bi- and tri-grams of the neutral tweets.
plot_top_common_ngrams(
    [neutral_unigram_freq, neutral_bigram_freq, neutral_trigram_freq]
)

## Generate wordclouds


In [None]:
# One word cloud per n-gram size (1, 2, 3) for the positive tweets.
positive_ngram_freqs = (
    positive_unigram_freq,
    positive_bigram_freq,
    positive_trigram_freq,
)
for ngram_size, ngram_frequencies in enumerate(positive_ngram_freqs, start=1):
    generate_wordcloud_with_ngrams(ngram_frequencies, ngram_size, "Positive tweets")

In [None]:
# One word cloud per n-gram size (1, 2, 3) for the negative tweets.
negative_ngram_freqs = (
    negative_unigram_freq,
    negative_bigram_freq,
    negative_trigram_freq,
)
for ngram_size, ngram_frequencies in enumerate(negative_ngram_freqs, start=1):
    generate_wordcloud_with_ngrams(ngram_frequencies, ngram_size, "Negative tweets")

In [None]:
# One word cloud per n-gram size (1, 2, 3) for the neutral tweets.
neutral_ngram_freqs = (
    neutral_unigram_freq,
    neutral_bigram_freq,
    neutral_trigram_freq,
)
for ngram_size, ngram_frequencies in enumerate(neutral_ngram_freqs, start=1):
    generate_wordcloud_with_ngrams(ngram_frequencies, ngram_size, "Neutral tweets")

## UMAP


Generate a scatter plot of the data by reducing its dimensionality using TF-IDF features and UMAP algorithm:


In [None]:
# TF-IDF over uni- to tri-grams: English stop words removed, terms that occur
# in fewer than 5 tweets dropped, and at most 5000 features kept.
tfidf_vectorizer = TfidfVectorizer(
    min_df=5, stop_words="english", ngram_range=(1, 3), max_features=5000
)

In [None]:
tfidf_word_doc_matrix = tfidf_vectorizer.fit_transform(
    training_data["tweet_text_cleaned"]
)

In [None]:
# this code takes 30 seconds
# Fit UMAP on the TF-IDF matrix with the Hellinger metric; the fitted object's
# `embedding_` attribute holds the projected coordinates used for plotting.
tfidf_embedding = umap.UMAP(metric="hellinger").fit(tfidf_word_doc_matrix)

In [None]:
# Scatter the first two UMAP coordinates, coloured by the tweet's sentiment.
sns.scatterplot(
    x=tfidf_embedding.embedding_[:, 0],
    y=tfidf_embedding.embedding_[:, 1],
    hue=training_data["tweet_sentiment"],
)

# Traditional classification


## Split data


Split the training data into a training set, used for K-fold cross-validated grid search, and a held-out test set used for the final evaluation.


In [None]:
# Hold out 20% of the cleaned training tweets, stratified on the sentiment
# label so both parts keep the same class balance.
sentiment_labels = training_data["tweet_sentiment"]
X_train, X_test, y_train, y_test = train_test_split(
    training_data["tweet_text_cleaned"],
    sentiment_labels,
    stratify=sentiment_labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
print(f"Training data: {y_train.value_counts(normalize=True).to_dict()}")
print(f"Test data: {y_test.value_counts(normalize=True).to_dict()}")

## Naive Bayes


In [None]:
# Two-step pipeline: vectorise the text, then classify with multinomial Naive
# Bayes. The grid search may swap the "vect" step for another vectoriser.
naive_bayes_steps = [
    ("vect", CountVectorizer()),
    ("clf", MultinomialNB()),
]
naive_bayes_pipeline = Pipeline(steps=naive_bayes_steps)

In [None]:
# Grid for the Naive Bayes pipeline. Keys prefixed with `vect__` / `clf__`
# address parameters of the corresponding pipeline step; the bare "vect" key
# replaces the whole vectoriser step.
parameters = {
    "vect": [TfidfVectorizer(), CountVectorizer()],
    "vect__stop_words": ["english"],
    "vect__max_df": (0.5, 0.75, 1.0),
    "vect__min_df": [5, 10, 15],
    "vect__max_features": (None, 5000, 10000, 50000),
    "vect__ngram_range": [(1, 1), (1, 2), (1, 3)],
    "clf__alpha": (0.01, 0.1, 1),
}

In [None]:
# Exhaustive search over `parameters` with 5-fold cross validation, using all
# available CPU cores (`n_jobs=-1`).
naive_bayes_grid_search = GridSearchCV(
    naive_bayes_pipeline, parameters, cv=5, n_jobs=-1, verbose=1
)

In [None]:
# this code takes 6 minutes and 45 seconds
naive_bayes_grid_search.fit(X_train, y_train)

In [None]:
# Report the best cross-validation score and the winning value of every
# searched parameter, in alphabetical order.
print(f"Best score: {naive_bayes_grid_search.best_score_:0.3f}")
print("Best parameters set:")
best_parameters = naive_bayes_grid_search.best_estimator_.get_params()
for param_name in sorted(parameters):
    print(f"\t{param_name}: {best_parameters[param_name]!r}")

In [None]:
y_pred = naive_bayes_grid_search.best_estimator_.predict(X_test)

In [None]:
y_pred

In [None]:
# Class index legend, assuming the string labels are ordered alphabetically
# (the order scikit-learn uses for sorted labels) — TODO confirm against the
# axis order produced by plot_confusion_matrix:
# 0 --> negative
# 1 --> neutral
# 2 --> positive
plot_confusion_matrix(y_test, y_pred, "Naive Bayes")

In [None]:
# Persist the best pipeline found by the grid search so it can be reloaded
# for the test-set evaluation.
naive_bayes_model_path = models_weights_dir_path / "naive_bayes_model.joblib"
joblib.dump(naive_bayes_grid_search.best_estimator_, naive_bayes_model_path)

## Logistic Regression


In [None]:
# Two-step pipeline: vectorise the text, then classify with logistic
# regression (up to 500 solver iterations, fixed random state).
logistic_regression_classifier = LogisticRegression(max_iter=500, random_state=42)
logistic_regression_pipeline = Pipeline(
    steps=[
        ("vect", CountVectorizer()),
        ("clf", logistic_regression_classifier),
    ]
)

In [None]:
# Grid for the logistic regression pipeline. `vect__` / `clf__` prefixes address
# the corresponding pipeline step; the bare "vect" key replaces the vectoriser.
# Note: this rebinds the `parameters` name used by the Naive Bayes search.
parameters = {
    "vect": [TfidfVectorizer(), CountVectorizer()],
    "vect__stop_words": ["english"],
    "vect__min_df": [10, 15, 25],
    "vect__max_features": (500, 1000),
    "vect__ngram_range": [(1, 1), (1, 2), (1, 3)],
    "clf__C": [0.001, 0.01, 0.1, 1, 10, 100],
}

In [None]:
# Exhaustive search over `parameters` with 5-fold cross validation, using all
# available CPU cores (`n_jobs=-1`).
logistic_regression_grid_search = GridSearchCV(
    logistic_regression_pipeline, parameters, cv=5, n_jobs=-1, verbose=1
)

In [None]:
# this code takes 3 minutes
logistic_regression_grid_search.fit(X_train, y_train)

In [None]:
# Report the best cross-validation score and the winning value of every
# searched parameter, in alphabetical order.
print(f"Best score: {logistic_regression_grid_search.best_score_:0.3f}")
print("Best parameters set:")
best_parameters = logistic_regression_grid_search.best_estimator_.get_params()
for param_name in sorted(parameters):
    print(f"\t{param_name}: {best_parameters[param_name]!r}")

In [None]:
# Evaluate the best grid search pipeline on the test dataset
y_pred = logistic_regression_grid_search.predict(X_test)

In [None]:
print(classification_report(y_test, y_pred))

In [None]:
# Class index legend, assuming the string labels are ordered alphabetically
# (the order scikit-learn uses for sorted labels) — TODO confirm against the
# axis order produced by plot_confusion_matrix:
# 0 --> negative
# 1 --> neutral
# 2 --> positive
plot_confusion_matrix(y_test, y_pred, "Logistic Regression")

In [None]:
joblib.dump(
    logistic_regression_grid_search.best_estimator_,
    models_weights_dir_path / "logistic_regression_model.joblib",
)

## SVC


In [None]:
# TF-IDF features (at most 1000, terms in fewer than 10 tweets dropped, English
# stop words removed) fed into a support vector classifier; no grid search here.
svm_vectorizer = TfidfVectorizer(max_features=1000, min_df=10, stop_words="english")
svm_pipeline = Pipeline(
    steps=[
        ("vect", svm_vectorizer),
        ("clf", SVC(random_state=42)),
    ]
)

In [None]:
# this code takes 2 minutes and 20 seconds
svm_pipeline.fit(X_train, y_train)

In [None]:
y_pred = svm_pipeline.predict(X_test)

In [None]:
print(classification_report(y_test, y_pred))

In [None]:
# Class index legend, assuming the string labels are ordered alphabetically
# (the order scikit-learn uses for sorted labels) — TODO confirm against the
# axis order produced by plot_confusion_matrix:
# 0 --> negative
# 1 --> neutral
# 2 --> positive
plot_confusion_matrix(y_test, y_pred, "SVM")

In [None]:
joblib.dump(svm_pipeline, models_weights_dir_path / "svm_model.joblib")

# Deep Learning Models


## Load GloVe embedding:


In [None]:
# Read the GloVe vectors from the data directory into a word -> vector mapping.
embedding_file_name = "glove.6B.100d.txt"
glove_embedding_path = data_dir_path / embedding_file_name
glove_embedding_dict = load_embedding(glove_embedding_path)

In [None]:
print(f"Number of words in GloVe embedding: {len(glove_embedding_dict):,}")

In [None]:
def yield_tokens(tweets_list):
    """Lazily yield the whitespace-separated tokens of each tweet.

    Args:
        tweets_list: Iterable of tweet strings.

    Yields:
        One list of tokens per tweet, in input order.
    """
    yield from (text.strip().split() for text in tweets_list)

In [None]:
# Vocabulary settings: the special tokens for unknown words and padding, the
# minimum frequency a token needs to be kept, and the vocabulary size cap (5000).
special_tokens = ["<unk>", "<pad>"]
min_freq = 5
max_tokens = 5000

In [None]:
# Build the vocabulary from the cleaned training tweets only, so the
# development and test splits do not influence it.
vocab = build_vocab_from_iterator(
    iterator=yield_tokens(training_data["tweet_text_cleaned"].tolist()),
    min_freq=min_freq,
    specials=special_tokens,
    max_tokens=max_tokens,
)

In [None]:
# Look up the indices of the special tokens once; tokens missing from the
# vocabulary are mapped to the <unk> index.
unk_index, pad_index = vocab["<unk>"], vocab["<pad>"]
vocab.set_default_index(unk_index)

In [None]:
print(f"Vocabulary size: {len(vocab)}")

## Build embedding matrix


In [None]:
# One row per vocabulary entry and `embedding_dim` columns (100, presumably
# matching the 100-dimensional GloVe file loaded earlier — confirm). Rows start
# at zero and are filled in by the initialisation loop.
vocab_size = len(vocab)
embedding_dim = 100
embedding_matrix = torch.zeros((vocab_size, embedding_dim))

In [None]:
print(embedding_matrix.shape)

Initialize the `embedding_matrix` with `GloVe` vectors.

If a given word from the vocabulary doesn't have a corresponding `GloVe` embedding, initialize it with a _random_ embedding.


In [None]:
unknown_words = []

In [None]:
# Copy the GloVe vector of every vocabulary word into its row; words without a
# GloVe vector are recorded and given a random (standard normal) vector.
for word, idx in tqdm(vocab.get_stoi().items()):
    if word not in glove_embedding_dict:
        unknown_words.append(word)
        embedding_matrix[idx] = torch.randn(embedding_dim)
        continue
    embedding_matrix[idx] = torch.tensor(glove_embedding_dict[word])

In [None]:
embedding_matrix.shape

In [None]:
# Share of vocabulary entries without a GloVe vector. The `:.2%` format
# multiplies the ratio by 100 before appending the percent sign; the previous
# `:.2f` followed by a literal `%` printed the raw fraction (e.g. "0.12%" for 12%).
unknown_ratio = len(unknown_words) / len(vocab)
print(
    f"There are {len(unknown_words)} ({unknown_ratio:.2%}) words in the vocabulary that are not in the GloVe embedding."
)

In [None]:
print(unknown_words)

## Define Datasets and Dataloaders


In [None]:
# Map the string sentiment labels to integer class indices; the encoder is
# fitted on the training labels and shared by every dataset below.
encoder = LabelEncoder()

encoder.fit(training_data["tweet_sentiment"])

In [None]:
print(encoder.classes_)

In [None]:
# Training dataset built from the cleaned tweets; the shared vocabulary and
# label encoder are passed in (presumably to index tokens and labels — see
# TweetsDataset).
train_dataset = TweetsDataset(
    tweet_ids=training_data["tweet_id"],
    tweets=training_data["tweet_text_cleaned"],
    labels=training_data["tweet_sentiment"],
    vocab=vocab,
    label_encoder=encoder,
)

In [None]:
# Development dataset, built with the training vocabulary and label encoder.
development_dataset = TweetsDataset(
    tweet_ids=development_data["tweet_id"],
    tweets=development_data["tweet_text_cleaned"],
    labels=development_data["tweet_sentiment"],
    vocab=vocab,
    label_encoder=encoder,
)

In [None]:
# First test dataset, built with the training vocabulary and label encoder.
test1_dataset = TweetsDataset(
    tweet_ids=test1_data["tweet_id"],
    tweets=test1_data["tweet_text_cleaned"],
    labels=test1_data["tweet_sentiment"],
    vocab=vocab,
    label_encoder=encoder,
)

In [None]:
# Second test dataset, built with the training vocabulary and label encoder.
test2_dataset = TweetsDataset(
    tweet_ids=test2_data["tweet_id"],
    tweets=test2_data["tweet_text_cleaned"],
    labels=test2_data["tweet_sentiment"],
    vocab=vocab,
    label_encoder=encoder,
)

In [None]:
# Third test dataset, built with the training vocabulary and label encoder.
test3_dataset = TweetsDataset(
    tweet_ids=test3_data["tweet_id"],
    tweets=test3_data["tweet_text_cleaned"],
    labels=test3_data["tweet_sentiment"],
    vocab=vocab,
    label_encoder=encoder,
)

In [None]:
def collate_batch(batch):
    """Merge dataset samples into one padded mini-batch.

    Each sample is read by position: element 0 is the tweet id, element 1 the
    token tensor and element 2 the label. Token tensors are right-padded with
    the ``<pad>`` index of the module-level ``vocab``.

    Args:
        batch: List of samples produced by the dataset.

    Returns:
        Tuple of (NumPy array of tweet ids, padded tweet tensor with the batch
        dimension first, ``torch.long`` tensor of labels).
    """
    ids, sequences, targets = [], [], []
    for sample in batch:
        ids.append(sample[0])
        sequences.append(sample[1])
        targets.append(sample[2])

    tweet_ids = np.array(ids)
    labels = np.array(targets)

    padded_tweets = pad_sequence(
        sequences, padding_value=vocab["<pad>"], batch_first=True
    )
    label_tensor = torch.from_numpy(labels).to(dtype=torch.long)

    return tweet_ids, padded_tweets, label_tensor

In [None]:
batch_size = 256

In [None]:
# All loaders share the batch size and the padding collate function; only the
# training loader shuffles its samples.
loader_options = {"batch_size": batch_size, "collate_fn": collate_batch}

train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_options)

development_dataloader = DataLoader(
    development_dataset, shuffle=False, **loader_options
)

test1_dataloader = DataLoader(test1_dataset, shuffle=False, **loader_options)

test2_dataloader = DataLoader(test2_dataset, shuffle=False, **loader_options)

test3_dataloader = DataLoader(test3_dataset, shuffle=False, **loader_options)

## LSTM


In [None]:
# Hyper-parameters of the LSTM classifier.
vocab_size = len(vocab)
embedding_dim = 100  # same width as the rows of `embedding_matrix`
hidden_dim = 300
output_dim = 3  # three sentiment classes: negative, neutral, positive
n_layers = 2
bidirectional = True
dropout_rate = 0.5

In [None]:
# Instantiate the LSTM with the hyper-parameters defined for it; `pad_index`
# is passed last (presumably as the embedding's padding index — confirm in
# scripts.models.LSTM).
lstm_model = LSTM(
    vocab_size,
    embedding_dim,
    hidden_dim,
    output_dim,
    n_layers,
    bidirectional,
    dropout_rate,
    pad_index,
)

In [None]:
print(f"The LSTM model has {count_parameters(lstm_model):,} trainable parameters")

In [None]:
# Initialise the embedding layer with a private copy of the GloVe matrix.
# Rebinding `.data` to `embedding_matrix` itself would make the layer share
# storage with the matrix, so (on CPU) training or `load_state_dict` on this
# model would silently overwrite `embedding_matrix` and every other model
# initialised from it.
lstm_model.embedding.weight.data = embedding_matrix.clone()

In [None]:
# Training settings for the LSTM: 20 epochs with Adam at a learning rate of 5e-4.
n_epochs = 20
lr = 5e-4
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=lr)

In [None]:
criterion = nn.CrossEntropyLoss()

In [None]:
lstm_model = lstm_model.to(device)
criterion = criterion.to(device)

In [None]:
lstm_model_path = models_weights_dir_path / "lstm_model.pt"

In [None]:
# this code takes one minute
# Train on the training loader and evaluate on the development loader.
# NOTE(review): the positional `False` and `None` are opaque from here. The
# BERT runs pass `True` in the same position, so the flag presumably marks
# BERT-style batches — confirm against `training_loop`'s signature.
# `lstm_model_path` is the checkpoint file that is loaded back after training.
metrics = training_loop(
    n_epochs,
    train_dataloader,
    development_dataloader,
    lstm_model,
    criterion,
    optimizer,
    device,
    False,
    None,
    lstm_model_path,
)

In [None]:
plot_metrics(metrics, "Bi-LSTM with 2 layers")

In [None]:
# Restore the checkpoint written during training. `map_location` remaps the
# stored tensors to the active device, so a checkpoint saved on a GPU machine
# can also be loaded on a CPU-only one.
lstm_model.load_state_dict(torch.load(lstm_model_path, map_location=device))

In [None]:
# Collect the true and predicted labels of the LSTM on the development set.
# NOTE(review): the trailing `False` is presumably the "is BERT" flag — confirm.
y_true, y_pred = get_labels_and_predictions(
    lstm_model, development_dataloader, encoder, device, False
)

In [None]:
# Class index legend, assuming the alphabetical class order that LabelEncoder
# produces — TODO confirm against the axis order of plot_confusion_matrix:
# 0 --> negative
# 1 --> neutral
# 2 --> positive
plot_confusion_matrix(y_true, y_pred, "Bi-LSTM with 2 layers")

## LSTM with Attention


In [None]:
# Hyper-parameters of the attention LSTM (same values as the plain LSTM).
vocab_size = len(vocab)
embedding_dim = 100  # same width as the rows of `embedding_matrix`
hidden_dim = 300
output_dim = 3  # three sentiment classes: negative, neutral, positive
n_layers = 2
bidirectional = True
dropout_rate = 0.5

In [None]:
# Instantiate the attention LSTM with the hyper-parameters defined for it;
# `pad_index` is passed last (presumably as the embedding's padding index —
# confirm in scripts.models.LSTMWithAttention).
lstm_with_attention_model = LSTMWithAttention(
    vocab_size,
    embedding_dim,
    hidden_dim,
    output_dim,
    n_layers,
    bidirectional,
    dropout_rate,
    pad_index,
)

In [None]:
print(
    f"The LSTM with attention model has {count_parameters(lstm_with_attention_model):,} trainable parameters"
)

In [None]:
# Initialise the embedding layer with a private copy of the GloVe matrix.
# Rebinding `.data` to `embedding_matrix` itself would make the layer share
# storage with the matrix, so (on CPU) training or `load_state_dict` on this
# model would silently overwrite `embedding_matrix` and every other model
# initialised from it.
lstm_with_attention_model.embedding.weight.data = embedding_matrix.clone()

In [None]:
# Training settings for the attention LSTM: 20 epochs with Adam at a learning
# rate of 5e-4.
n_epochs = 20
lr = 5e-4
optimizer = torch.optim.Adam(lstm_with_attention_model.parameters(), lr=lr)

In [None]:
criterion = nn.CrossEntropyLoss()

In [None]:
lstm_with_attention_model = lstm_with_attention_model.to(device)
criterion = criterion.to(device)

In [None]:
lstm_with_attention_model_path = (
    models_weights_dir_path / "lstm_with_attention_model.pt"
)

In [None]:
# Train on the training loader and evaluate on the development loader.
# NOTE(review): the positional `False` and `None` are opaque from here. The
# BERT runs pass `True` in the same position, so the flag presumably marks
# BERT-style batches — confirm against `training_loop`'s signature.
# `lstm_with_attention_model_path` is the checkpoint file loaded back afterwards.
metrics = training_loop(
    n_epochs,
    train_dataloader,
    development_dataloader,
    lstm_with_attention_model,
    criterion,
    optimizer,
    device,
    False,
    None,
    lstm_with_attention_model_path,
)

In [None]:
plot_metrics(metrics, "Bi-LSTM with attention")

In [None]:
# Restore the checkpoint written during training. `map_location` remaps the
# stored tensors to the active device, so a checkpoint saved on a GPU machine
# can also be loaded on a CPU-only one.
lstm_with_attention_model.load_state_dict(
    torch.load(lstm_with_attention_model_path, map_location=device)
)

In [None]:
# Collect the true and predicted labels of the attention LSTM on the
# development set.
# NOTE(review): the trailing `False` is presumably the "is BERT" flag — confirm.
y_true, y_pred = get_labels_and_predictions(
    lstm_with_attention_model, development_dataloader, encoder, device, False
)

In [None]:
# Class index legend, assuming the alphabetical class order that LabelEncoder
# produces — TODO confirm against the axis order of plot_confusion_matrix:
# 0 --> negative
# 1 --> neutral
# 2 --> positive
plot_confusion_matrix(y_true, y_pred, "Bi-LSTM with attention")

## BERT


In [None]:
# Load the tokenizer and the pretrained encoder of the uncased BERT base model.
transformer_name = "bert-base-uncased"
bert_tokenizer = transformers.AutoTokenizer.from_pretrained(transformer_name)
bert_transformer = transformers.AutoModel.from_pretrained(transformer_name)

In [None]:
print(bert_transformer.config.hidden_size)

In [None]:
# BERT fine-tuning settings: a smaller batch, fewer epochs and a lower learning
# rate than the LSTM runs. Note that this rebinds the global `batch_size`.
batch_size = 8
n_epochs = 3
lr = 1e-5

In [None]:
# Fine-tune BERT twice: once on the raw tweets and once on the cleaned tweets.
for experiment_name, feature_column in [
    ("bert_raw_tweets", "tweet_text"),
    ("bert_cleaned_tweets", "tweet_text_cleaned"),
]:
    # Start every experiment from the pretrained checkpoint. Reusing a single
    # transformer instance (fine-tuned in place, since freeze=False) would make
    # the second experiment start from weights already tuned on the first one.
    bert_transformer = transformers.AutoModel.from_pretrained(transformer_name)

    # create BERT-based dataset
    bert_train_dataset = BERTTweetsDataset(
        tweet_ids=training_data["tweet_id"],
        tweets=training_data[feature_column],
        labels=training_data["tweet_sentiment"],
        tokenizer=bert_tokenizer,
        label_encoder=encoder,
    )

    bert_development_dataset = BERTTweetsDataset(
        tweet_ids=development_data["tweet_id"],
        tweets=development_data[feature_column],
        labels=development_data["tweet_sentiment"],
        tokenizer=bert_tokenizer,
        label_encoder=encoder,
    )

    # create dataloaders
    bert_train_dataloader = DataLoader(
        bert_train_dataset, batch_size=batch_size, shuffle=True
    )

    bert_development_dataloader = DataLoader(
        bert_development_dataset, batch_size=batch_size, shuffle=False
    )

    bert_model = BERTClassifier(
        transformer=bert_transformer, output_dim=len(encoder.classes_), freeze=False
    )

    optimizer = torch.optim.Adam(bert_model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    bert_model = bert_model.to(device)
    criterion = criterion.to(device)

    print(
        f"The BERT sentiment model has {count_parameters(bert_model):,} trainable parameters"
    )

    bert_model_path = models_weights_dir_path / f"{experiment_name}.pt"

    # NOTE(review): the positional `True` presumably marks BERT-style batches
    # (the LSTM runs pass `False`) — confirm against `training_loop`.
    metrics = training_loop(
        n_epochs,
        bert_train_dataloader,
        bert_development_dataloader,
        bert_model,
        criterion,
        optimizer,
        device,
        True,
        None,
        bert_model_path,
    )

    plot_metrics(metrics, experiment_name)

    # `map_location` lets a checkpoint saved on a GPU machine be restored on a
    # CPU-only one.
    bert_model.load_state_dict(torch.load(bert_model_path, map_location=device))

    y_true, y_pred = get_labels_and_predictions(
        bert_model,
        bert_development_dataloader,
        encoder,
        device,
        True,
    )

    plot_confusion_matrix(y_true, y_pred, experiment_name)

# Calculating predictions on test1, test2, and test3 datasets


In this section, we load the saved models from the `models_weights` folder and compute predictions and the `f1` score for the three test datasets.


## Naive Bayes, Logistic Regression, and SVM


In [None]:
# Reload the three fitted scikit-learn pipelines from disk. This rebinds the
# pipeline names defined during training to the persisted, fitted versions.
naive_bayes_pipeline = joblib.load(models_weights_dir_path / "naive_bayes_model.joblib")
logistic_regression_pipeline = joblib.load(
    models_weights_dir_path / "logistic_regression_model.joblib"
)
svm_pipeline = joblib.load(models_weights_dir_path / "svm_model.joblib")

In [None]:
# Evaluate the three classical pipelines on each test set and plot one
# confusion matrix per (model, test set) pair.
for test_data, test_data_name in [
    (test1_data, "test1"),
    (test2_data, "test2"),
    (test3_data, "test3"),
]:
    y_true = test_data["tweet_sentiment"]

    # The pipelines were trained on cleaned text, so predict on the cleaned column.
    y_pred_nb = naive_bayes_pipeline.predict(test_data["tweet_text_cleaned"])
    y_pred_lr = logistic_regression_pipeline.predict(test_data["tweet_text_cleaned"])
    y_pred_svm = svm_pipeline.predict(test_data["tweet_text_cleaned"])

    plot_confusion_matrix(y_true, y_pred_nb, f"Naive Bayes - {test_data_name}")

    plot_confusion_matrix(
        y_true,
        y_pred_lr,
        f"Logistic Regression - {test_data_name}",
    )

    # Class index legend, assuming the string labels are ordered alphabetically
    # — TODO confirm against the axis order of plot_confusion_matrix:
    # 0 --> negative
    # 1 --> neutral
    # 2 --> positive
    plot_confusion_matrix(y_true, y_pred_svm, f"SVM - {test_data_name}")

## LSTM, LSTM with attention


In [None]:
# Reload the saved LSTM weights. `map_location` remaps the stored tensors to
# the active device, so a checkpoint saved on a GPU machine can also be loaded
# on a CPU-only one.
lstm_model.load_state_dict(
    torch.load(models_weights_dir_path / "lstm_model.pt", map_location=device)
)

In [None]:
# Reload the saved attention-LSTM weights. `map_location` remaps the stored
# tensors to the active device, so a checkpoint saved on a GPU machine can
# also be loaded on a CPU-only one.
lstm_with_attention_model.load_state_dict(
    torch.load(
        models_weights_dir_path / "lstm_with_attention_model.pt",
        map_location=device,
    )
)

In [None]:
# Evaluate both LSTM variants on each test set and plot one confusion matrix
# per (model, test set) pair.
for test_data, test_data_name in [
    (test1_data, "test1"),
    (test2_data, "test2"),
    (test3_data, "test3"),
]:
    test_dataset = TweetsDataset(
        tweet_ids=test_data["tweet_id"],
        tweets=test_data["tweet_text_cleaned"],
        labels=test_data["tweet_sentiment"],
        vocab=vocab,
        label_encoder=encoder,
    )

    batch_size = 256

    test_data_loader = DataLoader(
        test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_batch
    )

    # Class index legend, assuming the alphabetical class order that
    # LabelEncoder produces — TODO confirm against plot_confusion_matrix:
    # 0 --> negative, 1 --> neutral, 2 --> positive
    for recurrent_model, plot_title in (
        (lstm_model, f"Bi-LSTM - {test_data_name}"),
        (lstm_with_attention_model, f"Bi-LSTM with attention - {test_data_name}"),
    ):
        y_true, y_pred = get_labels_and_predictions(
            recurrent_model, test_data_loader, encoder, device, False
        )
        plot_confusion_matrix(y_true, y_pred, plot_title)

## BERT


In [None]:
# Rebuild the classifier around a freshly loaded pretrained encoder, then
# overwrite its weights with the checkpoint fine-tuned on raw tweets.
bert_transformer = transformers.AutoModel.from_pretrained(transformer_name)

bert_raw_tweets = BERTClassifier(
    transformer=bert_transformer, output_dim=len(encoder.classes_), freeze=False
)

# `map_location` lets a checkpoint saved on a GPU machine be restored on a
# CPU-only one.
bert_raw_tweets.load_state_dict(
    torch.load(models_weights_dir_path / "bert_raw_tweets.pt", map_location=device)
)

bert_raw_tweets = bert_raw_tweets.to(device)

In [None]:
# Rebuild the classifier around a freshly loaded pretrained encoder, then
# overwrite its weights with the checkpoint fine-tuned on cleaned tweets.
bert_transformer = transformers.AutoModel.from_pretrained(transformer_name)

bert_cleaned_tweets = BERTClassifier(
    transformer=bert_transformer, output_dim=len(encoder.classes_), freeze=False
)

# `map_location` lets a checkpoint saved on a GPU machine be restored on a
# CPU-only one.
bert_cleaned_tweets.load_state_dict(
    torch.load(
        models_weights_dir_path / "bert_cleaned_tweets.pt", map_location=device
    )
)

bert_cleaned_tweets = bert_cleaned_tweets.to(device)

In [None]:
# Evaluate both fine-tuned BERT classifiers on each test set: the raw-tweet
# model on the raw text, then the cleaned-tweet model on the cleaned text.
for test_data, test_data_name in [
    (test1_data, "test1"),
    (test2_data, "test2"),
    (test3_data, "test3"),
]:
    batch_size = 8

    # Class index legend, assuming the alphabetical class order that
    # LabelEncoder produces — TODO confirm against plot_confusion_matrix:
    # 0 --> negative, 1 --> neutral, 2 --> positive
    for bert_classifier, text_column, plot_title in (
        (bert_raw_tweets, "tweet_text", f"BERT raw tweets - {test_data_name}"),
        (
            bert_cleaned_tweets,
            "tweet_text_cleaned",
            f"BERT cleaned tweets - {test_data_name}",
        ),
    ):
        bert_dataset = BERTTweetsDataset(
            tweet_ids=test_data["tweet_id"],
            tweets=test_data[text_column],
            labels=test_data["tweet_sentiment"],
            tokenizer=bert_tokenizer,
            label_encoder=encoder,
        )

        bert_data_loader = DataLoader(
            bert_dataset, batch_size=batch_size, shuffle=False
        )

        y_true, y_pred = get_labels_and_predictions(
            bert_classifier, bert_data_loader, encoder, device, True
        )

        plot_confusion_matrix(y_true, y_pred, plot_title)