In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import json
import pickle
import pandas as pd
from suicide_data import SuicideDataset
from torch.utils.data import  DataLoader
from tensorflow.keras.preprocessing.text import tokenizer_from_json
import numpy as np
from gensim.models import Word2Vec
from gensim.test.utils import common_texts
from utils import sketch,precision_recall_f1,accuracy_cal,map_to_labels
import copy
from model.attention_bilstm import BiLSTM_Attention
from model.lstm import SentimentLSTM
from model.rnn import SentimentRNN
from model.gru import GRU

Load configuration, data, and pretrained embeddings


In [2]:
# Read config.json once, then pull out one hyperparameter section per architecture.
with open('config.json', 'r') as config_file:
    config = json.load(config_file)

# Sections are looked up in the same order as before: LSTM, BiLSTM_Attention, RNN, GRU.
lstm_config, bilstm_config, rnn_config, gru_config = (
    config[section] for section in ('LSTM', 'BiLSTM_Attention', 'RNN', 'GRU')
)

In [24]:
# Training and validation splits.
train_df = pd.read_csv("data/train_data.csv")
val_df = pd.read_csv("data/val_data.csv")

# Rebuild the fitted Keras tokenizer from its JSON export.
with open("embeddings/tokenizer.json", "r") as f:
    tokenizer_json = json.load(f)
tokenizer = tokenizer_from_json(tokenizer_json)

# Embeddings saved as pickles (CBOW and Skip-Gram variants).
# NOTE(review): pickle.load can execute arbitrary code — only use files produced by this project.
with open("embeddings/CBOW_embeddings.pkl", "rb") as cbow, \
        open("embeddings/SkipGram_embeddings.pkl", "rb") as sg:
    cbow_embeddings = pickle.load(cbow)
    sg_embeddings = pickle.load(sg)

In [23]:
# Mini-batch size shared by the training and validation loaders.
BATCH_SIZE = 64

# Wrap both splits in SuicideDataset using the same fitted tokenizer.
trainset = SuicideDataset(texts=train_df["cleaned_text"], labels=train_df["class"], tokenizer=tokenizer)
valset = SuicideDataset(texts=val_df["cleaned_text"], labels=val_df["class"], tokenizer=tokenizer)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

# Validation must not be shuffled: train() averages per-batch metrics, and with
# drop_last=True a shuffled loader would drop a different set of samples every
# epoch, so the same weights would score differently from epoch to epoch.
# NOTE(review): drop_last=True is kept because the models may assume a fixed
# batch size — confirm; if they do not, set drop_last=False so every validation
# sample is scored.
val_loader = DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, drop_last=True)

In [8]:
# Keras Tokenizer indices start at 1, so one extra row is allocated and row 0
# of the matrix below is left as all zeros.
VOCAB_SIZE = len(tokenizer.index_word) + 1

# Seed for the random vectors given to words missing from the Word2Vec vocabulary,
# so the matrix is the same on every Restart & Run All.
EMBEDDING_SEED = 42

# NOTE: this does NOT load pre-trained vectors (e.g. Google News). It trains a
# fresh 300-dimensional Word2Vec model on gensim's small `common_texts` demo
# corpus, so tokenizer words outside that corpus fall through to the random
# initialisation below.
w2v_model = Word2Vec(sentences=common_texts, vector_size=300, window=5, min_count=1, workers=4)
embedding_dim = w2v_model.vector_size
embedding_matrix = np.zeros((VOCAB_SIZE, embedding_dim))

# Fill one row per tokenizer word: the Word2Vec vector when available,
# otherwise a reproducible draw from N(0, 0.6^2).
_oov_rng = np.random.default_rng(EMBEDDING_SEED)
for word, i in tokenizer.word_index.items():
    if word in w2v_model.wv:
        embedding_matrix[i] = w2v_model.wv[word]
    else:
        embedding_matrix[i] = _oov_rng.normal(scale=0.6, size=(embedding_dim,))

In [9]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cpu')

# Load model and Experimental results


Common train function for all model experiments

In [20]:
def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one full pass over ``loader`` and return ``(avg_loss, avg_acc, avg_f1)``.

    If ``optimizer`` is given, the model is put in train mode and its weights are
    updated after every batch (gradient norm clipped to 1). Otherwise the model
    is put in eval mode and the pass runs with gradients disabled.

    Each returned value is the sum of the per-batch values divided by the number
    of batches in ``loader``.
    """
    is_training = optimizer is not None
    model.train(is_training)  # model.train(False) is what model.eval() does

    loss_sum = 0
    acc_sum = 0
    f1_sum = 0

    with torch.set_grad_enabled(is_training):
        for batch_inputs, batch_labels in loader:
            batch_inputs = batch_inputs.to(device)
            targets = batch_labels.to(device).float()

            if is_training:
                optimizer.zero_grad()

            outputs = model(batch_inputs)
            loss = criterion(outputs, targets)

            loss_sum += loss.item()
            acc_sum += accuracy_cal(outputs, targets)
            # precision_recall_f1 returns a sequence; index 2 is used as the F1 score.
            f1_sum += precision_recall_f1(outputs, targets)[2]

            if is_training:
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), 1)
                optimizer.step()

    n_batches = len(loader)
    return loss_sum / n_batches, acc_sum / n_batches, f1_sum / n_batches


def train(model, train_loader, val_loader, epochs, learning_rate):
    """Train ``model`` with Adam + binary cross-entropy and keep the best weights.

    After every epoch the model is evaluated on ``val_loader``. The weights from
    the epoch with the lowest average validation loss are restored into ``model``
    before returning.

    Args:
        model: module whose forward pass takes a batch from the loaders and
            returns probabilities (``nn.BCELoss`` requires values in [0, 1]).
        train_loader: iterable of ``(inputs, labels)`` batches used for updates.
        val_loader: iterable of ``(inputs, labels)`` batches used for evaluation.
        epochs: number of passes over ``train_loader``.
        learning_rate: learning rate for the Adam optimizer.

    Returns:
        Tuple ``(train_losses, val_losses, train_accs, val_accs, train_f1s,
        val_f1s)``; each is a list with one per-epoch average.

    Raises:
        ValueError: if either loader yields no batches.
    """
    # Fail early with a clear message instead of a ZeroDivisionError when
    # averaging, e.g. for a drop_last=True loader over fewer samples than one batch.
    if len(train_loader) == 0 or len(val_loader) == 0:
        raise ValueError("train_loader and val_loader must each yield at least one batch.")

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.BCELoss().to(device)

    train_losses, val_losses = [], []
    train_accs, val_accs = [], []
    train_f1s, val_f1s = [], []

    # Best-so-far bookkeeping, selected by lowest validation loss.
    best_val_loss = float('inf')
    best_model_state = None
    best_metrics = None

    for epoch in range(epochs):
        avg_train_loss, avg_train_acc, avg_train_f1 = _run_epoch(
            model, train_loader, criterion, optimizer
        )
        train_losses.append(avg_train_loss)
        train_accs.append(avg_train_acc)
        train_f1s.append(avg_train_f1)

        avg_val_loss, avg_val_acc, avg_val_f1 = _run_epoch(model, val_loader, criterion)
        val_losses.append(avg_val_loss)
        val_accs.append(avg_val_acc)
        val_f1s.append(avg_val_f1)

        # A NaN validation loss never compares as smaller, so it is never selected.
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            # deepcopy: state_dict() holds references to the live parameters.
            best_model_state = copy.deepcopy(model.state_dict())
            best_metrics = [
                epoch + 1, avg_train_loss, best_val_loss,
                avg_train_acc, avg_val_acc,
                avg_train_f1, avg_val_f1
            ]

        print(f'Epoch [{epoch + 1}/{epochs}], '
              f'Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, '
              f'Train Accuracy: {avg_train_acc:.4f}%, Val Accuracy: {avg_val_acc:.4f}%, '
              f'Train F1-Score: {avg_train_f1:.4f}%, Val F1-Score: {avg_val_f1:.4f}%')

    if best_model_state is None:
        # epochs == 0, or the validation loss was never below infinity (e.g. NaN):
        # there is nothing to restore, so leave the model as it is.
        print("\nTraining complete! No best model was recorded; model weights left unchanged.")
    else:
        model.load_state_dict(best_model_state)
        print("\nTraining complete!")

        print(f"Best Model at Epoch {best_metrics[0]}: "
              f"Train Loss: {best_metrics[1]:.4f}, Val Loss: {best_metrics[2]:.4f}, "
              f"Train Accuracy: {best_metrics[3]:.4f}%, Val Accuracy: {best_metrics[4]:.4f}%, "
              f"Train F1-Score: {best_metrics[5]:.4f}%, Val F1-Score: {best_metrics[6]:.4f}%")

    return train_losses, val_losses, train_accs, val_accs, train_f1s, val_f1s


# RNN

RNN without preprocessing model

In [33]:
# RNN baseline: no `pretrained_embeddings` argument is passed to the model.
rnn_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=rnn_config["embedding_dim"],
    hidden_size=rnn_config["hidden_size"],
    tagset_size=1,
    n_layers=rnn_config["num_layers"],
    dropout_rate=rnn_config['dropout_rate'],
)
model = SentimentRNN(**rnn_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, rnn_config['epochs'], rnn_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("RNN without preprocessing model", *history)

RNN with CBOW model

In [None]:
# RNN given the CBOW embeddings loaded from the pickle file.
rnn_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=rnn_config["embedding_dim"],
    hidden_size=rnn_config["hidden_size"],
    tagset_size=1,
    n_layers=rnn_config["num_layers"],
    dropout_rate=rnn_config['dropout_rate'],
    pretrained_embeddings=cbow_embeddings,
)
model = SentimentRNN(**rnn_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, rnn_config['epochs'], rnn_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("RNN with CBOW model", *history)

RNN with Skip Gram model

In [None]:
# RNN given the Skip-Gram embeddings loaded from the pickle file.
rnn_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=rnn_config["embedding_dim"],
    hidden_size=rnn_config["hidden_size"],
    tagset_size=1,
    n_layers=rnn_config["num_layers"],
    dropout_rate=rnn_config['dropout_rate'],
    pretrained_embeddings=sg_embeddings,
)
model = SentimentRNN(**rnn_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, rnn_config['epochs'], rnn_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("RNN with SkipGram model", *history)

RNN with pretrained W2V in Gensim

In [None]:
# RNN given `embedding_matrix`, the matrix built from the gensim Word2Vec model.
rnn_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=rnn_config["embedding_dim"],
    hidden_size=rnn_config["hidden_size"],
    tagset_size=1,
    n_layers=rnn_config["num_layers"],
    dropout_rate=rnn_config['dropout_rate'],
    pretrained_embeddings=embedding_matrix,
)
model = SentimentRNN(**rnn_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, rnn_config['epochs'], rnn_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("RNN with pretrained W2V in Gensim", *history)

# LSTM

LSTM without preprocessing model

In [None]:
# LSTM baseline: no `pretrained_embeddings` argument is passed to the model.
lstm_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=lstm_config["embedding_dim"],
    hidden_size=lstm_config["hidden_size"],
    tagset_size=1,
    n_layers=lstm_config["num_layers"],
    dropout_rate=lstm_config['dropout_rate'],
)
model = SentimentLSTM(**lstm_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, lstm_config['epochs'], lstm_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("LSTM without preprocessing model", *history)

LSTM with CBOW

In [None]:
# LSTM given the CBOW embeddings loaded from the pickle file.
lstm_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=lstm_config["embedding_dim"],
    hidden_size=lstm_config["hidden_size"],
    tagset_size=1,
    n_layers=lstm_config["num_layers"],
    dropout_rate=lstm_config['dropout_rate'],
    pretrained_embeddings=cbow_embeddings,
)
model = SentimentLSTM(**lstm_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, lstm_config['epochs'], lstm_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("LSTM with CBOW", *history)

LSTM with SkipGram

In [None]:
# LSTM given the Skip-Gram embeddings loaded from the pickle file.
lstm_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=lstm_config["embedding_dim"],
    hidden_size=lstm_config["hidden_size"],
    tagset_size=1,
    n_layers=lstm_config["num_layers"],
    dropout_rate=lstm_config['dropout_rate'],
    pretrained_embeddings=sg_embeddings,
)
model = SentimentLSTM(**lstm_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, lstm_config['epochs'], lstm_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("LSTM with SkipGram", *history)

LSTM with pretrained W2V in Gensim

In [None]:
# LSTM given `embedding_matrix`, the matrix built from the gensim Word2Vec model.
lstm_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=lstm_config["embedding_dim"],
    hidden_size=lstm_config["hidden_size"],
    tagset_size=1,
    n_layers=lstm_config["num_layers"],
    dropout_rate=lstm_config['dropout_rate'],
    pretrained_embeddings=embedding_matrix,
)
model = SentimentLSTM(**lstm_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, lstm_config['epochs'], lstm_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("LSTM with pretrained W2V in Gensim", *history)

# Bi-LSTM with Attention Layer


BI-LSTM_Attention without preprocessing model


In [None]:
# BiLSTM + attention baseline: no `pretrained_embeddings` argument is passed to the model.
bilstm_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=bilstm_config["embedding_dim"],
    hidden_size=bilstm_config["hidden_size"],
    tagset_size=1,
    n_layers=bilstm_config["num_layers"],
    dropout_rate=bilstm_config['dropout_rate'],
)
model = BiLSTM_Attention(**bilstm_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, bilstm_config['epochs'], bilstm_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("BI-LSTM_Attention without preprocessing model", *history)

BI-LSTM_Attention with CBOW

In [None]:
# BiLSTM + attention given the CBOW embeddings loaded from the pickle file.
bilstm_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=bilstm_config["embedding_dim"],
    hidden_size=bilstm_config["hidden_size"],
    tagset_size=1,
    n_layers=bilstm_config["num_layers"],
    dropout_rate=bilstm_config['dropout_rate'],
    pretrained_embeddings=cbow_embeddings,
)
model = BiLSTM_Attention(**bilstm_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, bilstm_config['epochs'], bilstm_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("BI-LSTM_Attention with CBOW", *history)

BI-LSTM_Attention with SkipGram

In [None]:
# BiLSTM + attention given the Skip-Gram embeddings loaded from the pickle file.
bilstm_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=bilstm_config["embedding_dim"],
    hidden_size=bilstm_config["hidden_size"],
    tagset_size=1,
    n_layers=bilstm_config["num_layers"],
    dropout_rate=bilstm_config['dropout_rate'],
    pretrained_embeddings=sg_embeddings,
)
model = BiLSTM_Attention(**bilstm_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, bilstm_config['epochs'], bilstm_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("BI-LSTM_Attention with SKipGram", *history)

BI-LSTM_Attention with pretrained W2V in Gensim

In [None]:
# BiLSTM + attention given `embedding_matrix`, the matrix built from the gensim Word2Vec model.
bilstm_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=bilstm_config["embedding_dim"],
    hidden_size=bilstm_config["hidden_size"],
    tagset_size=1,
    n_layers=bilstm_config["num_layers"],
    dropout_rate=bilstm_config['dropout_rate'],
    pretrained_embeddings=embedding_matrix,
)
model = BiLSTM_Attention(**bilstm_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, bilstm_config['epochs'], bilstm_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("BI-LSTM_Attention with pretrained W2V in Gensim", *history)

# GRU

GRU without preprocessing model

In [None]:
# GRU baseline: no `pretrained_embeddings` argument is passed to the model.
gru_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=gru_config["embedding_dim"],
    hidden_size=gru_config["hidden_size"],
    tagset_size=1,
    n_layers=gru_config["num_layers"],
    dropout_rate=gru_config['dropout_rate'],
)
model = GRU(**gru_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, gru_config['epochs'], gru_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("GRU without preprocessing model", *history)

GRU with CBOW

In [None]:
# GRU given the CBOW embeddings loaded from the pickle file.
gru_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=gru_config["embedding_dim"],
    hidden_size=gru_config["hidden_size"],
    tagset_size=1,
    n_layers=gru_config["num_layers"],
    dropout_rate=gru_config['dropout_rate'],
    pretrained_embeddings=cbow_embeddings,
)
model = GRU(**gru_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, gru_config['epochs'], gru_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("GRU with CBOW", *history)

GRU with SkipGram

In [None]:
# GRU given the Skip-Gram embeddings loaded from the pickle file.
gru_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=gru_config["embedding_dim"],
    hidden_size=gru_config["hidden_size"],
    tagset_size=1,
    n_layers=gru_config["num_layers"],
    dropout_rate=gru_config['dropout_rate'],
    pretrained_embeddings=sg_embeddings,
)
model = GRU(**gru_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, gru_config['epochs'], gru_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("GRU with SKipGram", *history)

GRU with pretrained W2V in Gensim

In [None]:
# GRU given `embedding_matrix`, the matrix built from the gensim Word2Vec model.
gru_params = dict(
    vocab_size=VOCAB_SIZE,
    embedding_dim=gru_config["embedding_dim"],
    hidden_size=gru_config["hidden_size"],
    tagset_size=1,
    n_layers=gru_config["num_layers"],
    dropout_rate=gru_config['dropout_rate'],
    pretrained_embeddings=embedding_matrix,
)
model = GRU(**gru_params).to(device)

# Train, keep the six metric histories under their usual names, then plot them.
history = train(model, train_loader, val_loader, gru_config['epochs'], gru_config['learning_rate'])
train_losses, val_losses, train_accs, val_accs, train_f1, val_f1 = history
sketch("GRU with pretrained W2V in Gensim", *history)