In [1]:
import numpy as np
from collections import Counter
!pip3 install contractions
import contractions
import string
import re
import torch
import sys
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix

Collecting contractions
  Downloading contractions-0.1.73-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting textsearch>=0.0.21 (from contractions)
  Downloading textsearch-0.0.24-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting anyascii (from textsearch>=0.0.21->contractions)
  Downloading anyascii-0.3.2-py3-none-any.whl.metadata (1.5 kB)
Collecting pyahocorasick (from textsearch>=0.0.21->contractions)
  Downloading pyahocorasick-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading contractions-0.1.73-py2.py3-none-any.whl (8.7 kB)
Downloading textsearch-0.0.24-py2.py3-none-any.whl (7.6 kB)
Downloading anyascii-0.3.2-py3-none-any.whl (289 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m289.9/289.9 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyahocorasick-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (118 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m118.3/118.3 kB[0m 

In [2]:
class create_dataset(Dataset):
    """Language-model dataset built from the "Description" column of a CSV.

    Each description is normalised, tokenised on whitespace, wrapped in
    ``<s>`` / ``</s>``, mapped to vocabulary ids, padded or truncated to the
    95th-percentile sentence length, and finally turned into shifted
    input/target tensors for a forward and a backward language model.

    Args:
        data_path: path of the CSV file to read.
        threshold: minimum corpus frequency for a token to enter the vocabulary.
        vocab: existing vocabulary list; when given, no new vocabulary is built.
        word2idx: token -> id mapping that goes with ``vocab``.
        idx2word: id -> token mapping that goes with ``vocab``.
    """

    def __init__(self, data_path, threshold=3, vocab=None, word2idx=None, idx2word=None):
        self.data_path = data_path
        self.threshold = threshold
        self.sentences = None
        self.vocab = vocab
        self.word2idx = word2idx
        self.idx2word = idx2word
        self.max_length = None
        self.X_forward = self.X_backward = None
        self.y_forward = self.y_backward = None

        # Pipeline: raw text -> tokens -> (optional) vocabulary -> ids -> tensors.
        self.preprocess_data()
        if vocab is None:
            self.create_vocab()
        self.get_max_length()
        self.padding()
        self.create_training_data()

    def preprocess_data(self):
        """Read the CSV and store one token list per description in ``self.sentences``."""
        frame = pd.read_csv(self.data_path)
        # Every punctuation character is replaced by a space before splitting.
        punct_to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))

        tokenised = []
        for raw in frame["Description"].values:
            text = contractions.fix(raw).lower()
            text = re.sub(r'http\S+', 'URL', text)
            text = re.sub(r'www\S+', 'URL', text)
            tokens = text.translate(punct_to_space).split()
            tokenised.append(['<s>', *tokens, '</s>'])
        self.sentences = tokenised

    def create_vocab(self):
        """Build ``vocab``, ``word2idx`` and ``idx2word`` from the tokenised sentences.

        ``<pad>`` gets id 0 and ``<unk>`` id 1; the remaining tokens follow in
        first-seen order, restricted to those occurring at least
        ``self.threshold`` times.
        """
        counts = Counter()
        for sentence in self.sentences:
            counts.update(sentence)
        frequent = [token for token, seen in counts.items() if seen >= self.threshold]

        self.vocab = ['<pad>', '<unk>', *frequent]
        self.word2idx = {token: position for position, token in enumerate(self.vocab)}
        self.idx2word = {position: token for token, position in self.word2idx.items()}

    def get_max_length(self):
        """Set ``self.max_length`` to the truncated 95th-percentile sentence length."""
        cutoff = self.get_n_percentile_sentence_length(95)
        self.max_length = int(cutoff)

    def get_n_percentile_sentence_length(self, percentile):
        """Return the given percentile of the sentence lengths (as NumPy computes it)."""
        lengths = list(map(len, self.sentences))
        return np.percentile(lengths, percentile)

    def padding(self):
        """Replace tokens by ids and bring every sentence to ``self.max_length`` ids.

        Out-of-vocabulary tokens become ``<unk>``; short sentences are
        right-padded with ``<pad>`` and long ones are cut at ``max_length``.
        """
        lookup = self.word2idx
        limit = self.max_length
        encoded = []
        for sentence in self.sentences:
            ids = [lookup['<unk>'] if token not in lookup else lookup[token] for token in sentence]
            shortfall = limit - len(ids)
            if shortfall > 0:
                ids.extend([lookup['<pad>']] * int(shortfall))
            else:
                ids = ids[:limit]
            encoded.append(ids)
        self.sentences = encoded

    def create_training_data(self):
        """Derive the shifted input/target tensors for both reading directions.

        The backward pair is taken from the reversed (already padded) id
        sequence, so any padding sits at its start.
        """
        fwd_inputs, bwd_inputs, fwd_targets, bwd_targets = [], [], [], []
        for ids in self.sentences:
            flipped = ids[::-1]
            fwd_inputs.append(ids[:-1])
            bwd_inputs.append(flipped[:-1])
            fwd_targets.append(ids[1:])
            bwd_targets.append(flipped[1:])

        self.X_forward = torch.tensor(fwd_inputs)
        self.X_backward = torch.tensor(bwd_inputs)
        self.y_forward = torch.tensor(fwd_targets)
        self.y_backward = torch.tensor(bwd_targets)

    def __len__(self):
        return len(self.X_forward)

    def __getitem__(self, idx):
        # Order matches what train_elmo unpacks: inputs first, then targets.
        return (
            self.X_forward[idx],
            self.X_backward[idx],
            self.y_forward[idx],
            self.y_backward[idx],
        )

class Elmo(nn.Module):
    """Two independent two-layer LSTM stacks on top of one shared embedding table.

    One stack consumes ``X_forward`` and the other ``X_backward``; each stack
    ends in its own linear projection to vocabulary-sized scores.

    Args:
        vocab_size: number of embedding rows and of output scores per position.
        embedding_dim: width of the embedding vectors.
        hidden_dim: hidden size of every LSTM layer.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        # Layers are created in a fixed order so that seeded initialisation and
        # the state_dict key order stay the same.
        lstm_options = {"batch_first": True}
        self.lstm_forward1 = nn.LSTM(embedding_dim, hidden_dim, **lstm_options)
        self.lstm_forward2 = nn.LSTM(hidden_dim, hidden_dim, **lstm_options)
        self.lstm_backward1 = nn.LSTM(embedding_dim, hidden_dim, **lstm_options)
        self.lstm_backward2 = nn.LSTM(hidden_dim, hidden_dim, **lstm_options)

        self.fc_forward = nn.Linear(hidden_dim, vocab_size)
        self.fc_backward = nn.Linear(hidden_dim, vocab_size)

    def _score(self, token_ids, first_lstm, second_lstm, projection):
        """Embed ``token_ids``, run them through two LSTMs and project to scores."""
        hidden, _ = first_lstm(self.embedding(token_ids))
        hidden, _ = second_lstm(hidden)
        return projection(hidden)

    def forward(self, X_forward, X_backward):
        """Return ``(forward_scores, backward_scores)``, one score vector per input position."""
        forward_output = self._score(
            X_forward, self.lstm_forward1, self.lstm_forward2, self.fc_forward
        )
        backward_output = self._score(
            X_backward, self.lstm_backward1, self.lstm_backward2, self.fc_backward
        )
        return forward_output, backward_output


def train_elmo(model, train_loader, device, vocab_size, epochs=10, pad_idx=None):
    """Train the bidirectional language model with Adam (lr=0.001).

    The loss of a batch is the sum of the forward and the backward
    cross-entropy. Targets are passed to ``CrossEntropyLoss`` as integer class
    indices; this yields the same mean loss as dense one-hot targets while
    avoiding a ``(batch, seq, vocab)`` float tensor per direction and batch.

    Args:
        model: module whose ``forward(X_forward, X_backward)`` returns two score
            tensors laid out as ``(batch, seq, vocab)``.
        train_loader: iterable with ``len()`` yielding
            ``(X_forward, X_backward, y_forward, y_backward)`` batches; the
            targets must be integer (long) id tensors.
        device: device the model and the batches are moved to.
        vocab_size: kept for interface compatibility; the class count is taken
            from the model output.
        epochs: number of passes over ``train_loader``.
        pad_idx: optional target id to exclude from the loss (e.g. the padding
            id). ``None`` (default) scores every position, as before.

    Returns:
        ``(losses, model)`` where ``losses`` holds the mean batch loss per epoch.
    """
    model.to(device)
    model.train()
    if pad_idx is None:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)
    criterion = criterion.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    losses = []
    for epoch in range(epochs):
        running_loss = 0.0
        for X_forward, X_backward, y_forward, y_backward in train_loader:
            X_forward = X_forward.to(device)
            X_backward = X_backward.to(device)
            y_forward = y_forward.to(device)
            y_backward = y_backward.to(device)

            optimizer.zero_grad()
            forward_output, backward_output = model(X_forward, X_backward)
            # CrossEntropyLoss expects the class dimension second:
            # (batch, seq, vocab) -> (batch, vocab, seq).
            forward_loss = criterion(forward_output.permute(0, 2, 1), y_forward)
            backward_loss = criterion(backward_output.permute(0, 2, 1), y_backward)
            loss = forward_loss + backward_loss
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        losses.append(epoch_loss)
        print(f'Epoch {epoch+1}, Loss: {epoch_loss}')

    return losses, model

In [3]:
# Build the language-model dataset from the training CSV. Tokens seen fewer
# than `threshold` times are left out of the vocabulary built here.
data_path = 'data/train.csv'
threshold = 3
dataset = create_dataset(data_path, threshold)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# The test split reuses the vocabulary and index mappings of the training set.
# NOTE(review): test_loader is not consumed anywhere in this cell.
test_data_path = 'data/test.csv'
test_dataset = create_dataset(test_data_path, threshold, vocab=dataset.vocab, word2idx=dataset.word2idx, idx2word=dataset.idx2word)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Model sizes: one embedding row / output score per vocabulary entry.
vocab_size = len(dataset.word2idx)
embedding_dim = 150
hidden_dim = 150
model = Elmo(vocab_size, embedding_dim, hidden_dim)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
losses, model = train_elmo(model, train_loader, device, vocab_size, epochs=10)

# The whole module object is pickled (not only its state_dict), so loading
# 'model.pt' later needs the Elmo class to be importable under the same name.
torch.save(model, 'model.pt')

# The vocabulary mappings are stored next to the model for the later cells.
torch.save(dataset.word2idx, 'word2idx.pt')
torch.save(dataset.idx2word, 'idx2word.pt')

Epoch 1, Loss: 8.426731818771362
Epoch 2, Loss: 7.098492701085409
Epoch 3, Loss: 6.5969685976664225
Epoch 4, Loss: 6.303259781901041
Epoch 5, Loss: 6.0989046332041426
Epoch 6, Loss: 5.94296741587321
Epoch 7, Loss: 5.8170874876658125
Epoch 8, Loss: 5.711459554672241
Epoch 9, Loss: 5.619714071019491
Epoch 10, Loss: 5.540601449966431


In [4]:
class Create_dataset_classification(Dataset):
    """Classification dataset read from a CSV with "Description" and "Class Index".

    Descriptions are normalised, tokenised, wrapped in ``<s>`` / ``</s>``,
    mapped to ids with a pre-built vocabulary and padded or truncated to the
    95th-percentile sentence length of this file. Labels (1-based in the CSV)
    become one-hot float rows.

    Args:
        data_path: path of the CSV file to read.
        word2idx: token -> id mapping; must contain ``<pad>`` and ``<unk>``.
        idx2word: id -> token mapping (stored, not used by this class).
    """

    def __init__(self, data_path, word2idx, idx2word):
        self.data_path = data_path
        self.word2idx = word2idx
        self.idx2word = idx2word
        self.sentences = None
        self.labels = None
        self.num_classes = None
        self.max_length = None
        self.X = None
        self.Y = None
        self.preprocess_data()
        self.get_max_length()
        self.padding()
        self.create_training_data()

    @staticmethod
    def _encode_labels(raw_labels):
        """Turn 1-based class labels into ``(one_hot_float_tensor, num_classes)``.

        The class count is the largest zero-based index plus one rather than
        the number of distinct labels, so a label id that happens to be absent
        from the file (e.g. labels 1, 2, 4) no longer makes ``one_hot`` fail.

        Raises:
            ValueError: if ``raw_labels`` is empty.
        """
        indices = torch.tensor([label - 1 for label in raw_labels])
        if indices.numel() == 0:
            raise ValueError("cannot encode an empty label column")
        num_classes = int(indices.max()) + 1
        one_hot = torch.nn.functional.one_hot(indices, num_classes=num_classes).float()
        return one_hot, num_classes

    def preprocess_data(self):
        """Read the CSV, encode the labels and tokenise the descriptions."""
        data = pd.read_csv(self.data_path)
        self.labels, self.num_classes = self._encode_labels(data["Class Index"].values)

        # Built once: every punctuation character maps to a space.
        punct_to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
        sentences = []
        for raw in data["Description"].values:
            text = contractions.fix(raw).lower()
            text = re.sub(r'http\S+', 'URL', text)
            text = re.sub(r'www\S+', 'URL', text)
            tokens = text.translate(punct_to_space).split()
            sentences.append(['<s>'] + tokens + ['</s>'])
        self.sentences = sentences

    def get_max_length(self):
        """Set ``self.max_length`` to the truncated 95th-percentile sentence length."""
        self.max_length = int(self.get_n_percentile_sentence_length(95))

    def get_n_percentile_sentence_length(self, percentile):
        """Return the given percentile of the sentence lengths."""
        sentence_lengths = [len(sentence) for sentence in self.sentences]
        return np.percentile(sentence_lengths, percentile)

    def padding(self):
        """Map tokens to ids and force every sentence to ``self.max_length`` ids.

        Unknown tokens become ``<unk>``; short sentences are right-padded with
        ``<pad>`` and long ones are cut at ``max_length``.
        """
        unk_id = self.word2idx['<unk>']
        pad_id = self.word2idx['<pad>']
        padded_sentences = []
        for sentence in self.sentences:
            ids = [self.word2idx.get(word, unk_id) for word in sentence]
            missing = self.max_length - len(ids)
            if missing > 0:
                ids += [pad_id] * missing
            else:
                ids = ids[:self.max_length]
            padded_sentences.append(ids)

        self.sentences = padded_sentences

    def create_training_data(self):
        """Expose the id matrix as ``self.X`` and the one-hot labels as ``self.Y``."""
        self.X = torch.tensor(list(self.sentences))
        self.Y = self.labels

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]


class function(nn.Module):
    """Learned combination of three representations: concat -> Linear -> activation.

    Args:
        input_dim: width of the concatenated input (sum of the three last dims).
        output_dim: width of the combined representation.
        activation: ``'relu'`` (default) or ``'tanh'``.

    Raises:
        ValueError: if ``activation`` is neither ``'relu'`` nor ``'tanh'``.
            Previously such a value was accepted silently and only failed with
            an AttributeError on the first forward pass.
    """

    def __init__(self, input_dim,output_dim, activation='relu'):
        super(function, self).__init__()
        self.fc1 = nn.Linear(input_dim, output_dim)
        if activation == 'relu':
            self.activation = nn.ReLU()
        elif activation == 'tanh':
            self.activation = nn.Tanh()
        else:
            raise ValueError(f"unsupported activation: {activation!r}")

    def forward(self, e_0, h_0, h_1):
        """Concatenate the three inputs along dim 2 and project them.

        The inputs therefore need at least three dimensions and must agree in
        every dimension except the last.
        """
        x = torch.cat((e_0, h_0, h_1), dim=2)
        return self.activation(self.fc1(x))


class LSTMClassifier(nn.Module):
    """LSTM classifier over a combination of three same-shaped representations.

    How ``e_0``, ``h_0`` and ``h_1`` are combined depends on ``method``:

    * ``'1'`` - weighted sum with three trainable scalars,
    * ``'2'`` - weighted sum with three randomly drawn, frozen scalars,
    * anything else - a learned ``function`` module (concat + Linear + ReLU).

    The combined sequence goes through an LSTM; the activation is applied to
    the LSTM outputs and the vector at the last time step feeds a linear layer
    that produces the class scores.

    Args:
        input_dim: feature width of each input representation.
        hidden_dim: LSTM hidden size.
        output_dim: number of classes.
        n_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM is bidirectional.
        device: stored on the instance for callers; not needed by ``forward``.
        method: combination strategy, see above.
        activation: ``'relu'`` (default) or ``'tanh'``.

    Raises:
        ValueError: if ``activation`` is not supported.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, bidirectional, device,method, activation='relu'):
        super(LSTMClassifier, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.device = device
        self.bidirectional = bidirectional
        self.method = method
        if self.method in ('1', '2'):
            # Method '1' learns the mixing weights; method '2' keeps the random draw fixed.
            trainable = self.method == '1'
            self.lamda1 = nn.Parameter(torch.randn(1), requires_grad=trainable)
            self.lamda2 = nn.Parameter(torch.randn(1), requires_grad=trainable)
            self.lamda3 = nn.Parameter(torch.randn(1), requires_grad=trainable)
        else:
            self.func = function(input_dim*3, input_dim)

        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, bidirectional=bidirectional, batch_first=True)
        num_directions = 2 if bidirectional else 1
        self.fc = nn.Linear(hidden_dim * num_directions, output_dim)
        if activation == 'relu':
            self.activation = nn.ReLU()
        elif activation == 'tanh':
            self.activation = nn.Tanh()
        else:
            raise ValueError(f"unsupported activation: {activation!r}")

    def forward(self, e_0, h_0, h_1):
        """Return class scores of shape ``(batch, output_dim)``.

        The inputs are batch-first sequences ``(batch, seq, input_dim)``.
        """
        if self.method in ('1', '2'):
            x = self.lamda1 * e_0 + self.lamda2 * h_0 + self.lamda3 * h_1
        else:
            x = self.func(e_0, h_0, h_1)
        # The LSTM starts from zero states when none are passed. Relying on
        # that default fixes the old explicit h0/c0, whose leading dimension
        # was 1 instead of n_layers for unidirectional multi-layer models, and
        # keeps the states on the same device as the input.
        out, _ = self.lstm(x)
        out = self.activation(out)
        out = out[:, -1, :]
        return self.fc(out)

def _elmo_features(elmo_model, X):
    """Return ``(e_0, h_0, h_1)``: embeddings and both LSTM layer outputs for ``X``.

    Each tensor concatenates, along the feature dimension, the result for ``X``
    and the result for ``X`` reversed along dim 1.

    NOTE(review): the backward-direction tensors are indexed in reversed token
    order and are concatenated position-by-position without being flipped
    back. This is kept as is so that features match what existing checkpoints
    and the inference code use.
    """
    X_flip = torch.flip(X, [1])
    e_f = elmo_model.embedding(X)
    e_b = elmo_model.embedding(X_flip)
    forward_lstm1, _ = elmo_model.lstm_forward1(e_f)
    backward_lstm1, _ = elmo_model.lstm_backward1(e_b)
    forward_lstm2, _ = elmo_model.lstm_forward2(forward_lstm1)
    backward_lstm2, _ = elmo_model.lstm_backward2(backward_lstm1)
    e_0 = torch.cat((e_f, e_b), dim=2)
    h_0 = torch.cat((forward_lstm1, backward_lstm1), dim=2)
    h_1 = torch.cat((forward_lstm2, backward_lstm2), dim=2)
    return e_0, h_0, h_1


def train_classifier(model, elmo_model, train_loader,val_loader, device, lr, epochs=10):
    """Train ``model`` on features of a frozen ``elmo_model`` and track validation loss.

    Only ``model``'s parameters are optimised (Adam). The ELMo forward pass
    runs under ``torch.no_grad()`` so no graph is kept and no gradients pile up
    on ELMo parameters that nothing ever updates; the validation pass also
    runs without gradients and with ``model`` in eval mode.

    Args:
        model: classifier called as ``model(e_0, h_0, h_1)``.
        elmo_model: module exposing ``embedding``, ``lstm_forward1/2`` and
            ``lstm_backward1/2``.
        train_loader: iterable with ``len()`` yielding ``(X, y)`` batches.
        val_loader: same, for validation.
        device: device both models and the batches are moved to.
        lr: Adam learning rate.
        epochs: number of passes over ``train_loader``.

    Returns:
        ``(losses, val_losses, model)`` with one mean batch loss per epoch each.
        ``model`` is left in eval mode after the last validation pass.
    """
    model.to(device)
    elmo_model.to(device)
    elmo_model.eval()
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    losses = []
    val_losses = []
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)
            with torch.no_grad():
                e_0, h_0, h_1 = _elmo_features(elmo_model, X)
            y_pred = model(e_0, h_0, h_1)
            loss = criterion(y_pred, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        losses.append(running_loss/len(train_loader))

        model.eval()
        val_running_loss = 0.0
        with torch.no_grad():
            for X, y in val_loader:
                X, y = X.to(device), y.to(device)
                e_0, h_0, h_1 = _elmo_features(elmo_model, X)
                y_pred = model(e_0, h_0, h_1)
                val_running_loss += criterion(y_pred, y).item()

        val_losses.append(val_running_loss/len(val_loader))

        print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}, Val Loss: {val_running_loss/len(val_loader)}')

    return losses, val_losses, model

def get_predictions(model, elmomodel, data_loader, device):
    """Collect predicted and true class indices over ``data_loader``.

    Both models are switched to eval mode and the whole pass runs under
    ``torch.no_grad()``, so no autograd graph is built during inference. The
    models are expected to already live on ``device``; only the batches are
    moved.

    Args:
        model: classifier called as ``model(e_0, h_0, h_1)``.
        elmomodel: module exposing ``embedding``, ``lstm_forward1/2`` and
            ``lstm_backward1/2``.
        data_loader: iterable of ``(inputs, targets)`` with one-hot (or score)
            targets; the true class is their argmax along dim 1.
        device: device the batches are moved to.

    Returns:
        ``(predictions, ground_truth)`` - two flat lists of class indices.
    """
    predictions = []
    ground_truth = []
    model.eval()
    elmomodel.eval()
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            # NOTE(review): backward-direction features stay in reversed token
            # order, matching how the classifier was trained.
            inputs_flip = torch.flip(inputs, [1])
            e_f = elmomodel.embedding(inputs)
            e_b = elmomodel.embedding(inputs_flip)
            forward_lstm1, _ = elmomodel.lstm_forward1(e_f)
            backward_lstm1, _ = elmomodel.lstm_backward1(e_b)
            forward_lstm2, _ = elmomodel.lstm_forward2(forward_lstm1)
            backward_lstm2, _ = elmomodel.lstm_backward2(backward_lstm1)
            h_0 = torch.cat((forward_lstm1, backward_lstm1), dim=2)
            h_1 = torch.cat((forward_lstm2, backward_lstm2), dim=2)
            e_0 = torch.cat((e_f, e_b), dim=2)
            outputs = model(e_0, h_0, h_1)
            predictions.extend(outputs.argmax(dim=1).cpu().numpy())
            ground_truth.extend(targets.argmax(dim=1).cpu().numpy())
    return predictions, ground_truth

def get_metrics(predictions, ground_truth):
    """Score predictions against the ground truth.

    Returns:
        ``(accuracy, f1, precision, recall, confusion_matrix)``; F1, precision
        and recall use the ``'weighted'`` average.
    """
    accuracy = accuracy_score(ground_truth, predictions)
    # The three averaged scores share one call signature; evaluate them in the
    # order in which they are returned.
    f1, precision, recall = (
        scorer(ground_truth, predictions, average='weighted')
        for scorer in (f1_score, precision_score, recall_score)
    )
    cm = confusion_matrix(ground_truth, predictions)
    return accuracy, f1, precision, recall, cm

In [5]:
data_path = 'data/train.csv'
test_data_path = 'data/test.csv'

# The vocabulary files hold plain dicts of str/int, so the restricted
# (weights_only) unpickler is sufficient and avoids torch.load's FutureWarning
# about implicit full unpickling.
word2idx = torch.load('word2idx.pt', weights_only=True)
idx2word = torch.load('idx2word.pt', weights_only=True)
dataset = Create_dataset_classification(data_path, word2idx, idx2word)
test_dataset = Create_dataset_classification(test_data_path, word2idx, idx2word)

# 80/20 train/validation split of the training file.
# NOTE(review): random_split is not seeded, so the split differs between runs.
train_size = int(0.8 * len(dataset))
valid_size = len(dataset) - train_size
train_dataset, valid_dataset = random_split(dataset, [train_size, valid_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 'model.pt' is a fully pickled module, hence weights_only=False. map_location
# lets a checkpoint saved on a GPU load on a CPU-only machine.
elmomodel = torch.load('model.pt', map_location=device, weights_only=False)

  word2idx = torch.load('word2idx.pt')
  idx2word = torch.load('idx2word.pt')


In [6]:
# Method 1: the classifier mixes the three ELMo representations with trainable
# scalar weights. Positional arguments 0.001 and 5 passed to train_classifier
# are the learning rate and the number of epochs.
# input_dim=300 presumably equals twice the ELMo embedding/hidden size (150),
# since forward and backward features are concatenated - confirm if sizes change.
model_method1 = LSTMClassifier(input_dim=300, hidden_dim=128, output_dim=dataset.num_classes, n_layers=2, bidirectional=True, device=device, method='1')
loss_method1, val_loss_method1, model_method1 = train_classifier(model_method1, elmomodel, train_loader, val_loader, device, 0.001, 5)
# The whole module object is pickled, not only its state_dict.
torch.save(model_method1, 'classification_model_method1.pt')
print("Model for method 1 saved as 'classification_model_method1.pt'")

Epoch 1, Loss: 0.4037661294204493, Val Loss: 0.31051146374146144
Epoch 2, Loss: 0.2934947016617904, Val Loss: 0.293622295593222
Epoch 3, Loss: 0.2545347608998418, Val Loss: 0.2892797255888581
Epoch 4, Loss: 0.2166401638649404, Val Loss: 0.30148572623233
Epoch 5, Loss: 0.18255442553913842, Val Loss: 0.2960284621516864
Model for method 1 saved as 'classification_model_method1.pt'


In [None]:
# Mount Google Drive at /content/drive; requires the google.colab package.
from google.colab import drive
drive.mount('/content/drive')

In [7]:
# Method 2: same weighted sum as method 1, but the three scalar weights are
# drawn randomly once and frozen (requires_grad=False).
# Positional arguments 0.001 and 5 are the learning rate and the epoch count.
model_method2 = LSTMClassifier(input_dim=300, hidden_dim=128, output_dim=dataset.num_classes, n_layers=2, bidirectional=True, device=device, method='2')
loss_method2, val_loss_method2, model_method2 = train_classifier(model_method2, elmomodel, train_loader, val_loader, device, 0.001, 5)
# The whole module object is pickled, not only its state_dict.
torch.save(model_method2, 'classification_model_method2.pt')
print("Model for method 2 saved as 'classification_model_method2.pt'")

Epoch 1, Loss: 0.40658972298726437, Val Loss: 0.32405326905846593
Epoch 2, Loss: 0.2954547654812535, Val Loss: 0.28800793845951556
Epoch 3, Loss: 0.2433824145719409, Val Loss: 0.2805382801989714
Epoch 4, Loss: 0.19688450227243204, Val Loss: 0.2847843968520562
Epoch 5, Loss: 0.15430494388192892, Val Loss: 0.31919645653665063
Model for method 2 saved as 'classification_model_method2.pt'


In [8]:
# Method 3: any method value other than '1' or '2' makes the classifier combine
# the three representations with a learned `function` module (concat + Linear
# + activation) instead of scalar weights.
# Positional arguments 0.001 and 5 are the learning rate and the epoch count.
model_method3 = LSTMClassifier(input_dim=300, hidden_dim=128, output_dim=dataset.num_classes, n_layers=2, bidirectional=True, device=device, method='3')
loss_method3, val_loss_method3, model_method3 = train_classifier(model_method3, elmomodel, train_loader, val_loader, device, 0.001, 5)
# The whole module object is pickled, not only its state_dict.
torch.save(model_method3, 'classification_model_method3.pt')
print("Model for method 3 saved as 'classification_model_method3.pt'")

Epoch 1, Loss: 0.3925129942620794, Val Loss: 0.32220088549455006
Epoch 2, Loss: 0.2967975326317052, Val Loss: 0.2809548254013062
Epoch 3, Loss: 0.26692185472945373, Val Loss: 0.2694308439393838
Epoch 4, Loss: 0.24137349142382541, Val Loss: 0.3036996497809887
Epoch 5, Loss: 0.22040396809950472, Val Loss: 0.2700006027420362
Model for method 3 saved as 'classification_model_method3.pt'


In [11]:
class Create_dataset_classification(Dataset):
    """Inference-time dataset built from raw description strings (no labels).

    The strings are normalised, tokenised, wrapped in ``<s>`` / ``</s>``,
    mapped to ids with a pre-built vocabulary and padded or truncated to the
    95th-percentile length of the given sentences.

    Args:
        sentences: list of raw description strings.
        word2idx: token -> id mapping containing ``<pad>`` and ``<unk>``.
        idx2word: id -> token mapping (stored, not used by this class).
    """

    def __init__(self, sentences, word2idx, idx2word):
        self.word2idx = word2idx
        self.idx2word = idx2word
        self.sentences = sentences
        self.max_length = None
        self.X = None

        for step in (
            self.preprocess_data,
            self.get_max_length,
            self.padding,
            self.create_training_data,
        ):
            step()

    def preprocess_data(self):
        """Replace the raw strings in ``self.sentences`` by token lists."""
        punct_to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
        tokenised = []
        for raw in self.sentences:
            text = contractions.fix(raw).lower()
            text = re.sub(r'http\S+', 'URL', text)
            text = re.sub(r'www\S+', 'URL', text)
            tokens = text.translate(punct_to_space).split()
            tokenised.append(['<s>', *tokens, '</s>'])
        self.sentences = tokenised

    def get_max_length(self):
        """Set ``self.max_length`` to the truncated 95th-percentile sentence length."""
        lengths = list(map(len, self.sentences))
        self.max_length = int(np.percentile(lengths, 95))

    def padding(self):
        """Map tokens to ids, then right-pad with ``<pad>`` or cut to ``max_length``."""
        lookup = self.word2idx
        limit = self.max_length
        encoded = []
        for sentence in self.sentences:
            ids = [lookup['<unk>'] if token not in lookup else lookup[token] for token in sentence]
            shortfall = limit - len(ids)
            if shortfall > 0:
                ids.extend([lookup['<pad>']] * int(shortfall))
            else:
                ids = ids[:limit]
            encoded.append(ids)
        self.sentences = encoded

    def create_training_data(self):
        """Store the padded id lists as one tensor in ``self.X``."""
        self.X = torch.tensor(list(self.sentences))

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx]

class LSTMClassifier(nn.Module):
    """LSTM classifier over a combination of three same-shaped representations.

    This definition must stay loadable under the name ``LSTMClassifier``
    because the saved classifiers are fully pickled modules.

    How ``e_0``, ``h_0`` and ``h_1`` are combined depends on ``method``:

    * ``'1'`` - weighted sum with three trainable scalars,
    * ``'2'`` - weighted sum with three randomly drawn, frozen scalars,
    * anything else - a learned ``function`` module (concat + Linear + ReLU).

    The combined sequence goes through an LSTM; the activation is applied to
    the LSTM outputs and the vector at the last time step feeds a linear layer
    that produces the class scores.

    Args:
        input_dim: feature width of each input representation.
        hidden_dim: LSTM hidden size.
        output_dim: number of classes.
        n_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM is bidirectional.
        device: stored on the instance for callers; not needed by ``forward``.
        method: combination strategy, see above.
        activation: ``'relu'`` (default) or ``'tanh'``.

    Raises:
        ValueError: if ``activation`` is not supported.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, bidirectional, device, method, activation='relu'):
        super(LSTMClassifier, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.device = device
        self.bidirectional = bidirectional
        self.method = method
        if self.method in ('1', '2'):
            # Method '1' learns the mixing weights; method '2' keeps the random draw fixed.
            trainable = self.method == '1'
            self.lamda1 = nn.Parameter(torch.randn(1), requires_grad=trainable)
            self.lamda2 = nn.Parameter(torch.randn(1), requires_grad=trainable)
            self.lamda3 = nn.Parameter(torch.randn(1), requires_grad=trainable)
        else:
            self.func = function(input_dim*3, input_dim)

        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, bidirectional=bidirectional, batch_first=True)
        num_directions = 2 if bidirectional else 1
        self.fc = nn.Linear(hidden_dim * num_directions, output_dim)
        if activation == 'relu':
            self.activation = nn.ReLU()
        elif activation == 'tanh':
            self.activation = nn.Tanh()
        else:
            raise ValueError(f"unsupported activation: {activation!r}")

    def forward(self, e_0, h_0, h_1):
        """Return class scores of shape ``(batch, output_dim)``.

        The inputs are batch-first sequences ``(batch, seq, input_dim)``.
        """
        if self.method in ('1', '2'):
            x = self.lamda1 * e_0 + self.lamda2 * h_0 + self.lamda3 * h_1
        else:
            x = self.func(e_0, h_0, h_1)
        # The LSTM starts from zero states when none are passed. Relying on
        # that default fixes the old explicit h0/c0, whose leading dimension
        # was 1 instead of n_layers for unidirectional multi-layer models, and
        # avoids creating states on a stale self.device after loading a
        # checkpoint on a different machine.
        out, _ = self.lstm(x)
        out = self.activation(out)
        out = out[:, -1, :]
        return self.fc(out)

def main():
    """Classify one description with the saved ELMo model and classifier.

    Prints one ``class-<k> <probability>`` line per class. Exits with status 1
    when the process was not started with exactly two arguments.
    """
    if len(sys.argv) != 3:
        print("Usage: python inference.py <saved model path> <description>")
        sys.exit(1)

    # NOTE(review): the argument count is validated above, but the two values
    # are never read; model path and description are hard-coded. Left as is
    # because the recorded notebook run relies on it - wire up sys.argv[1] and
    # sys.argv[2] when this becomes a standalone script.
    model_path = 'classification_model_method1.pt'
    description = 'Unions representing workers at Turner Newall say they are disappointed after talks with stricken parent firm Federal Mogul.'

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # The vocabulary files hold plain dicts, so the restricted unpickler is
    # enough (and avoids torch.load's FutureWarning).
    word2idx = torch.load('word2idx.pt', weights_only=True)
    idx2word = torch.load('idx2word.pt', weights_only=True)
    # Both models are fully pickled modules (weights_only=False).
    # map_location + .to(device) make checkpoints saved on a GPU usable on a
    # CPU-only machine and guarantee inputs and weights share one device.
    elmo_model = torch.load('model.pt', map_location=device, weights_only=False).to(device)
    classifier_model = torch.load(model_path, map_location=device, weights_only=False).to(device)
    # The pickled classifier remembers the device it was trained on.
    classifier_model.device = device

    # Preprocess the input description
    dataset = Create_dataset_classification([description], word2idx, idx2word)
    data_loader = DataLoader(dataset, batch_size=1, shuffle=False)

    # Get the predictions
    classifier_model.eval()
    elmo_model.eval()
    with torch.no_grad():
        for X in data_loader:
            X = X.to(device)
            # NOTE(review): backward-direction features stay in reversed token
            # order, matching how the classifier was trained.
            X_flip = torch.flip(X, [1])
            e_f = elmo_model.embedding(X)
            e_b = elmo_model.embedding(X_flip)
            forward_lstm1, _ = elmo_model.lstm_forward1(e_f)
            backward_lstm1, _ = elmo_model.lstm_backward1(e_b)
            forward_lstm2, _ = elmo_model.lstm_forward2(forward_lstm1)
            backward_lstm2, _ = elmo_model.lstm_backward2(backward_lstm1)
            h_0 = torch.cat((forward_lstm1, backward_lstm1), dim=2)
            h_1 = torch.cat((forward_lstm2, backward_lstm2), dim=2)
            e_0 = torch.cat((e_f, e_b), dim=2)
            outputs = classifier_model(e_0, h_0, h_1)
            # squeeze(0) drops only the batch dimension, so the result stays
            # iterable even for a single-class model.
            probabilities = torch.softmax(outputs, dim=1).squeeze(0).cpu().numpy()

    for i, prob in enumerate(probabilities):
        print(f'class-{i+1} {prob:.4f}')


if __name__ == "__main__":
    main()

class-1 0.3386
class-2 0.0001
class-3 0.3957
class-4 0.2657


  word2idx = torch.load('word2idx.pt')
  idx2word = torch.load('idx2word.pt')
