[Solution 1](https://colab.research.google.com/drive/1lsGHtGv3upQGeeto3p4CDQYi8IOzfCkC?usp=sharing)

Install packages

In [1]:
!pip install torch torchvision
!pip install numpy
!pip install pandas
!pip install google.colab
!pip install nltk

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

# b solution to Evidence Detection
# Sentence-Level Evidence Embedding for Claim Verification with
Hierarchical Attention Networks, inspired by [research done by Jing MA, Wei GAO, Shafiq JOTY and Kam-Fai Wong at Singapore Management University](https://ink.library.smu.edu.sg/sis_research/4557/)

It includes coherence-based attention, entailment-based attention, and the overall hierarchical structure for processing and integrating evidence for claim verification. The approach attempts to assess consistency across each sentence.


Getting the data files

In [3]:
from google.colab import files
import pandas as pd
import numpy as np
import io
import os
import nltk
from nltk.tokenize import word_tokenize
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F

# Prompt for an upload unless both CSV files read by the next cell are present.
# Checking train.csv alone would skip the prompt when only dev.csv is missing,
# and the later pd.read_csv('dev.csv') would then fail.
if not all(os.path.exists(name) for name in ('train.csv', 'dev.csv')):
    uploaded = files.upload()


KeyboardInterrupt: 

In [None]:
# Training split; TextDataset below reads its 'Claim', 'Evidence' and 'label' columns.
df = pd.read_csv('train.csv')
# Development split, wrapped in the validation dataset/loader further down.
val = pd.read_csv('dev.csv')

The paper also used embeddings. GloVe provides an excellent balance of speed and performance, with no need for additional training, and it works well for datasets that do not require much specialised knowledge. GloVe was chosen for its speed, after first trying BERT embeddings, which are probably better but were very slow.

First, download the GloVe embeddings if they are not already present, then load them into memory.

In [None]:
# checks if glove embeddings are already loaded
if not os.path.exists('glove.6B.zip'):
    !wget http://nlp.stanford.edu/data/glove.6B.zip
    !unzip glove.6B.zip -d glove.6B

def load_glove_embeddings(path):
    """Load GloVe vectors from a text file into a ``{word: vector}`` dict.

    Each non-blank line is expected to hold a token followed by its
    whitespace-separated float components.

    Args:
        path: Path to the GloVe ``.txt`` file (read as UTF-8).

    Returns:
        dict mapping each token (str) to a 1-D ``float32`` NumPy array.

    Raises:
        ValueError: If a component on some line cannot be parsed as a float.
    """
    embeddings_dict = {}
    with open(path, 'r', encoding='utf-8') as file:
        for line in file:
            values = line.split()
            # A blank or whitespace-only line (e.g. a stray newline at the end
            # of the file) has no token; skip it instead of raising IndexError.
            if not values:
                continue
            word, *components = values
            embeddings_dict[word] = np.asarray(components, dtype="float32")
    return embeddings_dict

# The 100d GloVe 6B file, extracted into glove.6B/ by the unzip step above.
glove_path = 'glove.6B/glove.6B.100d.txt'
# word -> vector lookup table handed to the datasets created below.
glove_embeddings = load_glove_embeddings(glove_path)


--2024-04-24 13:58:46--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... failed: Connection timed out.
Retrying.

--2024-04-24 14:00:56--  (try: 2)  http://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... failed: Connection timed out.
Retrying.

--2024-04-24 14:03:08--  (try: 3)  http://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... failed: Connection timed out.
Retrying.

--2024-04-24 14:05:20--  (try: 4)  http://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... failed: Connection timed out.
Retrying.

--2024-04-24 14:07:36--  (try: 5)  http://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... failed: Connection timed out.
Retrying.

--

 Processing text data using pre-trained word embeddings and preparing it for input into a model

In [None]:

# Tokenizer data needed by word_tokenize. Older NLTK releases load the pickled
# 'punkt' models, while recent releases look for the 'punkt_tab' resource
# instead, so fetch both to keep the notebook working across NLTK versions.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

def simple_tokenize(text):
    """Lower-case ``text`` and split it into tokens with NLTK's ``word_tokenize``."""
    lowered = text.lower()
    return word_tokenize(lowered)

# Converts text into a fixed-length array of embeddings.
def text_to_embeddings(text, embeddings_dict, max_length, *, embedding_dim=None, tokenizer=None):
    """Embed ``text`` as a ``(max_length, embedding_dim)`` float32 array.

    The text is tokenized, truncated to ``max_length`` tokens, and each token
    is looked up in ``embeddings_dict``. Tokens missing from the table and the
    padding rows after the last token are all-zero vectors.

    Args:
        text: The string to embed.
        embeddings_dict: Mapping from token to a 1-D vector.
        max_length: Number of rows in the result (truncate / zero-pad to this).
        embedding_dim: Width of each vector. When ``None`` it is inferred from
            the first vector in ``embeddings_dict``, falling back to 100 if the
            table is empty, so tables of any width can be used.
        tokenizer: Callable mapping a string to a list of tokens. Defaults to
            ``simple_tokenize``.

    Returns:
        ``numpy.ndarray`` of shape ``(max_length, embedding_dim)`` and dtype
        ``float32``.
    """
    if tokenizer is None:
        tokenizer = simple_tokenize
    if embedding_dim is None:
        # Infer the width instead of hard-coding 100: mixing 100-wide zero
        # placeholders with vectors of another width yields a ragged array.
        first_vector = next(iter(embeddings_dict.values()), None)
        embedding_dim = 100 if first_vector is None else len(first_vector)

    # Starting from zeros covers both unknown tokens and right padding, and
    # keeps a single dtype (float32) regardless of how many tokens are found.
    matrix = np.zeros((max_length, embedding_dim), dtype=np.float32)
    for row, token in enumerate(tokenizer(text)[:max_length]):
        vector = embeddings_dict.get(token)
        if vector is not None:
            matrix[row] = vector
    return matrix

# PyTorch Dataset that serves (embedded claim + evidence, label) pairs.
class TextDataset(Dataset):
    """Dataset over the 'Claim', 'Evidence' and 'label' columns of a DataFrame.

    Text is embedded on access rather than up front: each ``__getitem__`` call
    joins the claim and its evidence and runs ``text_to_embeddings`` on it.
    """

    def __init__(self, embeddings_dict, df, max_length=256):
        self.embeddings_dict = embeddings_dict
        # Columns are copied into plain Python lists for positional indexing.
        self.claims, self.evidences, self.labels = (
            df[column].tolist() for column in ('Claim', 'Evidence', 'label')
        )
        self.max_length = max_length

    def __len__(self):
        # One sample per claim row.
        return len(self.claims)

    def __getitem__(self, idx):
        """Return ``(embeddings, label)`` tensors for row ``idx`` (used by DataLoader)."""
        claim, evidence = self.claims[idx], self.evidences[idx]
        # Claim and evidence are embedded as a single space-joined text.
        matrix = text_to_embeddings(claim + " " + evidence, self.embeddings_dict, self.max_length)
        features = torch.tensor(matrix, dtype=torch.float32)
        target = torch.tensor(self.labels[idx], dtype=torch.long)
        return features, target

# Both splits share the same sequence length and batch size.
MAX_LENGTH = 256
BATCH_SIZE = 16

train_dataset = TextDataset(glove_embeddings, df, max_length=MAX_LENGTH)
val_dataset = TextDataset(glove_embeddings, val, max_length=MAX_LENGTH)

# DataLoaders handle batching; only the training split is shuffled, so
# validation batches are served in dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)

SequenceAttentionNetwork implementation, along with the key PyTorch components used in setting up the model and preparing it for training.

In [None]:
class SequenceAttentionNetwork(nn.Module):
    """GRU encoder whose outputs are pooled by two attention heads and classified.

    Both heads score every GRU output, normalise the scores with a softmax
    along dim 1 (the sequence axis, since the GRU is ``batch_first``), and
    take the weighted sum of the GRU outputs. The two pooled vectors are added
    and passed to a linear classifier.
    """

    def __init__(self, embedding_dim, hidden_size, num_classes):
        super().__init__()
        # Recurrent encoder over the embedded sequence.
        self.gru = nn.GRU(input_size=embedding_dim, hidden_size=hidden_size, batch_first=True)
        # "Coherence" head: one score per hidden feature at each position.
        self.coherence_attention = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        # "Entailment" head: a single score per position, broadcast across features.
        self.entailment_attention = nn.Linear(in_features=hidden_size, out_features=1)
        # Maps the combined context vector to class logits.
        self.classifier = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, embeddings):
        """Return class logits for a batch of embedded sequences."""
        hidden_states, _ = self.gru(embeddings)

        pooled = []
        for scorer in (self.coherence_attention, self.entailment_attention):
            # Softmax along the sequence axis, then attention-weighted sum.
            weights = F.softmax(scorer(hidden_states), dim=1)
            pooled.append((weights * hidden_states).sum(dim=1))
        coherence_context, entailment_context = pooled

        # The two summaries are merged by element-wise addition.
        return self.classifier(coherence_context + entailment_context)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the network for 100-dimensional embeddings and move it to the chosen device.
han = SequenceAttentionNetwork(embedding_dim=100, hidden_size=256, num_classes=2)
han = han.to(device)

# Adam with a fixed learning rate, trained against cross-entropy on the logits.
optimizer = torch.optim.Adam(params=han.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

The entire training and validation process; the best model (by validation accuracy) is saved.

In [None]:
# Runs the full train/validate loop and checkpoints the best-scoring model.
def train_and_evaluate(model, train_loader, val_loader, optimizer, criterion, num_epochs=3, device='cuda'):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    After every epoch the mean training loss and the validation loss/accuracy
    (from ``evaluate_model``) are printed. Whenever validation accuracy exceeds
    the best seen so far, the model's ``state_dict`` is written to
    ``best_model_b.pth``.

    Args:
        model: Network to optimise.
        train_loader: Iterable of ``(embeddings, labels)`` training batches.
        val_loader: Loader passed to ``evaluate_model`` after each epoch.
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, labels)``.
        num_epochs: Number of passes over ``train_loader``.
        device: Device each batch is moved to before the forward pass.
    """
    best_val_accuracy = 0

    for epoch_number in range(1, num_epochs + 1):
        model.train()  # training mode for this epoch's updates
        running_loss = 0

        for batch_inputs, batch_labels in train_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            # Clear gradients from the previous batch before the new backward pass.
            optimizer.zero_grad()
            batch_loss = criterion(model(batch_inputs), batch_labels)
            batch_loss.backward()
            optimizer.step()

            running_loss += batch_loss.item()

        # Mean of the per-batch losses.
        avg_train_loss = running_loss / len(train_loader)

        val_loss, val_accuracy = evaluate_model(model, val_loader, criterion, device)
        print(f'Epoch {epoch_number}, Train Loss: {avg_train_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

        # Checkpoint only on a strict improvement in validation accuracy.
        improved = val_accuracy > best_val_accuracy
        if improved:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), 'best_model_b.pth')
            print(f"Saved new best model with Validation Accuracy: {val_accuracy:.4f}")
# Evaluates the model on a dataset (typically the validation or test split).
def evaluate_model(model, dataloader, criterion, device='cuda'):
    """Return ``(average_loss, accuracy)`` of ``model`` over ``dataloader``.

    The model is switched to eval mode and run without gradient tracking.
    The loss is the mean of the per-batch losses; accuracy is the number of
    correct argmax predictions divided by ``len(dataloader.dataset)``.

    Args:
        model: Network to evaluate.
        dataloader: Loader yielding ``(embeddings, labels)`` batches; must
            expose ``.dataset`` for the sample count.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.
    """
    model.eval()
    loss_sum = 0
    num_correct = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            loss_sum += criterion(logits, batch_labels).item()

            # Predicted class is the index of the largest logit.
            predicted = logits.argmax(dim=1)
            num_correct += (predicted == batch_labels).sum().item()

    return loss_sum / len(dataloader), num_correct / len(dataloader.dataset)

# Run 10 epochs on the selected device; train_and_evaluate writes the best
# checkpoint (by validation accuracy) to best_model_b.pth.
train_and_evaluate(han, train_loader, val_loader, optimizer, criterion, num_epochs=10, device=device)
