[Solution 2](https://colab.research.google.com/drive/1p8U798dCxC1aElkVtE2oO5-UQ8VYhvtA?usp=sharing)

# Install packages

In [1]:
!pip install torch torchvision
!pip install numpy
!pip install pandas
!pip install d2l==1.0.3
!pip install google.colab
!pip install nltk

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin



# Imports

In [1]:
import pandas as pd
import io
import os
import math
import torch
from collections import Counter
from torch.utils.data import DataLoader, Dataset
import numpy as np
from torch import nn
from d2l import torch as d2l
from google.colab import files
from torch.nn.utils.rnn import pad_sequence
from torch.optim import Optimizer
from torch.nn import CrossEntropyLoss
import matplotlib.pyplot as plt
import nltk
from nltk.tokenize import word_tokenize

# Load data

In [28]:
if not os.path.exists('test.csv') or not os.path.exists('model_epoch_14.pth'):
  uploaded = files.upload()

if not os.path.exists('glove.6B.zip'):
    !wget http://nlp.stanford.edu/data/glove.6B.zip
    !unzip glove.6B.zip -d glove.6B

--2024-04-24 16:12:53--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... ^C
unzip:  cannot find or open glove.6B.zip, glove.6B.zip.zip or glove.6B.zip.ZIP.


In [29]:
# word_tokenize relies on NLTK's Punkt models. Recent NLTK releases look them up under
# 'punkt_tab' while older ones use 'punkt'; nltk is installed unpinned, so fetch both.
for punkt_resource in ('punkt', 'punkt_tab'):
    nltk.download(punkt_resource)

def simple_tokenize(text):
    """Lower-case ``text`` and split it into tokens with NLTK's ``word_tokenize``."""
    lowered = text.lower()
    return word_tokenize(lowered)

#This function converts text into a fixed-length array of embeddings
def text_to_embeddings(text, embeddings_dict, max_length, embedding_dim=100):
    """Convert ``text`` into a fixed-size ``(max_length, embedding_dim)`` float32 matrix.

    The text is tokenised with ``simple_tokenize`` (lower-casing followed by NLTK's
    ``word_tokenize``) and only the first ``max_length`` tokens are used. Row ``i``
    holds the embedding of token ``i``; rows belonging to tokens that are missing
    from ``embeddings_dict`` and rows past the end of the text stay all-zero.

    Args:
        text: Raw input string.
        embeddings_dict: Mapping from token to a 1-D vector of length ``embedding_dim``.
        max_length: Number of rows in the result (the text is truncated or
            zero-padded to this many tokens).
        embedding_dim: Length of each embedding vector. Defaults to 100, the value
            this function previously hard-coded.

    Returns:
        ``np.ndarray`` of shape ``(max_length, embedding_dim)`` and dtype float32.
    """
    tokens = simple_tokenize(text)[:max_length]

    # Preallocate the whole matrix once: padding and unknown-token rows are already
    # zero, no throw-away zero vector is built per token, and max_length == 0 yields
    # an empty matrix instead of making np.vstack fail on an empty list.
    matrix = np.zeros((max_length, embedding_dim), dtype=np.float32)
    for row, token in enumerate(tokens):
        vector = embeddings_dict.get(token)
        if vector is not None:
            matrix[row] = vector
    return matrix

# Custom class that extends PyTorch’s Dataset class.
# It is designed to handle loading and transforming text data for a model
class TextDataset(Dataset):
    """Dataset that yields one embedded claim/evidence pair per index.

    The 'Claim' and 'Evidence' columns of ``df`` are stored as Python lists. Indexing
    joins the two texts with a single space and returns the result of
    ``text_to_embeddings`` as a float32 tensor.
    """

    def __init__(self, embeddings_dict, df, max_length=256):
        self.embeddings_dict = embeddings_dict
        claim_column = df['Claim']
        self.claims = claim_column.tolist()
        evidence_column = df['Evidence']
        self.evidences = evidence_column.tolist()
        self.max_length = max_length

    def __len__(self):
        # One sample per claim row.
        return len(self.claims)

    def __getitem__(self, idx):
        # str() guards against non-string cell values, which would otherwise raise
        # a TypeError when the two texts are joined.
        pair_text = " ".join((str(self.claims[idx]), str(self.evidences[idx])))
        matrix = text_to_embeddings(pair_text, self.embeddings_dict, self.max_length)
        return torch.tensor(matrix, dtype=torch.float32)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [30]:
class TransformerEncoderBlock(nn.Module):
    """One Transformer encoder layer assembled from d2l building blocks.

    Multi-head self-attention followed by add & norm, then a position-wise
    feed-forward network followed by a second add & norm.
    """

    def __init__(self, num_hiddens, ffn_num_hiddens, num_heads, dropout,
                 use_bias=False):
        super().__init__()
        # Attribute names become state_dict key prefixes, so they are kept stable.
        self.attention = d2l.MultiHeadAttention(
            num_hiddens, num_heads, dropout, use_bias)
        self.addnorm1 = d2l.AddNorm(num_hiddens, dropout)
        self.ffn = d2l.PositionWiseFFN(ffn_num_hiddens, num_hiddens)
        self.addnorm2 = d2l.AddNorm(num_hiddens, dropout)

    def forward(self, X):
        # Self-attention: X serves as queries, keys and values. No valid-length
        # information is supplied to the attention layer (valid_lens=None).
        attended = self.attention(X, X, X, valid_lens=None)
        Y = self.addnorm1(X, attended)
        return self.addnorm2(Y, self.ffn(Y))

In [31]:
class TransformerEncoder(d2l.Encoder):
    """Stack of ``num_blks`` TransformerEncoderBlocks applied to pre-embedded inputs."""

    def __init__(self, num_hiddens, ffn_num_hiddens,
                 num_heads, num_blks, dropout, use_bias=False):
        super().__init__()
        self.num_hiddens = num_hiddens
        self.pos_encoding = d2l.PositionalEncoding(num_hiddens, dropout)
        self.blks = nn.Sequential()
        for index in range(num_blks):
            block = TransformerEncoderBlock(
                num_hiddens, ffn_num_hiddens, num_heads, dropout, use_bias)
            # Registered as "block0", "block1", ...; these names appear in state_dict keys.
            self.blks.add_module(f"block{index}", block)

    def forward(self, X):
        # The inputs are already embedding vectors: scale them by sqrt(num_hiddens)
        # and add positional encodings once, before the first encoder block.
        X = self.pos_encoding(X * math.sqrt(self.num_hiddens))
        per_block_weights = []
        for blk in self.blks:
            X = blk(X)
            # Record the attention weights each block produced on this pass.
            per_block_weights.append(blk.attention.attention.attention_weights)
        self.attention_weights = per_block_weights
        return X

In [32]:
class ClassificationHead(nn.Module):
    """Single linear layer applied to the first sequence position of its input."""

    def __init__(self, num_hiddens, num_classes):
        super().__init__()
        self.linear = nn.Linear(in_features=num_hiddens, out_features=num_classes)

    def forward(self, X):
        # Only index 0 along dimension 1 feeds the classifier. The result holds
        # num_classes raw scores (logits) per example, not a hard 0/1 label.
        first_position = X[:, 0, :]
        return self.linear(first_position)

In [33]:
class EvidenceDetectionModel(nn.Module):
    """Transformer encoder followed by a classification head."""

    def __init__(self, num_hiddens, ffn_num_hiddens, num_heads,
                 num_blks, dropout, num_classes=2, use_bias=False):
        super().__init__()
        # Submodule names ("encoder", "classifier") prefix the state_dict keys.
        self.encoder = TransformerEncoder(
            num_hiddens, ffn_num_hiddens, num_heads, num_blks, dropout, use_bias)
        self.classifier = ClassificationHead(num_hiddens, num_classes)

    def forward(self, X):
        # Encode the full input first, then classify from the encoder output.
        return self.classifier(self.encoder(X))

In [35]:
def load_glove_embeddings(path):
    """Read a GloVe-style text file into a ``{token: float32 vector}`` dictionary.

    Every non-empty line is expected to hold a token followed by its
    whitespace-separated vector components.

    Args:
        path: Location of the UTF-8 encoded embeddings file.

    Returns:
        dict mapping each token to a 1-D ``float32`` NumPy array. When a token
        occurs on several lines, the last occurrence wins.
    """
    embeddings_dict = {}
    with open(path, 'r', encoding='utf-8') as file:
        for line in file:
            values = line.split()
            # A blank line (for example a trailing newline at the end of the file)
            # has no token to index; skip it instead of raising IndexError.
            if not values:
                continue
            word, components = values[0], values[1:]
            embeddings_dict[word] = np.asarray(components, dtype="float32")
    return embeddings_dict
#
# Parse the 100-dimensional GloVe vectors from the extracted glove.6B/ directory.
glove_path = 'glove.6B/glove.6B.100d.txt'
glove_embeddings = load_glove_embeddings(path=glove_path)

# Wrap the test CSV in a dataset and batch it 32 rows at a time. shuffle=False keeps
# the batches in dataset order.
test_df = pd.read_csv('test.csv')
test_dataset = TextDataset(embeddings_dict=glove_embeddings, df=test_df)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

def load_model(model_path, num_hiddens, ffn_num_hiddens, num_heads,
               num_blks, dropout, num_classes):
    """Build an EvidenceDetectionModel and restore its weights from ``model_path``.

    Args:
        model_path: Path to a file holding the model's saved ``state_dict``.
        num_hiddens, ffn_num_hiddens, num_heads, num_blks, dropout, num_classes:
            Forwarded unchanged to ``EvidenceDetectionModel``; they should match
            the configuration the checkpoint was saved with.

    Returns:
        The model with the loaded weights, held on the CPU and switched to eval mode.
    """
    model = EvidenceDetectionModel(num_hiddens, ffn_num_hiddens, num_heads,
                                   num_blks, dropout, num_classes)
    # map_location maps every stored tensor onto the CPU, so the checkpoint loads
    # on a runtime without a GPU as well.
    # weights_only=True restricts unpickling to tensors and plain containers, so an
    # uploaded checkpoint file cannot run arbitrary code while it is being loaded.
    state_dict = torch.load(model_path, map_location=torch.device('cpu'),
                            weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    return model

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Load the checkpoint; keyword arguments spell out what each hyper-parameter value means.
model = load_model(
    'model_epoch_14.pth',
    num_hiddens=100,
    ffn_num_hiddens=256,
    num_heads=4,
    num_blks=2,
    dropout=0.1,
    num_classes=2,
)



# Predict

In [36]:
def predict(model, data_loader, device):
    """Run inference over ``data_loader`` and return one predicted class per sample.

    Args:
        model: Module that maps a batch of inputs to per-class logits with the
            classes along dimension 1.
        data_loader: Iterable yielding one input tensor per batch.
        device: ``torch.device`` on which inference runs. The model is moved
            there in place.

    Returns:
        list with the arg-max class index of every sample, in iteration order.
    """
    # The model and its inputs must live on the same device. Moving only the
    # batches raises a device-mismatch error when the model sits on the CPU and
    # `device` is a GPU, so move the model as well.
    model.to(device)
    model.eval()
    all_predictions = []

    # Gradients are not needed for inference.
    with torch.no_grad():
        for embeddings in data_loader:
            logits = model(embeddings.to(device))
            batch_predictions = torch.argmax(logits, dim=1)
            # Bring the results back to the CPU before converting to NumPy.
            all_predictions.extend(batch_predictions.cpu().numpy())

    return all_predictions

# Predict a class for every sample yielded by the test loader.
predictions = predict(model=model, data_loader=test_loader, device=device)

In [37]:
# Single 'prediction' column, one row per predicted sample; the index is left out of the CSV.
predictions_df = pd.DataFrame({'prediction': predictions})
predictions_df.to_csv('Group_70_a.csv', index=False)