This is the drive link
https://drive.google.com/drive/folders/14SVAWYF0mz62BXiEaqMHDvXliEaq-Wto?usp=sharing

LSTM TEST - RUN THE FIRST CELL
BERT TEST - RUN THE SECOND CELL


In [11]:
#Run this cell to Test LstmNetwork
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from collections import Counter
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
import matplotlib.pyplot as plt


class HateSpeechDataset(Dataset):
    """Dataset that converts whitespace-split text into fixed-length id tensors.

    Each row of ``dataframe`` must provide a ``'Content'`` string and a
    ``'Label'`` value. Words missing from ``vocab_to_int`` are dropped, the
    remaining ids are truncated to ``max_length`` and right-padded with 0.

    Args:
        dataframe: pandas DataFrame with ``'Content'`` and ``'Label'`` columns.
        vocab_to_int: mapping from word to integer id (0 is used for padding).
        max_length: number of ids in every returned ``input_ids`` tensor.
        use_true_length: when False (default, the historical behaviour) the
            reported ``'length'`` is always ``max_length``, because it is
            measured after padding. When True, ``'length'`` is the number of
            real (non-padding) ids, clamped to at least 1 so that
            ``pack_padded_sequence`` accepts it. Note that a model trained
            with one setting sees different LSTM inputs under the other.
    """

    def __init__(self, dataframe, vocab_to_int, max_length, use_true_length=False):
        self.data = dataframe
        self.vocab_to_int = vocab_to_int
        self.max_length = max_length
        self.use_true_length = use_true_length

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{'input_ids', 'length', 'labels'}`` tensors for row ``idx``."""
        # Fetch the row once instead of one positional lookup per column.
        row = self.data.iloc[idx]
        vocab = self.vocab_to_int

        # Out-of-vocabulary words are skipped, then the sequence is truncated.
        token_ids = [vocab[word] for word in row['Content'].split() if word in vocab]
        token_ids = token_ids[:self.max_length]
        n_tokens = len(token_ids)

        # Right-pad with the padding id 0 up to max_length.
        token_ids.extend([0] * (self.max_length - n_tokens))

        if self.use_true_length:
            # Length must be measured BEFORE padding; a row with no known
            # words is reported as length 1 because zero-length sequences
            # are rejected by pack_padded_sequence.
            seq_length = max(n_tokens, 1)
        else:
            # Historical behaviour: length measured after padding/truncation,
            # which is always max_length.
            seq_length = min(len(token_ids), self.max_length)

        return {
            'input_ids': torch.tensor(token_ids, dtype=torch.long),
            'length': torch.tensor(seq_length, dtype=torch.long),
            'labels': torch.tensor(row['Label'], dtype=torch.long)
        }


class Attention_layer(nn.Module):
    """Additive attention pooling over dimension 1 of a bidirectional LSTM output.

    A single linear unit scores every position; the scores are normalised with
    a softmax along dim 1 and used as weights for a sum over that dimension.
    The input's last dimension must be ``2 * hidden_dim``.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        # 2 * hidden_dim input features: forward and backward LSTM states.
        self.attention = nn.Linear(2 * hidden_dim, 1)

    def forward(self, lstm_output):
        """Return the attention-weighted sum of ``lstm_output`` along dim 1."""
        scores = self.attention(lstm_output)
        # No padding mask is applied: every position along dim 1 gets weight.
        weights = torch.softmax(scores, dim=1)
        return (weights * lstm_output).sum(dim=1)


class LSTMNet(nn.Module):
    """Embedding -> dropout -> bidirectional LSTM -> attention pooling -> linear classifier.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each word embedding.
        hidden_dim: LSTM hidden size per direction.
        output_dim: number of output logits.
        num_layers: number of stacked LSTM layers.
        dropout: dropout probability applied to the embeddings.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, num_layers=1, dropout=0.3):
        super(LSTMNet, self).__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        self.dropout_l = nn.Dropout(dropout)

        # nn.LSTM applies its dropout only BETWEEN stacked layers and warns
        # when a non-zero value is given with a single layer, so the fixed
        # 0.1 inter-layer dropout is requested only when it can take effect.
        inter_layer_dropout = 0.1 if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=inter_layer_dropout,
        )

        self.attention = Attention_layer(hidden_dim)

        # The bidirectional LSTM yields 2 * hidden_dim features per position.
        self.fc = nn.Linear(2 * hidden_dim, output_dim)

    def forward(self, input_ids, lengths):
        """Return logits of shape ``(batch, output_dim)``.

        Args:
            input_ids: batch-first tensor of token ids.
            lengths: per-sample sequence lengths passed to
                ``pack_padded_sequence``; they may be unsorted.
        """
        embedded = self.dropout_l(self.embedding(input_ids))

        # pack_padded_sequence requires the lengths on the CPU.
        packed = pack_padded_sequence(embedded, lengths.cpu(), batch_first=True, enforce_sorted=False)
        packed_out, _ = self.lstm(packed)
        lstm_out, _ = pad_packed_sequence(packed_out, batch_first=True)

        pooled = self.attention(lstm_out)
        return self.fc(pooled)


def train(model, dataloader, criterion, optimizer, device):
    """Run one training epoch of the LSTM model.

    Each batch must be a dict with ``'input_ids'``, ``'length'`` and
    ``'labels'`` tensors. Prints and returns ``(mean batch loss, accuracy)``.
    """
    model.train()
    running_loss = 0.0
    n_correct = 0
    n_seen = 0

    for batch in tqdm(dataloader, desc="Training..."):
        input_ids, lengths, labels = (
            batch[key].to(device) for key in ('input_ids', 'length', 'labels')
        )

        optimizer.zero_grad()
        logits = model(input_ids, lengths)
        loss = criterion(logits, labels)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest logit in each row.
        predicted = logits.max(dim=1).indices
        n_correct += (predicted == labels).sum().item()
        n_seen += labels.size(0)

    acc = n_correct / n_seen
    # The loss is averaged over batches, not over individual samples.
    avg_train_loss = running_loss / len(dataloader)
    print(f"Train Loss: {avg_train_loss}, Train Accuracy: {acc}")
    return avg_train_loss, acc


def count_parameters(model):
    """Return the total number of elements in ``model``'s trainable parameters."""
    total = 0
    for param in model.parameters():
        # Frozen parameters (requires_grad == False) are not counted.
        if param.requires_grad:
            total += param.numel()
    return total



def test(model, dataloader, criterion, device):
    """Evaluate the LSTM model without gradient tracking.

    Each batch must be a dict with ``'input_ids'``, ``'length'`` and
    ``'labels'`` tensors. Prints and returns ``(mean batch loss, accuracy)``.
    """
    model.eval()
    running_loss = 0.0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Testing..."):
            input_ids, lengths, labels = (
                batch[key].to(device) for key in ('input_ids', 'length', 'labels')
            )

            logits = model(input_ids, lengths)
            running_loss += criterion(logits, labels).item()

            # Predicted class = index of the largest logit in each row.
            predicted = logits.max(dim=1).indices
            n_correct += (predicted == labels).sum().item()
            n_seen += labels.size(0)

    # The loss is averaged over batches, not over individual samples.
    avg_loss = running_loss / len(dataloader)
    acc = n_correct / n_seen
    print(f"Test Loss: {avg_loss}, Test Accuracy: {acc}")
    return avg_loss, acc

# ---- LSTM evaluation script: rebuild the vocabulary, load the saved weights, run test() ----

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The CSV must contain the 'Content' and 'Label' columns read by HateSpeechDataset.
data = pd.read_csv("HateSpeechDatasetBalanced.csv")

# 80% train / 20% held-out split with a fixed seed, then a fixed 10000-row
# sample of the held-out part is used for evaluation.
# NOTE(review): sample(n=10000, replace=False) raises if the held-out split
# has fewer than 10000 rows.
train_dataset, test_dataset = train_test_split(data, test_size=0.2, random_state=30)
test_dataset = test_dataset.sample(n=10000, replace=False, random_state=30)


# Model / run hyper-parameters. num_epochs and learning_rate are not used by
# the evaluation-only run at the bottom of this cell.
embedding_dim = 300
hidden_dim = 128
output_dim = 2
num_epochs = 5
batch_size = 16
learning_rate = 0.001


# Build the vocabulary from the TRAINING split only (whitespace tokenisation).
vocab_counter = Counter()
max_length = 500
for text in train_dataset['Content']:
    words = text.split()
    vocab_counter.update(words)

# Ids start at 1 in first-seen order; id 0 is left free for padding, hence the
# "+ 1" in vocab_size.
# NOTE(review): the ids depend on the row order of the training split, so they
# presumably have to match the split/seed used when the checkpoint was trained
# - confirm before changing the split.
vocab_to_int = {word: i for i, (word, _) in enumerate(vocab_counter.items(), 1)}
vocab_size = len(vocab_to_int) + 1

# From here on train_dataset / test_dataset are Dataset objects, not DataFrames.
train_dataset = HateSpeechDataset(train_dataset, vocab_to_int, max_length)
test_dataset = HateSpeechDataset(test_dataset, vocab_to_int, max_length)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)


lstm_model = LSTMNet(vocab_size, embedding_dim, hidden_dim, output_dim).to(device)
criterion = nn.CrossEntropyLoss()
# The optimizer is created but not stepped in this cell (no call to train()).
optimizer = optim.Adam(lstm_model.parameters(), lr=learning_rate)


# Load the trained weights onto the selected device and evaluate.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a
# trusted source.
lstm_model.load_state_dict(torch.load("LSTMTrained.pth", map_location=device))
_=test(lstm_model, test_loader, criterion, device)



Testing...: 100%|██████████| 625/625 [00:09<00:00, 62.51it/s]

Test Loss: 0.2817416494011879, Test Accuracy: 0.8791





In [12]:
######################
###Run this to test the bert ###

import torch
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from transformers import BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup
import torch.nn.functional as F
import matplotlib.pyplot as plt


# Dataset class for hate speech detection. It follows the same idea as the
# dataset class we used in hw2 (based on the helper code given to us in hw2).
# This class prepares the text data for the hate speech detection task.
class Hate_speech(Dataset):
    """Dataset that tokenizes the ``'Content'`` column for a BERT-style model.

    ``tokenizer`` must provide ``encode_plus`` (this file passes a
    ``BertTokenizer``). Every item is padded/truncated to ``max_length``.
    """

    def __init__(self, dataframe, tokenizer, max_length=128):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{'input_ids', 'attention_mask', 'labels'}`` for row ``idx``."""
        row = self.data.iloc[idx]
        encoded = self.tokenizer.encode_plus(
            row['Content'],
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_attention_mask=True,
            return_tensors='pt',
        )
        # flatten() removes the leading batch dimension added by
        # return_tensors='pt', so the DataLoader can stack the samples.
        sample = {
            key: encoded[key].flatten() for key in ('input_ids', 'attention_mask')
        }
        sample['labels'] = torch.tensor(row['Label'], dtype=torch.long)
        return sample


# Define the fine-tuned BERT network (FTB_Net)
class FTB_Net(nn.Module):
    """BERT encoder with a 2-way linear classification head on the pooled output.

    All encoder parameters are frozen except those of the last two entries of
    ``Bert_model.encoder.layer``; the head ``fc`` is always trainable.
    """

    def __init__(self, Bert_model):
        super().__init__()
        self.Bert = Bert_model
        self.fc = nn.Linear(Bert_model.config.hidden_size, 2)

        # Freeze the whole encoder first ...
        self.Bert.requires_grad_(False)
        # ... then re-enable gradients for the last two encoder layers only.
        self.Bert.encoder.layer[-2:].requires_grad_(True)

    def forward(self, input_ids, attention_mask=None):
        """Return the 2 class logits computed from the encoder's ``pooler_output``."""
        pooled = self.Bert(input_ids=input_ids, attention_mask=attention_mask).pooler_output
        return self.fc(pooled)  # classification layer


# train and test functions
def train(model, dataloader, optimizer, device):
    """Run one training epoch of the BERT classifier with cross-entropy loss.

    Each batch must be a dict with ``'input_ids'``, ``'attention_mask'`` and
    ``'labels'`` tensors. Prints the mean batch loss and the accuracy, and
    appends them to the module-level lists ``train_losses`` and
    ``train_accuracies``. Returns nothing.
    """
    model.train()
    running_loss = 0.0
    n_correct = 0
    n_seen = 0

    for batch in tqdm(dataloader, desc="Training..."):
        input_ids, attention_mask, labels = (
            batch[key].to(device) for key in ('input_ids', 'attention_mask', 'labels')
        )

        optimizer.zero_grad()
        logits = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = F.cross_entropy(logits, labels)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()

        n_correct += (logits.argmax(dim=1) == labels).sum().item()
        n_seen += labels.size(0)

    acc = n_correct / n_seen
    # The loss is averaged over batches, not over individual samples.
    avg_train_loss = running_loss / len(dataloader)
    print(f"Train Loss: {avg_train_loss}, Train Accuracy: {acc}")

    # Record the epoch metrics for plotting.
    train_losses.append(avg_train_loss)
    train_accuracies.append(acc)

def test(model, dataloader, device):
    """Evaluate the BERT classifier without gradient tracking.

    Each batch must be a dict with ``'input_ids'``, ``'attention_mask'`` and
    ``'labels'`` tensors. Prints the mean batch loss and the accuracy, and
    appends them to the module-level lists ``test_losses`` and
    ``test_accuracies``. Returns nothing.
    """
    model.eval()
    running_loss = 0.0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Testing..."):
            input_ids, attention_mask, labels = (
                batch[key].to(device) for key in ('input_ids', 'attention_mask', 'labels')
            )

            logits = model(input_ids=input_ids, attention_mask=attention_mask)
            running_loss += F.cross_entropy(logits, labels).item()

            n_correct += (logits.argmax(dim=1) == labels).sum().item()
            n_seen += labels.size(0)

    # The loss is averaged over batches, not over individual samples.
    avg_test_loss = running_loss / len(dataloader)
    acc = n_correct / n_seen
    print(f"Test Loss: {avg_test_loss}, Test Accuracy: {acc}")

    # Record the metrics for plotting.
    test_losses.append(avg_test_loss)
    test_accuracies.append(acc)


# ---- BERT evaluation script: build the loaders, load the saved weights, run test() ----

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the data and split it: 80% for training, 20% held out for testing
# (test_size=0.2), with a fixed seed.
dataset = pd.read_csv("HateSpeechDatasetBalanced.csv")
train_dataset, test_dataset = train_test_split(dataset, test_size=0.2, random_state=30)

# Evaluate on a fixed 10000-row sample of the held-out split.
# NOTE(review): sample(n=10000, replace=False) raises if the held-out split
# has fewer than 10000 rows.
test_dataset = test_dataset.sample(n=10000, replace=False, random_state=30)

# Load the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Create the train/test Dataset objects (these names stop being DataFrames here)
train_dataset = Hate_speech(train_dataset, tokenizer)
test_dataset = Hate_speech(test_dataset, tokenizer)

# Create DataLoaders for the train/test Dataset objects
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16)

Bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)  #default num of layers is 12
model = FTB_Net(Bert_model).to(device)

# The optimizer is created but not stepped in this cell (no call to train()).
optimizer = AdamW(model.parameters(), lr=0.00005)


# Lists to store training and testing metrics; train() and test() append to
# them as module-level globals, so they must exist before either is called.
train_losses = []
train_accuracies = []
test_losses = []
test_accuracies = []

# num_epochs is not used by the evaluation-only run below.
num_epochs = 5
# Load the fine-tuned weights onto the selected device.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a
# trusted source.
model.load_state_dict(torch.load("FTBertTrained.pth", map_location=device))

test(model, test_loader, device)

Testing...: 100%|██████████| 625/625 [01:21<00:00,  7.66it/s]

Test Loss: 0.24458147599548102, Test Accuracy: 0.8998



