# Set-up

In [38]:
import torch
import time
import pandas as pd
import transformers
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
transformers.logging.set_verbosity_error()

# Check if GPU is available and set the device accordingly
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Read the CSV file using pandas
def read_data_from_csv(csv_file):
    df = pd.read_csv(csv_file)

    # Extract claim, evidence, and labels
    claims = df['Claim'].tolist()
    evidence = df['Evidence'].tolist()  
    labels = df['label'].tolist()  

    return claims, evidence, labels

def time_string(seconds):
    minutes = int(seconds / 60) % 60
    hours = int(seconds / (60 * 60)) % 24
    seconds = int(seconds) % 60
    output = f"{seconds}s"
    if minutes >= 1:
        output = f"{minutes}m {seconds}s"
    if hours >= 1:
        output = f"{hours}h {minutes}m {seconds}s"
    return output

class PairwiseDataset(Dataset):
    def __init__(self, claims, evidences, labels, tokenizer, max_len):
        self.claims = claims
        self.evidences = evidences
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        claim = str(self.claims[idx])
        evidence = str(self.evidences[idx])
        label = self.labels[idx]

        encoding = self.tokenizer(
            claim,
            evidence,
            add_special_tokens=True,
            truncation=True,
            max_length=self.max_len,
            padding='max_length',
            return_tensors='pt'
        )

        return {
            'claim': claim,
            'evidence': evidence,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

# 2. Initialize model

In [39]:
# Initialize tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')


train_csv_file = 'training_data/ED/train.csv'
claims, evidences, labels = read_data_from_csv(train_csv_file)

# Split training and validation data 80/20
claims_train, claims_val, evidences_train, evidences_val, labels_train, labels_val = train_test_split(
    claims, evidences, labels, test_size=0.2, random_state=42
)

# Create datasets and dataloaders
train_dataset = PairwiseDataset(claims_train, evidences_train, labels_train, tokenizer, max_len=128)
val_dataset = PairwiseDataset(claims_val, evidences_val, labels_val, tokenizer, max_len=128)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# 3. Fine tuning

In [40]:
# Fine-tuning parameters
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
epochs = 3

# Fine-tuning loop
for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    
    start = time.time()
    model.train()
    total_loss = 0
    
    prev_elapsed = 0
    est_timings = []
    
    print("Training")
    for i, batch in enumerate(train_loader):
        curr_t = time.time() - start
        est_timings.append(curr_t - prev_elapsed)
        prev_elapsed = curr_t
        est_timing = (sum(est_timings) / len(est_timings)) * (len(train_loader) - i)
        print(f"\rTraining batch {i + 1}/{len(train_loader)}\t\t\t\tElapsed: {time_string(curr_t)}\t\t\t\tEst: {time_string(est_timing)}\t\t\t\t", end="")
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        optimizer.zero_grad()

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    
    print("")
    
    avg_train_loss = total_loss / len(train_loader)
    print(f"Average Training Loss: {avg_train_loss}")
    print(f"Total training time {time.time() - start}")
    
    print("")

    # Validation loop
    start = time.time()
    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    
    prev_elapsed = 0
    est_timings = []
    
    print("Validation")
    with torch.no_grad():
        for i, batch in enumerate(val_loader):
            curr_t = time.time() - start
            prev_elapsed = curr_t - prev_elapsed
            est_timings.append(prev_elapsed)
            est_timing = (sum(est_timings) / len(est_timings)) * (len(val_loader) - i)
            print(f"\rValidating batch {i + 1}/{len(val_loader)}\t\t\t\tElapsed: {time_string(curr_t)}\t\t\t\tEst: {time_string(est_timing)}\t\t", end="")
            
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            val_loss += loss.item()

            _, predicted = torch.max(outputs.logits, 1)
            total += labels.size(0)
            correct += int((predicted == labels).sum().item())
            
    print("")

    avg_val_loss = val_loss / len(val_loader)
    val_accuracy = correct / total
    print(f"Epoch {epoch+1}/{epochs}, Validation Loss: {avg_val_loss}, Validation Accuracy: {val_accuracy}")
    
    print("")
    
model.save_pretrained("model")

Epoch 1/3
Training
Training batch 1186/1186				Elapsed: 1h 21m 5s				Est: 4s										
Average Training Loss: 0.36071435558809123
Total training time 4865.751107931137

Validation
Validating batch 297/297				Elapsed: 6m 20s				Est: 1m 35s						
Epoch 1/3, Validation Loss: 0.2899301989374036, Validation Accuracy: 0.869014975743514

Epoch 2/3
Training
Training batch 1186/1186				Elapsed: 1h 19m 43s				Est: 4s								
Average Training Loss: 0.22492751586023235
Total training time 4784.41655421257

Validation
Validating batch 297/297				Elapsed: 6m 21s				Est: 1m 35s						
Epoch 2/3, Validation Loss: 0.2925309005102475, Validation Accuracy: 0.8776629403079519

Epoch 3/3
Training
Training batch 1186/1186				Elapsed: 1h 19m 53s				Est: 4s								
Average Training Loss: 0.11936520682132985
Total training time 4794.126923084259

Validation
Validating batch 297/297				Elapsed: 6m 21s				Est: 1m 35s						
Epoch 3/3, Validation Loss: 0.36749167492933044, Validation Accuracy: 0.8523518245095971


# 4. Testing

In [41]:
import time
from transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained("model")

test_csv_file = 'training_data/ED/dev.csv'
test_claims, test_evidences, test_labels = read_data_from_csv(test_csv_file)

# Create test dataset
test_dataset = PairwiseDataset(test_claims, test_evidences, test_labels, tokenizer, max_len=128)

# Create test dataloader
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Perform inference
model.eval()
predicted_labels = []

prev_elapsed = 0
est_timings = []
start = time.time()

with torch.no_grad():
    for i, batch in enumerate(test_loader):
        
        curr_t = time.time() - start
        prev_elapsed = curr_t - prev_elapsed
        est_timings.append(prev_elapsed)
        est_timing = (sum(est_timings) / len(est_timings)) * (len(test_loader) - i)
        print(f"\rTesting batch {i + 1}/{len(test_loader)}\t\t\t\tElapsed: {time_string(curr_t)}\t\t\t\tEst: {time_string(est_timing)}\t\t", end="")
    
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        _, batch_predicted_labels = torch.max(outputs.logits, 1)
        predicted_labels.extend(batch_predicted_labels.cpu().tolist())

# Calculate accuracy
correct_predictions = sum(1 for pred, label in zip(predicted_labels, test_labels) if pred == label)
accuracy = correct_predictions / len(test_labels)

print(predicted_labels)
print(test_labels)
print("Accuracy:", accuracy)

Testing batch 371/371				Elapsed: 7m 56s				Est: 1m 59s						[0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,