### Notebook 02: Baseline Models (Single-task)

This notebook covers the full preprocessing pipeline for baseline vision models for image classification using RestNet50.

Two different classifiers predict stance (support vs oppose) and persuasiveness (yes or no).

Images are resized, normalized and batched. Models are fine-tuned with cross-entropy loss and evaluated on our goal metric - F1-Score (Binary).

In [2]:
# Libraries
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [6]:
#Paths

data_path = "../../data/"
img_path = "../../data/images"

train_path = os.path.join(data_path,"train.csv")
dev_path   = os.path.join(data_path,"dev.csv")
test_path  = os.path.join(data_path,"test.csv")

#Load Data
df_train = pd.read_csv(train_path)
df_dev   = pd.read_csv(dev_path)
df_test  = pd.read_csv(test_path)

# Map labels to ints
stance_2id = {"oppose": 0, "support": 1}
pers_2id = {"no": 0, "yes": 1}

for df in [df_train, df_dev, df_test]:
    df["label"] = df["stance"].map(stance_2id)
    df["persuasiveness_label"] = df["persuasiveness"].map(pers_2id)


# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cpu


In [None]:
# Dataset class
class ImageDataset(Dataset):
    def __init__(self, df, img_dir, transform=None, include_labels=True):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        self.include_labels = include_labels

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, str(row['tweet_id']) + ".jpg")
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        
        if self.include_labels:
            stance = torch.tensor(row['label'], dtype=torch.long)
            pers = torch.tensor(row['persuasiveness_label'], dtype=torch.long)
            return image, stance, pers
        else:
            return image

In [None]:
#Load DataLoaders and Transforms
IMG_SIZE = 384  # Baseline size, can ajust later

transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

batch_size = 16

train_dataset = ImageDataset(df_train, img_path, transform=transform)
dev_dataset   = ImageDataset(df_dev, img_path, transform=transform)
test_dataset  = ImageDataset(df_test, img_path, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
dev_loader   = DataLoader(dev_dataset, batch_size=batch_size, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [10]:
# We define our baseline model (ResNet50)
def get_model(num_classes=2):
    model = models.resnet50(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model.to(device)

In [11]:
# Baseline A: Stance Classification

def train_model(model, train_loader, dev_loader, epochs=5, lr=2e-5):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    
    best_f1 = 0.0
    best_model_state = None
    
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for imgs, stance_labels, _ in train_loader:
            imgs = imgs.to(device)
            labels = stance_labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * imgs.size(0)
        
        epoch_loss = running_loss / len(train_loader.dataset)
        
        # Validation
        model.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for imgs, stance_labels, _ in dev_loader:
                imgs = imgs.to(device)
                labels = stance_labels.to(device)
                outputs = model(imgs)
                preds = torch.argmax(outputs, dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        f1 = f1_score(all_labels, all_preds, average="binary", pos_label=1)
        print(f"Epoch {epoch+1}/{epochs} | Loss: {epoch_loss:.4f} | Dev F1: {f1:.4f}")
        
        if f1 > best_f1:
            best_f1 = f1
            best_model_state = model.state_dict()
    
    # Load best model
    model.load_state_dict(best_model_state)
    return model

def evaluate_model(model, data_loader):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for imgs, stance_labels, _ in data_loader:
            imgs = imgs.to(device)
            labels = stance_labels.to(device)
            outputs = model(imgs)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average="binary", pos_label=1)
    recall = recall_score(all_labels, all_preds, average="binary", pos_label=1)
    f1 = f1_score(all_labels, all_preds, average="binary", pos_label=1)
    
    cm = confusion_matrix(all_labels, all_preds)
    
    return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1, "cm": cm, "y_true": all_labels, "y_pred": all_preds}



# We train the stance model
stance_model = get_model(num_classes=2)
stance_model = train_model(stance_model, train_loader, dev_loader, epochs=5, lr=2e-5)

# We evaluate the stance model
stance_results = evaluate_model(stance_model, test_loader)
print("Stance Test Results:", stance_results)

# Plot Confusion Matrix
plt.figure(figsize=(6,5))
sns.heatmap(stance_results["cm"], annot=True, fmt="d", cmap="Blues", xticklabels=["oppose","support"], yticklabels=["oppose","support"])
plt.title("Stance Confusion Matrix")
plt.show()



Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\diego/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth


100%|██████████| 97.8M/97.8M [00:04<00:00, 22.7MB/s]


UnidentifiedImageError: cannot identify image file '../../data/images\\1370338056415289348.jpg'

In [None]:
# Baseline B: Persuasiveness Classification

pers_model = get_model(num_classes=2)

def train_model_pers(model, train_loader, dev_loader, epochs=5, lr=2e-5):
    # Similar function, using persuasiveness labels
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    
    best_f1 = 0.0
    best_model_state = None
    
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for imgs, _, pers_labels in train_loader:
            imgs = imgs.to(device)
            labels = pers_labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * imgs.size(0)
        
        epoch_loss = running_loss / len(train_loader.dataset)
        
        # Validation
        model.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for imgs, _, pers_labels in dev_loader:
                imgs = imgs.to(device)
                labels = pers_labels.to(device)
                outputs = model(imgs)
                preds = torch.argmax(outputs, dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        f1 = f1_score(all_labels, all_preds, average="binary", pos_label=1)
        print(f"Epoch {epoch+1}/{epochs} | Loss: {epoch_loss:.4f} | Dev F1: {f1:.4f}")
        
        if f1 > best_f1:
            best_f1 = f1
            best_model_state = model.state_dict()
    
    model.load_state_dict(best_model_state)
    return model

# We train the persuasiveness model
pers_model = train_model_pers(pers_model, train_loader, dev_loader, epochs=5, lr=2e-5)

# We evaluate the persuasiveness model
pers_results = evaluate_model(pers_model, test_loader)
print("Persuasiveness Test Results:", pers_results)

# Plot Confusion Matrix
plt.figure(figsize=(6,5))
sns.heatmap(pers_results["cm"], annot=True, fmt="d", cmap="Greens", xticklabels=["low","high"], yticklabels=["low","high"])
plt.title("Persuasiveness Confusion Matrix")
plt.show()


In [None]:
# Analyze correlation between stance and persuasiveness predictions
df_results = pd.DataFrame({
    "stance_true": stance_results["y_true"],
    "stance_pred": stance_results["y_pred"],
    "pers_true": pers_results["y_true"],
    "pers_pred": pers_results["y_pred"]
})

# Correlations between stance and persuasiveness correct predictions
# Do they correlate enough to build a multitask model?

correlation = df_results.apply(lambda x: x['stance_true']==x['stance_pred'] and x['pers_true']==x['pers_pred'], axis=1).mean()
print(f"Correlation between correct predictions (stance & pers): {correlation:.4f}")