In [1]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/FDS


Mounted at /content/drive
/content/drive/MyDrive/FDS


In [2]:
import torch
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as T
from transformers import AutoTokenizer

# Import our custom modules
from dataset import TextOnlyDataset, ImageOnlyDataset, collate_text_fn
from models import TextEncoder, ImageEncoder, FusionClassifier
from utils import set_seed
from train import train_alternate

In [3]:
set_seed(42)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


In [4]:
# Path to preprocessed text CSV (51000 rows)
csv_path = "/content/drive/MyDrive/FDS/cleaned_headlines.csv"

# Path to folder containing images (188 images)
img_folder = "/content/drive/MyDrive/FDS/News"

In [5]:
# HuggingFace tokenizer for text
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Text dataset
text_ds = TextOnlyDataset(csv_path, tokenizer, max_len=128)

# Image dataset
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor()
])
img_ds = ImageOnlyDataset(img_folder, transform=transform)

print("Text samples:", len(text_ds))
print("Image samples:", len(img_ds))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Text samples: 51948
Image samples: 188


In [6]:
batch_size = 32

# Split text dataset into train/val
n_text = len(text_ds)
n_train = int(0.8 * n_text)
train_text_ds, val_text_ds = random_split(text_ds, [n_train, n_text - n_train])

text_loader = DataLoader(train_text_ds, batch_size=batch_size, shuffle=True, collate_fn=collate_text_fn)
img_loader = DataLoader(img_ds, batch_size=batch_size, shuffle=True)


In [7]:
text_model = TextEncoder(model_name="bert-base-uncased")
img_model = ImageEncoder()
classifier = FusionClassifier(num_classes=2)


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /root/.cache/torch/hub/v0.10.0.zip




Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 210MB/s]


In [8]:
params = list(text_model.parameters()) + list(img_model.parameters()) + list(classifier.parameters())
optimizer = torch.optim.Adam(params, lr=2e-5)
criterion = torch.nn.CrossEntropyLoss()

In [9]:
train_alternate(
    model_text=text_model,
    model_img=img_model,
    classifier=classifier,
    text_loader=text_loader,
    img_loader=img_loader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    epochs=20
)



=== Epoch 1 ===
Epoch 1 | Loss: 0.7206 | Acc: 0.4583

=== Epoch 2 ===
Epoch 2 | Loss: 0.5707 | Acc: 0.7526

=== Epoch 3 ===
Epoch 3 | Loss: 0.4731 | Acc: 0.8385

=== Epoch 4 ===
Epoch 4 | Loss: 0.4017 | Acc: 0.8750

=== Epoch 5 ===
Epoch 5 | Loss: 0.3314 | Acc: 0.8932

=== Epoch 6 ===
Epoch 6 | Loss: 0.2472 | Acc: 0.9167

=== Epoch 7 ===
Epoch 7 | Loss: 0.2798 | Acc: 0.8802

=== Epoch 8 ===
Epoch 8 | Loss: 0.2566 | Acc: 0.8958

=== Epoch 9 ===
Epoch 9 | Loss: 0.2471 | Acc: 0.9062

=== Epoch 10 ===
Epoch 10 | Loss: 0.2194 | Acc: 0.9010

=== Epoch 11 ===
Epoch 11 | Loss: 0.1989 | Acc: 0.9141

=== Epoch 12 ===
Epoch 12 | Loss: 0.1987 | Acc: 0.9219

=== Epoch 13 ===
Epoch 13 | Loss: 0.2494 | Acc: 0.9010

=== Epoch 14 ===
Epoch 14 | Loss: 0.2177 | Acc: 0.9115

=== Epoch 15 ===
Epoch 15 | Loss: 0.2474 | Acc: 0.8802

=== Epoch 16 ===
Epoch 16 | Loss: 0.2474 | Acc: 0.8984

=== Epoch 17 ===
Epoch 17 | Loss: 0.2004 | Acc: 0.9193

=== Epoch 18 ===
Epoch 18 | Loss: 0.1685 | Acc: 0.9401

=== Epoch

In [11]:
import torch

text_model.eval()
classifier.eval()

correct = 0
total = 0

with torch.no_grad():
    for item in val_text_ds:
        # Extract parts from the dictionary
        input_ids = item['input_ids'].unsqueeze(0).to(device)
        attention_mask = item['attention_mask'].unsqueeze(0).to(device)
        label = item['label'].unsqueeze(0).to(device)

        # Forward pass through text model
        text_features = text_model(input_ids=input_ids, attention_mask=attention_mask)

        # Classifier forward
        outputs = classifier(text_features)

        # Predictions
        preds = torch.argmax(outputs, dim=1)

        correct += (preds == label).sum().item()
        total += label.size(0)

accuracy = correct / total
print(f"Validation Accuracy: {accuracy:.4f}")


Validation Accuracy: 0.8354
