In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from rumour_milled.load import load_headlines
from rumour_milled.preprocessing import tokenise_headlines, vectorise_tokens

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hardware / driver notes (presumably the machine this notebook was run on):
#   GeForce Game Ready Driver 577.0
#   GTX 1660
#   Turing 7.5
#   CUDA SDK 10.0-10.2
# Prefer the GPU whenever PyTorch reports CUDA as available; otherwise use the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [3]:
# Load the data set: `load_headlines` returns two values, unpacked here as the
# headlines and their labels (presumably parallel sequences, i.e. labels[i]
# belongs to headlines[i] — TODO confirm against rumour_milled.load).
headlines, labels = load_headlines()

In [4]:
# Run the packaged preprocessing helpers on the first 100 headlines only.
sample = headlines[:100]
tokens = tokenise_headlines(sample)
# batch_size=11 does not divide 100 evenly, so the last batch is a partial one
# (the progress log printed by this cell ends "99/100", then "100/100").
vector = vectorise_tokens(tokens, batch_size=11)

Vectorising 11/100
Vectorising 22/100
Vectorising 33/100
Vectorising 44/100
Vectorising 55/100
Vectorising 66/100
Vectorising 77/100
Vectorising 88/100
Vectorising 99/100
Vectorising 100/100


In [None]:
# NOTE(review): `tokeniser` and `vectoriser` are not created in any visible cell; on a
# fresh kernel they must be defined before this cell runs, and `vectoriser` must live on
# `device` because its inputs are moved there below — TODO confirm where they come from.
#
# Encode `sample` (the first 100 headlines) rather than every headline, so that the
# embeddings line up with the label slice used to build `y`; encoding all of
# `headlines` here produced feature and label tensors of different lengths.
tokens = tokeniser(
    sample,
    padding=True,
    truncation=True,
    return_tensors="pt",
)

# Embedding extraction is inference only, so autograd bookkeeping is switched off.
with torch.no_grad():
    # Every tensor returned by the tokeniser is moved onto `device` before the call.
    outputs = vectoriser(**{k: v.to(device) for k, v in tokens.items()})

In [None]:
def tokenise_and_vectorise(headline):
    """Embed headline text for the classifier.

    Runs the notebook-level `tokeniser` and then `vectoriser` on the input and
    returns the hidden state at sequence position 0 of the vectoriser output.

    Args:
        headline: Text handed straight to `tokeniser`; presumably a single
            string or a list of strings — TODO confirm against the tokeniser
            in use.

    Returns:
        `outputs.last_hidden_state[:, 0, :]`, located on `device`
        (presumably one row per input headline).
    """
    tokens = tokeniser(
        headline,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    # Send the inputs to `device`, exactly as the batch-encoding cell does; leaving
    # them on the CPU fails with a device mismatch when `vectoriser` is on the GPU.
    tokens = {name: tensor.to(device) for name, tensor in tokens.items()}
    with torch.no_grad():  # inference only, no autograd graph needed
        outputs = vectoriser(**tokens)
    return outputs.last_hidden_state[:, 0, :]

In [None]:
# Feature matrix: the hidden state at sequence position 0 for every encoded headline.
X = outputs.last_hidden_state[:, 0, :]
# Take exactly as many labels as there are feature rows so X and y can never disagree
# in length (a hard-coded 100 breaks as soon as a different number of headlines is
# encoded). Assumes labels are in the same order as the encoded headlines — TODO confirm.
# unsqueeze(1) adds a trailing dimension of size 1; float dtype because the loss
# (BCEWithLogitsLoss) takes floating-point targets.
y = torch.tensor(labels[: X.shape[0]], dtype=torch.float).unsqueeze(1)

In [None]:
from torch.utils.data import TensorDataset, DataLoader

# Pair each feature row with its label, then serve reshuffled mini-batches of 10.
dataset = TensorDataset(X, y)
data_loader = DataLoader(
    dataset=dataset,
    batch_size=10,
    shuffle=True,
)

In [None]:
from rumour_milled.models import SimpleHeadlineClassifier
# Sizes handed to SimpleHeadlineClassifier; presumably (input, hidden, output) widths,
# with 768 matching the embedding width — TODO confirm against the model class.
model = SimpleHeadlineClassifier(768, 256, 1)
model = model.to(device)
# BCEWithLogitsLoss applies the sigmoid itself, so it is fed the model's raw logits
# (not values already squashed into [0, 1]).
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

In [None]:
# Training loop: 1000 passes over `data_loader`; after each pass the sum of the
# per-batch losses (not their mean) is printed.
for epoch in range(1000):
    total_loss = 0
    for features, targets in data_loader:
        optimizer.zero_grad()  # drop gradients accumulated by the previous step

        # Batches are moved to `device` here; the model already lives there.
        features = features.to(device)
        targets = targets.to(device)

        loss = criterion(model(features), targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

In [None]:
# Classify a single unseen headline.
# Switch to evaluation mode so training-only behaviour (e.g. dropout, if the model has
# any) is disabled; call model.train() again before re-running the training cell.
model.eval()
with torch.no_grad():
    # Make sure the embedding is on the same device as the classifier.
    embedding = tokenise_and_vectorise("Earth shattering headline").to(device)
    logits = model(embedding)
    # The classifier is trained with BCEWithLogitsLoss, so it emits raw logits.
    # Squash them to probabilities first; rounding a raw logit (e.g. 3.7 -> 4.0)
    # does not give a 0/1 class label.
    preds = torch.sigmoid(logits)
    print("Predictions:", preds.round())