In [1]:
from rumour_milled.ml.load import load_headlines
from rumour_milled.ml.preprocess import tokenise_and_vectorise
from rumour_milled.ml.models.simple import SimpleHeadlineClassifier
from rumour_milled.ml.train import Trainer
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# GeForce Game Ready Driver 577.0
# GTX 1660
# Turing 7.5
# CUDA SDK 10.0-10.2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
headlines, labels = load_headlines()
headlines = headlines[:512]
labels = labels[:512]
X = tokenise_and_vectorise(headlines, batch_size=128)
y = torch.tensor(labels, dtype=torch.float).unsqueeze(1)
dataset = TensorDataset(X, y)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

Vectorising 128/512
Vectorising 256/512
Vectorising 384/512
Vectorising 512/512


In [25]:
model = SimpleHeadlineClassifier(768, 256, 1)
loss_fn = nn.BCEWithLogitsLoss()  # Binary Cross Entropy for [0,1] output
optimiser = optim.Adam(model.parameters(), lr=0.001)
trainer = Trainer(model, loss_fn, optimiser, device)

In [26]:
trainer.train(
    train_loader,
    None,
    1000
)

Epoch 0 loss: 0.6750339828431606
Epoch 1 loss: 0.5732978507876396
Epoch 2 loss: 0.5504259876906872
Epoch 3 loss: 0.5426933579146862
Epoch 4 loss: 0.536028578877449
Epoch 5 loss: 0.5370371639728546
Epoch 6 loss: 0.532574899494648
Epoch 7 loss: 0.5274413246661425
Epoch 8 loss: 0.5258894767612219
Epoch 9 loss: 0.5250043943524361
Epoch 10 loss: 0.524835092946887
Epoch 11 loss: 0.5240542888641357
Epoch 12 loss: 0.5239408668130636
Epoch 13 loss: 0.5238663237541914
Epoch 14 loss: 0.5238344240933657
Epoch 15 loss: 0.5238197203725576
Epoch 16 loss: 0.5238040573894978
Epoch 17 loss: 0.5237897410988808
Epoch 18 loss: 0.5237819664180279
Epoch 19 loss: 0.5237738005816936
Epoch 20 loss: 0.5237667448818684
Epoch 21 loss: 0.5237602423876524
Epoch 22 loss: 0.5237546861171722
Epoch 23 loss: 0.5237525310367346
Epoch 24 loss: 0.5237469356507063
Epoch 25 loss: 0.5237432140856981
Epoch 26 loss: 0.5237403456121683
Epoch 27 loss: 0.5237376894801855
Epoch 28 loss: 0.5237353295087814
Epoch 29 loss: 0.5237332414

In [33]:
test_X = tokenise_and_vectorise(["Someone Unveils Self-Driving Bullet Train Connecting Beijing to London"]).to(device)
with torch.no_grad():
    print(trainer.model(test_X))

Vectorising 1/1
tensor([[1.7371e-13]], device='cuda:0')
