In [1]:
from rumour_milled.ml.load import load_headlines
from rumour_milled.ml.preprocess import tokenise_and_vectorise
from rumour_milled.ml.models.simple import SimpleHeadlineClassifier
from rumour_milled.ml.train import Trainer
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Environment notes kept from the original cell:
#   driver : GeForce Game Ready Driver 577.0
#   GPU    : GTX 1660
#   arch   : Turing 7.5 (presumably the CUDA compute capability — TODO confirm)
#   CUDA   : SDK 10.0-10.2
#
# Pick the CUDA device when PyTorch reports one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
# --- Dataset construction ---------------------------------------------------
# Split and batch sizes are named once so the slices below cannot drift apart.
N_TRAIN = 512          # number of headlines used for fitting
N_TEST = 128           # number of headlines held out for validation
VECTORISE_BATCH = 128  # batch_size handed to tokenise_and_vectorise
LOADER_BATCH = 32      # mini-batch size for both DataLoaders

headlines, labels = load_headlines()
# Keep only the first N_TRAIN + N_TEST examples (640 with the values above).
# NOTE(review): the split is purely positional; if load_headlines() returns
# data grouped by label, both splits will be skewed — verify.
headlines = headlines[:N_TRAIN + N_TEST]
labels = labels[:N_TRAIN + N_TEST]

# First N_TRAIN items -> training split, the remainder -> test split.
X_train = tokenise_and_vectorise(headlines[:N_TRAIN], batch_size=VECTORISE_BATCH)
# unsqueeze(1) inserts a dimension at index 1 so that the targets become
# (N, 1), assuming `labels` is a flat sequence — TODO confirm.
y_train = torch.tensor(labels[:N_TRAIN], dtype=torch.float).unsqueeze(1)
X_test = tokenise_and_vectorise(headlines[N_TRAIN:], batch_size=VECTORISE_BATCH)
y_test = torch.tensor(labels[N_TRAIN:], dtype=torch.float).unsqueeze(1)

train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=LOADER_BATCH, shuffle=True)

test_dataset = TensorDataset(X_test, y_test)
# Shuffling brings nothing to evaluation; iterate the held-out set in a fixed order.
test_loader = DataLoader(test_dataset, batch_size=LOADER_BATCH, shuffle=False)

Vectorising 128/512
Vectorising 256/512
Vectorising 384/512
Vectorising 512/512
Vectorising 128/128


In [None]:
# --- Model, loss and optimiser ----------------------------------------------
SEED = 42              # arbitrary but fixed
# The three constructor arguments are presumably input / hidden / output
# sizes — TODO confirm against SimpleHeadlineClassifier.
INPUT_DIM = 768
HIDDEN_DIM = 256
OUTPUT_DIM = 1
LEARNING_RATE = 0.001

# Seed PyTorch's global RNG so that anything drawing from it after this point
# (e.g. default parameter initialisation) is repeatable across runs.
torch.manual_seed(SEED)

model = SimpleHeadlineClassifier(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM)

# BCEWithLogitsLoss applies the sigmoid itself, so it must be fed raw logits,
# not values that were already squashed into [0, 1].
# NOTE(review): the recorded training log sits at ~0.6931 (= ln 2), which is
# the value this loss takes when every logit is 0. If SimpleHeadlineClassifier
# already ends in a sigmoid, use nn.BCELoss here or drop that sigmoid —
# TODO confirm.
loss_fn = nn.BCEWithLogitsLoss()
optimiser = optim.Adam(model.parameters(), lr=LEARNING_RATE)
trainer = Trainer(model, loss_fn, optimiser, device)

In [8]:
# Run the training loop on the training loader, passing the test loader as the
# second argument and 100 as the third (presumably the number of epochs —
# TODO confirm against Trainer.train).
trainer.train(train_loader, test_loader, 100)

Epoch 0 | train_loss: 0.717322550714016 | val_loss: 0.6931650638580322
Epoch 1 | train_loss: 0.6931512132287025 | val_loss: 0.6931476444005966
Epoch 2 | train_loss: 0.6931473091244698 | val_loss: 0.6931473612785339
Epoch 3 | train_loss: 0.6931472606956959 | val_loss: 0.6931473165750504
Epoch 4 | train_loss: 0.6931472383439541 | val_loss: 0.6931473016738892
Epoch 5 | train_loss: 0.6931472346186638 | val_loss: 0.6931473314762115
Epoch 6 | train_loss: 0.6931472383439541 | val_loss: 0.6931473314762115
Epoch 7 | train_loss: 0.6931472308933735 | val_loss: 0.6931473016738892
Epoch 8 | train_loss: 0.6931472346186638 | val_loss: 0.693147286772728
Epoch 9 | train_loss: 0.6931472308933735 | val_loss: 0.6931473016738892
Epoch 10 | train_loss: 0.6931472383439541 | val_loss: 0.6931473016738892
Epoch 11 | train_loss: 0.6931472234427929 | val_loss: 0.6931473314762115
Epoch 12 | train_loss: 0.6931472346186638 | val_loss: 0.6931473165750504
Epoch 13 | train_loss: 0.6931472383439541 | val_loss: 0.6931473

In [33]:
# Score one hand-written headline with the trained model.
sample_headline = "Someone Unveils Self-Driving Bullet Train Connecting Beijing to London"
test_X = tokenise_and_vectorise([sample_headline]).to(device)

# Switch to evaluation mode so that layers which behave differently during
# training (dropout, batch-norm, ... if the model has any) are in inference mode.
trainer.model.eval()
with torch.no_grad():  # a forward pass for inspection needs no gradients
    # NOTE(review): whether the printed value is a logit or a probability
    # depends on SimpleHeadlineClassifier's final layer — TODO confirm.
    print(trainer.model(test_X))

Vectorising 1/1
tensor([[1.7371e-13]], device='cuda:0')
