In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import sys
import matplotlib.pyplot as plt
import numpy as np

from torch.utils import data
from datetime import datetime

# Loading in my other files
from lstm import LSTM
from data_generator import Dataset

In [None]:
# Choose the device that the model and the data batches are moved to.
# If CUDA gives incomprehensible error messages, force the CPU by setting
# use_cuda = False right after the availability check.
use_cuda = torch.cuda.is_available()
print("Using GPU:", use_cuda)
processor = torch.device("cuda:0") if use_cuda else torch.device("cpu")

# Input files, the class names handed to the Dataset, and the embedding width
data_path = "news_sample.csv"
embedding_path = "glove.6B.50d.txt"
labels = [
    "fake",
    "satire",
    "bias",
    "conspiracy",
    "junksci",
    "hate",
    "clickbait",
    "unreliable",
    "political",
    "reliable",
    "unknown",
]
embedding_size = 50

# Samples have different lengths, so pytorch cannot stack them into batches
# larger than 1 out of the box. Bigger batches are possible with the
# packed/padded sequence utilities implemented by pytorch.
batch_size = 1

# Instantiating the dataset from the Dataset class.
# Going through a Dataset can make it possible to load just the data you want
# instead of the entire dataset.
dataset = Dataset(data_path, embedding_path, labels)

# num_workers decides how many parallel processes can load data.
# Set it to 0 if you're getting incomprehensible error messages.
training_generator = data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=8,
)

# Remember to do validation too!
# This simply reuses the training data. Don't do this in a real example!
# Validation data should be loaded in exactly the same way every time so the
# results stay comparable (hence shuffle=False); that matters in particular
# if you use some sort of randomised data augmentation.
validation_set = Dataset(data_path, embedding_path, labels)
validation_generator = data.DataLoader(
    validation_set,
    batch_size=batch_size,
    shuffle=False,
    num_workers=8,
)


# Instantiating the model
# embedding_size is the size of the data representation (50 for the glove
# vectors used here); it is reused instead of repeating the literal so the
# model cannot drift out of sync with the embedding file.
# hidden_size is the size of the hidden layer in the lstm. It is analogous to
# the width of a layer of a fully connected network.
# Increasing hidden size too much may lead to memory problems.
# NOTE: `labels` must still be the list of class names when this line runs;
# len(labels) is passed as the third LSTM argument (presumably the number of
# output classes - confirm against lstm.py).
hidden_size = 500
model = LSTM(embedding_size, hidden_size, len(labels)).to(processor)

# Alternatively, you can load a previously saved model
# loaded_params = torch.load("50_epoch_model.pth")
# model = LSTM(**loaded_params["args_dict"]).to(processor)
# model.load_state_dict(loaded_params["state_dict"])


# Defining a loss function. This will vary depending on your problem
# Look at the pytorch documentation for different kinds, and short explanations
loss_function = nn.CrossEntropyLoss().to(processor)

# Defining an optimizer. Different optimizers can be experimented with such as Adam
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [None]:
# Initialising some variables for use in training
batches = float("inf")        # upper bound on batches per epoch, only used by the time estimate
time_diff = 0                 # duration of the latest training step (a timedelta after the first batch)
min_loss = float("inf")       # lowest single-batch training loss seen so far
val_loss = float("inf")       # mean validation loss of the latest finished epoch
no_improv = 0                 # batches since min_loss last improved
min_val_loss = float("inf")   # best validation loss, drives the early-stop checkpoint
epochs = 10
print_stuff = False

# These two can be used to plot loss afterwards
loss_list = []
val_loss_list = []

# Batches per epoch for the remaining-time estimate. len() of the DataLoader
# counts batches, so the estimate stays consistent for any batch_size.
batches_per_epoch = min(batches, len(training_generator))

for epoch in range(epochs):
    # Validation below switches to eval mode, so re-enable training mode
    # once at the start of every epoch.
    model.train()

    # The loop variable is called `targets` (not `labels`) so the global
    # `labels` list of class names is not overwritten by a tensor.
    for i, (batch, targets) in enumerate(training_generator):

        # Keeping track of stuff
        # Time is not super accurate: it extrapolates from the previous step only
        start_time = datetime.now()
        remaining_batches = (batches_per_epoch - i) + (epochs - (epoch + 1)) * batches_per_epoch
        est_time_left = str(time_diff * remaining_batches).split(".")[0]
        sys.stdout.write("\rEpoch: {0}. Batch: {1}. Min loss: {2:.5f}. Time left: {3}. Best: {4} batches ago. Val loss: {5:.5f}".format(epoch+1, i+1, min_loss, est_time_left, no_improv, val_loss))

        # Putting data on gpu
        batch = batch.to(processor)
        targets = targets.to(processor)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Getting a prediction for a batch, and calculating loss
        out = model(batch)
        loss = loss_function(out, targets)

        # Read the scalar once and reuse it for the bookkeeping below
        batch_loss = loss.item()
        loss_list.append(batch_loss)

        # Backpropagating and "stepping down" the metaphorical hill for gradient descent
        loss.backward()
        optimizer.step()

        # For tracking progress
        time_diff = datetime.now() - start_time

        # Nice for tracking, but unnecessary
        if batch_loss < min_loss:
            min_loss = batch_loss
            no_improv = 0
        else:
            no_improv += 1

    # Validating
    val_loss = 0
    model.eval()
    # No gradients are needed for validation; skipping them saves memory and time
    with torch.no_grad():
        for batch, targets in validation_generator:
            batch = batch.to(processor)
            targets = targets.to(processor)

            out = model(batch)
            val_loss += loss_function(out, targets).item()
    val_loss = val_loss/len(validation_generator)

    # Checkpoint whenever the validation loss reaches a new minimum
    if val_loss < min_val_loss:
        min_val_loss = val_loss
        model.save("early_stop_model.pth")
    val_loss_list.append(val_loss)

# You can save trained models for later use
model.save("{}_epoch_model.pth".format(epochs))

In [None]:
# Plotting the loss through the epochs
# The x positions are derived from the recorded losses rather than from the
# leftover loop variable `epoch`, so the plot also works when training was
# interrupted part-way through an epoch.
# One training loss is recorded per batch, so the number of recorded losses
# divided by the batches per epoch is the (possibly fractional) epoch count.
epochs_trained = len(loss_list) / max(len(training_generator), 1)
train_placings = np.linspace(0, epochs_trained, len(loss_list))
# One validation loss is recorded at the end of each finished epoch
val_placings = np.arange(1, len(val_loss_list) + 1)

plt.plot(train_placings, loss_list, label="Training")
plt.plot(val_placings, val_loss_list, label="Validation")

plt.title("Loss during training")
plt.ylabel("Loss")
plt.xlabel("Epoch")
# Enable a log scale if the curves are hard to tell apart:
#plt.yscale("log")
plt.legend()

plt.savefig("loss_log.png")