# Fake News Classification - LSTM

## 1. Imports

In [1]:
!pip install clearml > /dev/null 2>&1

In [2]:
import os
import string
import re

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn import model_selection
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import nltk

from joblib import dump, load

### Logging

In [3]:
from clearml import Task, OutputModel
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()

web_server = 'https://app.community.clear.ml'
api_server = 'https://api.community.clear.ml'
files_server = 'https://files.community.clear.ml'
access_key = user_secrets.get_secret("clear_ml_access_key")
secret_key = user_secrets.get_secret("clear_ml_secret_key")

Task.set_credentials(web_host=web_server,
                     api_host=api_server,
                     files_host=files_server,
                     key=access_key,
                     secret=secret_key)

## 2. Data loading and preprocessing

Please note that all news without title of text are discarded.

In [4]:
data = pd.read_csv('../input/fake-news-classification/WELFake_Dataset.csv', index_col=0)
data = data.dropna()
y, X = data.loc[:, 'label'], data.loc[:, data.columns != 'label']
print(f'Total number of observations: {len(X)}\n')
print(f'Ratio of real and fake news:')
y.value_counts(normalize=True).rename({1: 'real', 0: 'fake'})

Total number of observations: 71537

Ratio of real and fake news:


real    0.510351
fake    0.489649
Name: label, dtype: float64

### Data preprocessing

In [5]:
X_clean_title = pd.DataFrame({"title": X['title']})

#### Merge title and text

In [None]:
X_clean = pd.DataFrame({"title_text": X['title'] + " " + X['text']})

#### Clean data

In [6]:
stopwords = set(nltk.corpus.stopwords.words('english'))
porter_steemer = nltk.stem.PorterStemmer()

In [7]:
def remove_urls(text):
    URL_REGEX = r"[(http(s)?):\/\/(www\.)?a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)"
    return re.sub(URL_REGEX, '', text)

def remove_non_alphabetical_characters(text):
    return re.sub('[^a-zA-Z]', ' ', text)

def remove_stopwords(text):
    words = text.split()
    return ' '.join([word for word in words if word not in stopwords])
    
def stem_words(text):
    words = text.split()
    return ' '.join([porter_steemer.stem(word) for word in words])

def transform_text(text):
    text = remove_urls(text)
    text = remove_non_alphabetical_characters(text)
    text = text.lower()
    text = remove_stopwords(text)
    text = stem_words(text)
    
    return text

In [5]:
CLEAN_DATA_PATH = '../input/title-text-fake-news-clean/x_clean.csv'

if os.path.isfile(CLEAN_DATA_PATH):
    X_clean = pd.read_csv(CLEAN_DATA_PATH, index_col=0)
else:
    X_clean['title_text'] = X_clean['title_text'].apply(transform_text)

In [8]:
X_clean_title['title'] = X_clean_title['title'].apply(transform_text)

# LSTM

In [7]:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.optim as optim
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import time
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader

In [13]:
X_clean_title['class'] = y

In [14]:
X_clean_title = X_clean_title[X_clean_title['title'].str.len() > 0]

In [15]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(
            X_clean_title['title'], X_clean_title['class'], test_size=0.33, random_state=256, stratify=X_clean_title['class'])

In [16]:
split = []
for index, row in X_clean_title.iterrows():
    if index in X_train.index:
        split.append('train')
    else:
        split.append('test')
X_clean_title['split'] = split

In [41]:
PADDING_VALUE = 0

class NaiveVectorizer:
    def __init__(self, tokenized_data, **kwargs):
        tokenized_data = [seq.split() for seq in tokenized_data]
        self.wv = dict()
        iter = PADDING_VALUE + 1
        for sequence in tokenized_data:
          for word in sequence:
            if word not in self.wv:
              self.wv[word] = iter
              iter += 1

    def trim_vectorize(self, tokenized_seq, max_len=600):
        return self.vectorize(tokenized_seq[:max_len])
        
    def vectorize(self, tokenized_seq):
        tokens = []
        for word in tokenized_seq:
          if word in self.wv:
            tokens.append(self.wv[word])
            
        return torch.LongTensor(tokens)

In [9]:
class DatasetNews(Dataset):
    SPLIT_TYPES = ["train", "test"]

    def __init__(self, data, preprocess_fn, split="train"):
        super(DatasetNews, self).__init__()
        if split not in self.SPLIT_TYPES:
            raise AttributeError(f"No such split type: {split}")

        self.split = split
        self.label = [i for i, c in enumerate(data.columns) if c == "class"][0]
        self.data_col = [i for i, c in enumerate(data.columns) if c == "title"][0]
        self.data = data[data["split"] == self.split]
        self.preprocess_fn = preprocess_fn

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        seq = self.preprocess_fn(self.data.iloc[idx, self.data_col].split())
        label = self.data.iloc[idx, self.label]
        return (seq, label)


In [26]:
from torch.nn.utils.rnn import pad_sequence
naive_vectorizer = NaiveVectorizer(X_clean_title.loc[X_clean_title["split"] == "train", "title"])

def get_datasets():
    train_dataset = DatasetNews(data=X_clean_title, preprocess_fn=naive_vectorizer.vectorize)
    test_dataset = DatasetNews(data=X_clean_title, preprocess_fn=naive_vectorizer.vectorize, split="test")
        
    return train_dataset, test_dataset

def custom_collate_fn(pairs):
    seqcs, lengths, labels = [], [], []
    for pair in pairs:
        if len(pair[0]) > 0:
          labels.append(pair[1])
          lengths.append(len(pair[0]))
          seqcs.append(pair[0])

    seqcs = pad_sequence(seqcs, padding_value=PADDING_VALUE)
    lengths = torch.LongTensor(lengths)
    labels = torch.Tensor(labels)
    return seqcs, lengths, labels

In [34]:
class LSTMClassifier(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, classes, batch_size, dropout_prob, num_layers):
        super(LSTMClassifier, self).__init__()
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, num_layers =num_layers, dropout=dropout_prob)
        self.linear = nn.Linear(hidden_dim, classes)
        self.drop = nn.Dropout(dropout_prob)

    def forward(self, sentence, lengths):
        batch_size = sentence.shape[1]
        embeddings = self.embedding(sentence)
        embeddings = self.drop(embeddings)
        packed_embeddings = pack_padded_sequence(embeddings, lengths.cpu(), enforce_sorted=False)
        lstm_out, _ = self.lstm(packed_embeddings)
        output, hidden = pad_packed_sequence(lstm_out)
        out = output[lengths - 1, range(batch_size) , :]
        x = self.linear(out)
        x = x.squeeze()
        scores = torch.sigmoid(x)
        return scores

In [47]:
task = Task.create(project_name='mlcloud_fake_news', task_name='lstm_title')
task.mark_started()
logger = task.get_logger()

In [48]:
config = {
    'n_epochs': 20,
    'lr': 1e-3,
    'batch_size': 128, 
    'optimizer': 'Adam',
    'hidden_dim': 128, 
    'embedding_dim': 50,
    'dropout': 0.5, 
    'num_layers': 2
}
task.connect(config)

{'n_epochs': 20,
 'lr': 0.001,
 'batch_size': 128,
 'optimizer': 'Adam',
 'hidden_dim': 128,
 'embedding_dim': 50,
 'dropout': 0.5,
 'num_layers': 2}

In [10]:
def train(model, training_data, test_data, optimizer, device, loss_fn):

    for epoch in range(config['n_epochs']):

        print('[ Epoch', epoch, ']')
        
        start = time.time()
        train_loss, train_acc, train_f1, train_fpr = train_epoch(model, training_data, optimizer, device, loss_fn)
        print('  - (Training)   loss: {loss: 8.5f}, accuracy: {acc:3.3f} %, f1: {f1:3.3f}%, fpr: {fpr:3.3f}%, time: {time:3.3f} min'.format(
            loss = train_loss, acc=100*train_acc, f1=100*train_f1, fpr=100*train_fpr,
            time=(time.time()-start)/60))
        
        start = time.time()
        test_loss, test_acc, test_f1, test_fpr = eval_epoch(model, test_data, device, loss_fn)
        print('  - (Test)       loss: {loss: 8.5f}, accuracy: {acc:3.3f} %, f1: {f1:3.3f}%, fpr: {fpr:3.3f}%, time: {time:3.3f} min'.format(
            loss = test_loss, acc=100*test_acc, f1=100*test_f1, fpr=100*test_fpr,
            time=(time.time()-start)/60))

        logger.report_scalar(title='Loss', series='Train', iteration=epoch, value=train_loss)
        logger.report_scalar(title='Accuracy', series='Train', iteration=epoch, value=train_acc)
        logger.report_scalar(title='Loss', series='Test', iteration=epoch, value=test_loss)
        logger.report_scalar(title='Accuracy', series='Test', iteration=epoch, value=test_acc)
        logger.report_scalar(title='F1', series='Test', iteration=epoch, value=test_f1)
        logger.report_scalar(title='FPR', series='Test', iteration=epoch, value=test_fpr)
        logger.report_scalar(title='F1', series='Train', iteration=epoch, value=train_f1)
        logger.report_scalar(title='FPR', series='Train', iteration=epoch, value=train_fpr)

In [11]:
def train_epoch(model, training_data, optimizer, device, loss_fn):
    
        model.train()

        total_loss = 0
        n_examples_total, n_examples_correct = 0, 0

        for batch in tqdm(training_data, mininterval=2,desc='  - (Training)   ', leave=False):

            seqcs, lengths, labels = batch[0].to(device), batch[1], batch[2].to(device)
            # forward
            optimizer.zero_grad()
            pred = model(seqcs, lengths)

            # backward
            loss, n_correct = eval_performance(pred, labels, loss_fn)
            loss.backward()
            # Calculating False Positive Rate
            cf_matrix = confusion_matrix(labels.detach().cpu().numpy(), torch.round(pred).detach().cpu().numpy())
            tn, fp, fn, tp = cf_matrix.ravel()
            fpr = fp / (fp + tn)

            # Calculating F1-score
            f1 = f1_score(labels.detach().cpu().numpy(), torch.round(pred).detach().cpu().numpy())

            # update parameters
            optimizer.step()

            total_loss += loss.item()

            n_examples_total += batch[2].size()[0]
            n_examples_correct += n_correct
            # print(f'Loss: {loss.item()}, acc: {n_correct/batch[2].size()[0]}')

        accuracy = n_examples_correct/n_examples_total

        return total_loss, accuracy, f1, fpr

In [12]:
def eval_epoch(model, test_data, device, loss_fn):

    model.eval()

    total_loss = 0
    n_examples_total, n_examples_correct = 0, 0

    with torch.no_grad():
        for batch in tqdm(test_data, mininterval=2, desc='  - (Test) ', leave=False):

            seqcs, lengths, labels = batch[0].to(device), batch[1], batch[2].to(device)
            
            # forward
            pred = model(seqcs, lengths)
            loss, n_correct = eval_performance(pred, labels, loss_fn)
            # Calculating False Positive Rate
            cf_matrix = confusion_matrix(labels.detach().cpu().numpy(), torch.round(pred).detach().cpu().numpy())
            tn, fp, fn, tp = cf_matrix.ravel()
            fpr = fp / (fp + tn)

            # Calculating F1-score
            f1 = f1_score(labels.detach().cpu().numpy(), torch.round(pred).detach().cpu().numpy())

            total_loss += loss.item()
            
            n_examples_total += batch[2].size()[0]
            n_examples_correct += n_correct

    accuracy = n_examples_correct/n_examples_total

    return total_loss, accuracy, f1, fpr


def eval_performance(prediction, ground_truth, loss_fn):
    
    loss = loss_fn(prediction, ground_truth)

    n_correct = (torch.round(prediction) == ground_truth.to(torch.int32)).sum().item()

    return loss, n_correct

In [52]:
training_data, test_data = get_datasets()
train_dataloader = DataLoader(training_data, batch_size=config['batch_size'], shuffle=True, collate_fn=custom_collate_fn)
test_dataloader = DataLoader(test_data, batch_size=config['batch_size'], shuffle=True, collate_fn=custom_collate_fn)

In [53]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTMClassifier(embedding_dim=config['embedding_dim'], hidden_dim=config['hidden_dim'], vocab_size=len(naive_vectorizer.wv)+1, classes=1, batch_size=config['batch_size'], dropout_prob=config['dropout'], num_layers=config['num_layers']).to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=config['lr'])
loss = nn.BCELoss()
train(model=model, training_data=train_dataloader, test_data=test_dataloader, optimizer=optimizer, device=DEVICE, loss_fn=loss)

[ Epoch 0 ]


                                                                    

  - (Training)   loss:  176.61138, accuracy: 74.333 %, f1: 79.365%, fpr: 13.889%, time: 0.128 min


                                                              

  - (Test)       loss:  62.98480, accuracy: 84.169 %, f1: 83.186%, fpr: 7.576%, time: 0.043 min
[ Epoch 1 ]


                                                                    

  - (Training)   loss:  136.54202, accuracy: 82.670 %, f1: 83.333%, fpr: 18.182%, time: 0.128 min


                                                              

  - (Test)       loss:  55.86657, accuracy: 86.895 %, f1: 86.179%, fpr: 13.636%, time: 0.043 min
[ Epoch 2 ]


                                                                    

  - (Training)   loss:  121.32689, accuracy: 85.305 %, f1: 88.571%, fpr: 11.765%, time: 0.135 min


                                                              

  - (Test)       loss:  52.74808, accuracy: 87.613 %, f1: 89.552%, fpr: 11.667%, time: 0.043 min
[ Epoch 3 ]


                                                                    

  - (Training)   loss:  113.03671, accuracy: 86.715 %, f1: 90.411%, fpr: 12.121%, time: 0.127 min


                                                              

  - (Test)       loss:  50.72792, accuracy: 88.224 %, f1: 87.603%, fpr: 15.714%, time: 0.043 min
[ Epoch 4 ]


                                                                    

  - (Training)   loss:  106.30176, accuracy: 87.650 %, f1: 89.655%, fpr: 7.500%, time: 0.127 min


                                                              

  - (Test)       loss:  48.09692, accuracy: 88.810 %, f1: 81.890%, fpr: 16.129%, time: 0.043 min
[ Epoch 5 ]


                                                                    

  - (Training)   loss:  101.26584, accuracy: 88.185 %, f1: 90.411%, fpr: 14.706%, time: 0.126 min


                                                              

  - (Test)       loss:  46.78207, accuracy: 89.073 %, f1: 88.136%, fpr: 4.688%, time: 0.053 min
[ Epoch 6 ]


                                                                    

  - (Training)   loss:  97.09115, accuracy: 88.932 %, f1: 83.784%, fpr: 18.750%, time: 0.132 min


                                                              

  - (Test)       loss:  45.17035, accuracy: 89.587 %, f1: 93.431%, fpr: 6.897%, time: 0.044 min
[ Epoch 7 ]


                                                                    

  - (Training)   loss:  93.65282, accuracy: 89.361 %, f1: 89.189%, fpr: 15.152%, time: 0.128 min


                                                              

  - (Test)       loss:  45.09700, accuracy: 89.710 %, f1: 90.370%, fpr: 13.115%, time: 0.044 min
[ Epoch 8 ]


                                                                    

  - (Training)   loss:  90.11037, accuracy: 89.909 %, f1: 82.667%, fpr: 26.471%, time: 0.128 min


                                                              

  - (Test)       loss:  43.98312, accuracy: 90.186 %, f1: 91.803%, fpr: 7.576%, time: 0.050 min
[ Epoch 9 ]


                                                                    

  - (Training)   loss:  86.47769, accuracy: 90.352 %, f1: 93.827%, fpr: 10.345%, time: 0.130 min


                                                              

  - (Test)       loss:  43.01785, accuracy: 90.318 %, f1: 89.933%, fpr: 18.182%, time: 0.044 min
[ Epoch 10 ]


                                                                    

  - (Training)   loss:  84.78364, accuracy: 90.415 %, f1: 87.879%, fpr: 3.030%, time: 0.128 min


                                                              

  - (Test)       loss:  42.59404, accuracy: 90.555 %, f1: 93.706%, fpr: 7.273%, time: 0.043 min
[ Epoch 11 ]


                                                                    

  - (Training)   loss:  82.00130, accuracy: 90.894 %, f1: 91.176%, fpr: 13.514%, time: 0.123 min


                                                              

  - (Test)       loss:  42.08739, accuracy: 90.534 %, f1: 89.922%, fpr: 11.111%, time: 0.047 min
[ Epoch 12 ]


                                                                    

  - (Training)   loss:  78.67036, accuracy: 91.419 %, f1: 92.857%, fpr: 11.111%, time: 0.132 min


                                                              

  - (Test)       loss:  43.81419, accuracy: 90.789 %, f1: 92.063%, fpr: 7.812%, time: 0.050 min
[ Epoch 13 ]


                                                                    

  - (Training)   loss:  77.03905, accuracy: 91.475 %, f1: 94.444%, fpr: 8.824%, time: 0.125 min


                                                              

  - (Test)       loss:  41.13796, accuracy: 90.891 %, f1: 90.780%, fpr: 10.909%, time: 0.047 min
[ Epoch 14 ]


                                                                    

  - (Training)   loss:  74.26211, accuracy: 91.783 %, f1: 90.667%, fpr: 9.677%, time: 0.124 min


                                                              

  - (Test)       loss:  42.25829, accuracy: 90.891 %, f1: 86.822%, fpr: 19.403%, time: 0.047 min
[ Epoch 15 ]


                                                                    

  - (Training)   loss:  72.57838, accuracy: 91.977 %, f1: 92.105%, fpr: 9.677%, time: 0.130 min


                                                              

  - (Test)       loss:  42.68607, accuracy: 90.955 %, f1: 90.476%, fpr: 13.433%, time: 0.048 min
[ Epoch 16 ]


                                                                    

  - (Training)   loss:  71.03976, accuracy: 92.107 %, f1: 92.537%, fpr: 5.714%, time: 0.123 min


                                                              

  - (Test)       loss:  41.02523, accuracy: 91.027 %, f1: 92.063%, fpr: 6.349%, time: 0.043 min
[ Epoch 17 ]


                                                                    

  - (Training)   loss:  69.64403, accuracy: 92.441 %, f1: 89.286%, fpr: 5.000%, time: 0.126 min


                                                              

  - (Test)       loss:  43.33023, accuracy: 91.163 %, f1: 84.298%, fpr: 16.176%, time: 0.043 min
[ Epoch 18 ]


                                                                    

  - (Training)   loss:  68.11218, accuracy: 92.506 %, f1: 82.353%, fpr: 21.622%, time: 0.137 min


                                                              

  - (Test)       loss:  41.89617, accuracy: 91.248 %, f1: 89.655%, fpr: 18.966%, time: 0.043 min
[ Epoch 19 ]


                                                                    

  - (Training)   loss:  67.40027, accuracy: 92.707 %, f1: 92.308%, fpr: 2.857%, time: 0.132 min


                                                              

  - (Test)       loss:  43.11784, accuracy: 91.248 %, f1: 88.550%, fpr: 12.903%, time: 0.044 min




In [55]:
torch.save(model, 'lstm_title.pt')

In [56]:
output_model = OutputModel(task=task)
output_model.update_weights(weights_filename='./lstm_title.pt')

2022-06-21 20:07:26,082 - clearml.model - INFO - No output storage destination defined, registering local model ./lstm_title.pt


In [58]:
task.mark_completed()
task.close()

In [16]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(
            X_clean['title_text'], X_clean['class'], test_size=0.33, random_state=256, stratify=X_clean['class'])

In [17]:
X_clean['class'] = y
X_clean = X_clean[X_clean['title_text'].str.len() > 0]
split = []
for index, row in X_clean.iterrows():
    if index in X_train.index:
        split.append('train')
    else:
        split.append('test')
X_clean['split'] = split

In [42]:
class DatasetNews(Dataset):
    SPLIT_TYPES = ["train", "test"]

    def __init__(self, data, preprocess_fn, split="train"):
        super(DatasetNews, self).__init__()
        if split not in self.SPLIT_TYPES:
            raise AttributeError(f"No such split type: {split}")

        self.split = split
        self.label = [i for i, c in enumerate(data.columns) if c == "class"][0]
        self.data_col = [i for i, c in enumerate(data.columns) if c == "title_text"][0]
        self.data = data[data["split"] == self.split]
        self.preprocess_fn = preprocess_fn

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        seq = self.preprocess_fn(self.data.iloc[idx, self.data_col].split())
        label = self.data.iloc[idx, self.label]
        return (seq, label)


In [43]:
naive_vectorizer = NaiveVectorizer(X_clean.loc[X_clean["split"] == "train", "title_text"])

def get_datasets():
    train_dataset = DatasetNews(data=X_clean, preprocess_fn=naive_vectorizer.trim_vectorize)
    test_dataset = DatasetNews(data=X_clean, preprocess_fn=naive_vectorizer.trim_vectorize, split="test")
        
    return train_dataset, test_dataset

In [44]:
config = {
    'n_epochs': 20,
    'lr': 1e-3,
    'batch_size': 128, 
    'optimizer': 'Adam',
    'hidden_dim': 128, 
    'embedding_dim': 50,
    'dropout': 0.5, 
    'num_layers': 2
}

In [45]:
training_data, test_data = get_datasets()
train_dataloader = DataLoader(training_data, batch_size=config['batch_size'], shuffle=True, collate_fn=custom_collate_fn)
test_dataloader = DataLoader(test_data, batch_size=config['batch_size'], shuffle=True, collate_fn=custom_collate_fn)

In [46]:
len(naive_vectorizer.wv)

123812

In [47]:
task = Task.create(project_name='mlcloud_fake_news', task_name='lstm_title_text')
task.mark_started()
logger = task.get_logger()
task.connect(config)

{'n_epochs': 20,
 'lr': 0.001,
 'batch_size': 128,
 'optimizer': 'Adam',
 'hidden_dim': 128,
 'embedding_dim': 50,
 'dropout': 0.5,
 'num_layers': 2}

In [48]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTMClassifier(embedding_dim=config['embedding_dim'],hidden_dim=config['hidden_dim'], vocab_size=len(naive_vectorizer.wv)+1, classes=1, batch_size=config['batch_size'], dropout_prob=config['dropout'], num_layers=config['num_layers']).to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=config['lr']) 
loss = nn.BCELoss()
train(model=model, training_data=train_dataloader, test_data=test_dataloader, optimizer=optimizer, device=DEVICE, loss_fn=loss)

[ Epoch 0 ]


                                                                    

  - (Training)   loss:  194.18669, accuracy: 72.108 %, f1: 80.000%, fpr: 8.333%, time: 0.949 min


                                                              

  - (Test)       loss:  77.41392, accuracy: 80.126 %, f1: 73.684%, fpr: 7.143%, time: 0.164 min
[ Epoch 1 ]


                                                                    

  - (Training)   loss:  140.88395, accuracy: 82.855 %, f1: 91.667%, fpr: 12.500%, time: 0.953 min


                                                              

  - (Test)       loss:  72.63732, accuracy: 84.327 %, f1: 57.143%, fpr: 5.556%, time: 0.167 min
[ Epoch 2 ]


                                                                    

  - (Training)   loss:  127.85306, accuracy: 85.137 %, f1: 73.684%, fpr: 27.273%, time: 0.955 min


                                                              

  - (Test)       loss:  119.96224, accuracy: 83.433 %, f1: 71.795%, fpr: 42.857%, time: 0.161 min
[ Epoch 3 ]


                                                                    

  - (Training)   loss:  128.10158, accuracy: 85.202 %, f1: 76.923%, fpr: 7.692%, time: 0.955 min


                                                              

  - (Test)       loss:  44.49917, accuracy: 90.852 %, f1: 93.333%, fpr: 8.696%, time: 0.166 min
[ Epoch 4 ]


                                                                    

  - (Training)   loss:  99.14051, accuracy: 89.209 %, f1: 90.909%, fpr: 11.111%, time: 0.950 min


                                                              

  - (Test)       loss:  54.23066, accuracy: 89.177 %, f1: 78.788%, fpr: 0.000%, time: 0.163 min
[ Epoch 5 ]


                                                                    

  - (Training)   loss:  77.82338, accuracy: 91.650 %, f1: 94.737%, fpr: 0.000%, time: 0.959 min


                                                              

  - (Test)       loss:  41.16588, accuracy: 91.712 %, f1: 94.444%, fpr: 5.263%, time: 0.170 min
[ Epoch 6 ]


                                                                    

  - (Training)   loss:  69.35303, accuracy: 92.602 %, f1: 93.333%, fpr: 20.000%, time: 0.960 min


                                                              

  - (Test)       loss:  33.90395, accuracy: 93.213 %, f1: 96.154%, fpr: 0.000%, time: 0.172 min
[ Epoch 7 ]


                                                                    

  - (Training)   loss:  64.76013, accuracy: 93.279 %, f1: 94.737%, fpr: 9.091%, time: 0.961 min


                                                              

  - (Test)       loss:  35.65445, accuracy: 93.633 %, f1: 88.235%, fpr: 5.263%, time: 0.171 min
[ Epoch 8 ]


                                                                    

  - (Training)   loss:  56.76382, accuracy: 94.266 %, f1: 92.308%, fpr: 14.286%, time: 0.962 min


                                                              

  - (Test)       loss:  30.76863, accuracy: 93.734 %, f1: 94.118%, fpr: 5.000%, time: 0.174 min
[ Epoch 9 ]


                                                                    

  - (Training)   loss:  52.51804, accuracy: 94.676 %, f1: 96.552%, fpr: 0.000%, time: 0.966 min


                                                              

  - (Test)       loss:  29.04408, accuracy: 94.612 %, f1: 94.444%, fpr: 0.000%, time: 0.178 min
[ Epoch 10 ]


                                                                    

  - (Training)   loss:  47.54799, accuracy: 95.248 %, f1: 100.000%, fpr: 0.000%, time: 0.965 min


                                                              

  - (Test)       loss:  25.14787, accuracy: 95.303 %, f1: 97.561%, fpr: 0.000%, time: 0.172 min
[ Epoch 11 ]


                                                                    

  - (Training)   loss:  44.96420, accuracy: 95.504 %, f1: 100.000%, fpr: 0.000%, time: 0.966 min


                                                              

  - (Test)       loss:  24.42709, accuracy: 95.341 %, f1: 100.000%, fpr: 0.000%, time: 0.166 min
[ Epoch 12 ]


                                                                    

  - (Training)   loss:  43.73905, accuracy: 95.603 %, f1: 96.552%, fpr: 16.667%, time: 0.980 min


                                                              

  - (Test)       loss:  24.50660, accuracy: 95.833 %, f1: 97.297%, fpr: 0.000%, time: 0.170 min
[ Epoch 13 ]


                                                                    

  - (Training)   loss:  41.91168, accuracy: 95.895 %, f1: 100.000%, fpr: 0.000%, time: 0.976 min


                                                              

  - (Test)       loss:  19.98264, accuracy: 96.299 %, f1: 97.561%, fpr: 0.000%, time: 0.172 min
[ Epoch 14 ]


                                                                    

  - (Training)   loss:  37.95289, accuracy: 96.244 %, f1: 96.296%, fpr: 0.000%, time: 0.988 min


                                                              

  - (Test)       loss:  22.98481, accuracy: 95.689 %, f1: 100.000%, fpr: 0.000%, time: 0.176 min
[ Epoch 15 ]


                                                                    

  - (Training)   loss:  38.51204, accuracy: 96.275 %, f1: 95.238%, fpr: 0.000%, time: 0.978 min


                                                              

  - (Test)       loss:  19.00905, accuracy: 96.414 %, f1: 90.909%, fpr: 9.524%, time: 0.166 min
[ Epoch 16 ]


                                                                    

  - (Training)   loss:  32.24758, accuracy: 96.847 %, f1: 100.000%, fpr: 0.000%, time: 0.969 min


                                                              

  - (Test)       loss:  16.19737, accuracy: 96.943 %, f1: 95.000%, fpr: 5.882%, time: 0.173 min
[ Epoch 17 ]


                                                                    

  - (Training)   loss:  39.61317, accuracy: 96.148 %, f1: 100.000%, fpr: 0.000%, time: 0.964 min


                                                              

  - (Test)       loss:  17.29373, accuracy: 96.910 %, f1: 77.419%, fpr: 14.286%, time: 0.172 min
[ Epoch 18 ]


                                                                    

  - (Training)   loss:  28.39631, accuracy: 97.196 %, f1: 100.000%, fpr: 0.000%, time: 0.964 min


                                                              

  - (Test)       loss:  17.55394, accuracy: 96.770 %, f1: 94.118%, fpr: 0.000%, time: 0.175 min
[ Epoch 19 ]


                                                                    

  - (Training)   loss:  26.96396, accuracy: 97.361 %, f1: 100.000%, fpr: 0.000%, time: 0.970 min


                                                              

  - (Test)       loss:  15.10979, accuracy: 97.100 %, f1: 97.436%, fpr: 5.556%, time: 0.175 min




In [49]:
torch.save(model, 'lstm_title_text.pt')

In [50]:
output_model = OutputModel(task=task)
output_model.update_weights(weights_filename='./lstm_title_text.pt')

2022-06-21 21:52:18,170 - clearml.model - INFO - No output storage destination defined, registering local model ./lstm_title_text1.pt


In [51]:
task.mark_completed()
task.close()