# Google Colab setup

In [1]:
# Mount Google Drive at /content/drive so files stored there are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%%capture
!pip install datasets kaggle eli5 torchmetrics

In [3]:
# Copy the project folder from Drive into the current working directory.
# Presumably this is what provides the local modules (data_pipeline, models),
# ./data and ./checkpoints used by later cells — confirm against the Drive folder.
!cp -r /content/drive/MyDrive/big_data_project/* .

# General setup

In [1]:
import scipy
import numpy as np
def monkeypath_itemfreq(sampler_indices):
    """Stand-in for ``scipy.stats.itemfreq`` built on ``np.unique``.

    Args:
        sampler_indices: Array-like of values to tally.

    Returns:
        An iterator of ``(value, count)`` pairs, one per distinct value, in
        the sorted order produced by ``np.unique``.
    """
    values, counts = np.unique(sampler_indices, return_counts=True)
    return zip(values, counts)


# A bare `import scipy` does not guarantee that the `stats` submodule is
# loaded (SciPy versions without lazy submodule loading raise AttributeError
# on `scipy.stats`), so import it explicitly before patching.
import scipy.stats

# NOTE(review): presumably needed because the installed SciPy no longer ships
# `itemfreq` while eli5 still looks it up; if so this cell must run before
# eli5 is imported — confirm.
scipy.stats.itemfreq = monkeypath_itemfreq

In [2]:
import os
import re
import sys
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import eli5

sys.path.append(".")
from eli5.lime import TextExplainer
from nltk.corpus import stopwords
from tqdm import tqdm
from datasets import load_dataset
from torchmetrics import Accuracy, Precision, Recall, F1Score, MetricCollection
from data_pipeline import get_dataloaders, text_preprocess, isot_clean
from models.lstm import LSTMNet, train, evaluate, inference

In [18]:
# Select the dataset/model pair and build the three data loaders together with
# the tokenizer and vocab that later cells pass to inference.
# NOTE(review): the training sections below cover three datasets but this cell
# is pinned to "liar"; presumably DATASET_ID is edited and the cell re-run per
# section — confirm.
DATASET_ID = "liar"
MODEL = "lstm"
(train_loader, val_loader, test_loader), (tokenizer, vocab) = get_dataloaders(dataset_id=DATASET_ID, model=MODEL)

# LSTM

In [19]:
# Hyperparameters handed to LSTMNet as keyword arguments; the same dict is
# stored in the checkpoint so the network can be rebuilt at load time.
model_config = dict(
    vocab_size=len(vocab),
    embedding_dim=128,
    hidden_dim=64,
    output_dim=1,
    n_layers=2,
    bidirectional=True,
    dropout=0.2,
)

In [20]:
# Build the network from the hyperparameters in model_config.
model = LSTMNet(**model_config)

In [21]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [22]:
# Move the network to the target device, then build the optimizer over its parameters.
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# reduction="none" keeps the loss un-aggregated (one value per element);
# presumably train()/evaluate() weight and reduce it themselves — confirm.
criterion = nn.BCELoss(reduction="none").to(device)

## Dataset 1 (Fake News)

In [None]:
EPOCHS = 12

# Binary-classification metrics, placed on the same device as the model.
metrics_group = MetricCollection(
    [
        Accuracy(task="binary"),
        Precision(task="binary"),
        Recall(task="binary"),
        F1Score(task="binary")
    ]
).to(device)
# Passed through to train()/evaluate(); presumably per-class loss weights
# (equal here) — confirm in models/lstm.
class_weight = torch.tensor([1.0, 1.0])
# Highest validation F1 seen so far and a snapshot of the weights behind it.
best_score = None
best_state_dict = None

for epoch in range(EPOCHS):
    print(f"Epoch {epoch}")

    train_loss, train_metrics = train(
        model,
        train_loader,
        optimizer,
        criterion,
        metrics_group,
        class_weight,
        device
    )
    # One MetricCollection is shared by the train and validation passes, so
    # its accumulated state is cleared after each of them.
    metrics_group.reset()
    valid_loss, valid_metrics = evaluate(
        model,
        val_loader,
        criterion,
        metrics_group,
        class_weight,
        device
    )
    metrics_group.reset()

    valid_f1 = valid_metrics["BinaryF1Score"].cpu().item()
    if (best_score is None) or (valid_f1 > best_score):
        best_score = valid_f1
        # state_dict() returns references to the live parameter tensors, which
        # later epochs keep updating in place. Clone them so the snapshot
        # really holds the weights of the best epoch.
        best_state_dict = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

    print("Train")
    print(f"Loss: {train_loss:.3f}", end=", ")
    print(", ".join([f"{k}: {v.cpu().item() * 100:.2f}" for k, v in train_metrics.items()]))

    print("Validation")
    print(f"Loss: {valid_loss:.3f}", end=", ")
    print(", ".join([f"{k}: {v.cpu().item() * 100:.2f}" for k, v in valid_metrics.items()]))
    print()

Epoch 0
Train
Loss: 0.582, BinaryAccuracy: 70.51, BinaryPrecision: 80.86, BinaryRecall: 52.00, BinaryF1Score: 63.30
Validation
Loss: 0.502, BinaryAccuracy: 73.93, BinaryPrecision: 96.98, BinaryRecall: 49.61, BinaryF1Score: 65.64

Epoch 1
Train
Loss: 0.387, BinaryAccuracy: 83.45, BinaryPrecision: 88.55, BinaryRecall: 75.99, BinaryF1Score: 81.79
Validation
Loss: 0.343, BinaryAccuracy: 85.85, BinaryPrecision: 92.86, BinaryRecall: 77.80, BinaryF1Score: 84.66

Epoch 2
Train
Loss: 0.328, BinaryAccuracy: 86.64, BinaryPrecision: 88.86, BinaryRecall: 83.11, BinaryF1Score: 85.89
Validation
Loss: 0.399, BinaryAccuracy: 86.05, BinaryPrecision: 88.48, BinaryRecall: 83.01, BinaryF1Score: 85.66

Epoch 3
Train
Loss: 0.269, BinaryAccuracy: 89.10, BinaryPrecision: 91.20, BinaryRecall: 86.01, BinaryF1Score: 88.53
Validation
Loss: 0.268, BinaryAccuracy: 89.24, BinaryPrecision: 94.92, BinaryRecall: 83.01, BinaryF1Score: 88.57

Epoch 4
Train
Loss: 0.208, BinaryAccuracy: 91.52, BinaryPrecision: 92.58, Binary

## Dataset 2 (ISOT Fake News)

In [16]:
EPOCHS = 7

# Binary-classification metrics, placed on the same device as the model.
metrics_group = MetricCollection(
    [
        Accuracy(task="binary"),
        Precision(task="binary"),
        Recall(task="binary"),
        F1Score(task="binary")
    ]
).to(device)
# Passed through to train()/evaluate(); presumably per-class loss weights
# (equal here) — confirm in models/lstm.
class_weight = torch.tensor([1.0, 1.0])
# Highest validation F1 seen so far and a snapshot of the weights behind it.
best_score = None
best_state_dict = None

for epoch in range(EPOCHS):
    print(f"Epoch {epoch}")

    train_loss, train_metrics = train(
        model,
        train_loader,
        optimizer,
        criterion,
        metrics_group,
        class_weight,
        device
    )
    # One MetricCollection is shared by the train and validation passes, so
    # its accumulated state is cleared after each of them.
    metrics_group.reset()
    valid_loss, valid_metrics = evaluate(
        model,
        val_loader,
        criterion,
        metrics_group,
        class_weight,
        device
    )
    metrics_group.reset()

    valid_f1 = valid_metrics["BinaryF1Score"].cpu().item()
    if (best_score is None) or (valid_f1 > best_score):
        best_score = valid_f1
        # state_dict() returns references to the live parameter tensors, which
        # later epochs keep updating in place. Clone them so the snapshot
        # really holds the weights of the best epoch.
        best_state_dict = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

    print("Train")
    print(f"Loss: {train_loss:.3f}", end=", ")
    print(", ".join([f"{k}: {v.cpu().item() * 100:.2f}" for k, v in train_metrics.items()]))

    print("Validation")
    print(f"Loss: {valid_loss:.3f}", end=", ")
    print(", ".join([f"{k}: {v.cpu().item() * 100:.2f}" for k, v in valid_metrics.items()]))
    print()

Epoch 0
Train
Loss: 0.383, BinaryAccuracy: 82.62, BinaryPrecision: 84.75, BinaryRecall: 76.71, BinaryF1Score: 80.53
Validation
Loss: 0.239, BinaryAccuracy: 90.16, BinaryPrecision: 91.00, BinaryRecall: 88.79, BinaryF1Score: 89.88

Epoch 1
Train
Loss: 0.197, BinaryAccuracy: 92.43, BinaryPrecision: 91.88, BinaryRecall: 91.97, BinaryF1Score: 91.93
Validation
Loss: 0.161, BinaryAccuracy: 93.92, BinaryPrecision: 93.56, BinaryRecall: 94.10, BinaryF1Score: 93.83

Epoch 2
Train
Loss: 0.142, BinaryAccuracy: 94.81, BinaryPrecision: 94.57, BinaryRecall: 94.35, BinaryF1Score: 94.46
Validation
Loss: 0.153, BinaryAccuracy: 94.26, BinaryPrecision: 96.14, BinaryRecall: 92.02, BinaryF1Score: 94.03

Epoch 3
Train
Loss: 0.156, BinaryAccuracy: 94.70, BinaryPrecision: 93.84, BinaryRecall: 94.92, BinaryF1Score: 94.38
Validation
Loss: 0.143, BinaryAccuracy: 94.09, BinaryPrecision: 94.40, BinaryRecall: 93.53, BinaryF1Score: 93.96

Epoch 4
Train
Loss: 0.133, BinaryAccuracy: 95.25, BinaryPrecision: 95.42, Binary

In [17]:
# Load the weights held in best_state_dict, then score the test split with
# the same criterion, metrics and class weights used during training.
model.load_state_dict(best_state_dict)
test_loss, test_metrics = evaluate(
    model, test_loader, criterion, metrics_group, class_weight, device
)
metrics_group.reset()

print("Test")
print(f"Loss: {test_loss:.3f}", end=", ")
# Metrics are reported as percentages with two decimals.
print(
    ", ".join(
        f"{name}: {value.cpu().item() * 100:.2f}"
        for name, value in test_metrics.items()
    )
)

Test
Loss: 0.100, BinaryAccuracy: 96.41, BinaryPrecision: 96.66, BinaryRecall: 96.12, BinaryF1Score: 96.39


## Dataset 3 (Liar)

In [None]:
EPOCHS = 20

# Binary-classification metrics, placed on the same device as the model.
metrics_group = MetricCollection(
    [
        Accuracy(task="binary"),
        Precision(task="binary"),
        Recall(task="binary"),
        F1Score(task="binary")
    ]
).to(device)
# Passed through to train()/evaluate(); presumably per-class loss weights,
# unequal here to counter class imbalance in this dataset — confirm in
# models/lstm and against the label distribution.
class_weight = torch.tensor([6.25, 1.2])
# Highest validation F1 seen so far and a snapshot of the weights behind it.
best_score = None
best_state_dict = None

for epoch in range(EPOCHS):
    print(f"Epoch {epoch}")

    train_loss, train_metrics = train(
        model,
        train_loader,
        optimizer,
        criterion,
        metrics_group,
        class_weight,
        device
    )
    # One MetricCollection is shared by the train and validation passes, so
    # its accumulated state is cleared after each of them.
    metrics_group.reset()
    valid_loss, valid_metrics = evaluate(
        model,
        val_loader,
        criterion,
        metrics_group,
        class_weight,
        device
    )
    metrics_group.reset()

    valid_f1 = valid_metrics["BinaryF1Score"].cpu().item()
    if (best_score is None) or (valid_f1 > best_score):
        best_score = valid_f1
        # state_dict() returns references to the live parameter tensors, which
        # later epochs keep updating in place. Clone them so the snapshot
        # really holds the weights of the best epoch.
        best_state_dict = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

    print("Train")
    print(f"Loss: {train_loss:.3f}", end=", ")
    print(", ".join([f"{k}: {v.cpu().item() * 100:.2f}" for k, v in train_metrics.items()]))

    print("Validation")
    print(f"Loss: {valid_loss:.3f}", end=", ")
    print(", ".join([f"{k}: {v.cpu().item() * 100:.2f}" for k, v in valid_metrics.items()]))
    print()

Epoch 0
Train
Loss: 1.401, BinaryAccuracy: 50.30, BinaryPrecision: 85.43, BinaryRecall: 48.90, BinaryF1Score: 62.19
Validation
Loss: 1.324, BinaryAccuracy: 33.72, BinaryPrecision: 90.74, BinaryRecall: 26.37, BinaryF1Score: 40.86

Epoch 1
Train
Loss: 1.392, BinaryAccuracy: 45.19, BinaryPrecision: 87.01, BinaryRecall: 40.49, BinaryF1Score: 55.26
Validation
Loss: 1.300, BinaryAccuracy: 42.83, BinaryPrecision: 89.77, BinaryRecall: 38.57, BinaryF1Score: 53.95

Epoch 2
Train
Loss: 1.381, BinaryAccuracy: 52.17, BinaryPrecision: 87.56, BinaryRecall: 49.87, BinaryF1Score: 63.54
Validation
Loss: 1.323, BinaryAccuracy: 30.84, BinaryPrecision: 90.68, BinaryRecall: 22.69, BinaryF1Score: 36.30

Epoch 3
Train
Loss: 1.364, BinaryAccuracy: 55.15, BinaryPrecision: 88.21, BinaryRecall: 53.51, BinaryF1Score: 66.61
Validation
Loss: 1.294, BinaryAccuracy: 50.16, BinaryPrecision: 90.46, BinaryRecall: 47.62, BinaryF1Score: 62.40

Epoch 4
Train
Loss: 1.329, BinaryAccuracy: 61.69, BinaryPrecision: 89.35, Binary

In [None]:
# Load the weights held in best_state_dict, then score the test split with
# the same criterion, metrics and class weights used during training.
model.load_state_dict(best_state_dict)
test_loss, test_metrics = evaluate(
    model, test_loader, criterion, metrics_group, class_weight, device
)
metrics_group.reset()

print("Test")
print(f"Loss: {test_loss:.3f}", end=", ")
# Metrics are reported as percentages with two decimals.
print(
    ", ".join(
        f"{name}: {value.cpu().item() * 100:.2f}"
        for name, value in test_metrics.items()
    )
)

Test
Loss: 3.492, BinaryAccuracy: 66.25, BinaryPrecision: 84.54, BinaryRecall: 72.95, BinaryF1Score: 78.32


# Save model

In [21]:
# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing.
os.makedirs("checkpoints", exist_ok=True)
torch.save(
    {
        # `epoch` is the loop variable left over from the most recent training cell.
        "epoch": epoch+1,
        "model_state_dict": model.state_dict(),
        # Stored with the weights so the network can be rebuilt at load time.
        "model_config": model_config,
        "optimizer_state_dict": optimizer.state_dict()
    },
    f"checkpoints/{MODEL}_{DATASET_ID}.tar"
)

# Reload, check inference

## Dataset 3

In [9]:
# Read the checkpoint for the current MODEL/DATASET_ID pair, mapping its
# tensors onto the active device.
checkpoint = torch.load(
    f"checkpoints/{MODEL}_{DATASET_ID}.tar",
    map_location=device,
)
# Rebuild the network from the saved hyperparameters, restore its weights and
# leave it on the device in evaluation mode.
model = LSTMNet(**checkpoint["model_config"])
model.load_state_dict(checkpoint["model_state_dict"])
model = model.to(device).eval()

In [18]:
# Load the "liar" dataset through the `datasets` library to get raw statements for a manual check.
hf_dset = load_dataset("liar")

In [21]:
# Draw one row from the test split; the fixed seed makes the pick repeatable.
test_hf_dset = hf_dset["test"].to_pandas()
sample = test_hf_dset.sample(random_state=42)
# Collapse the label to binary: 3 -> 0, every other label -> 1.
# NOTE(review): presumably 3 is the dataset's "true" class — confirm against
# the label mapping used in data_pipeline.
ground_truth = int(sample["label"].iloc[0] != 3)
sample_text = sample["statement"].iloc[0]
print(sample_text)

Nearly 20% of our residents are born abroad.


In [22]:
# Compare the binary ground truth with the model output for the sampled
# statement: report the position of the largest score and that score.
print(f"GT: {ground_truth}")
pred = inference(model, sample_text, device=device, processor=(tokenizer, vocab))
print(f"Pred: {pred.argmax()}, Prob: {pred.max():.3f}")

GT: 0
Pred: 0, Prob: 0.948


# Explainability (LIME)

### Dataset 1

In [None]:
# Raw Dataset 1 training rows; the "text" and "label" columns are read in the next cell.
df_train = pd.read_csv("./data/Dset1/train.csv")

In [None]:
# Use the weights held in best_state_dict for the explanation that follows.
model.load_state_dict(best_state_dict)
# Row of the training frame to explain.
ID = 110
sample = df_train.loc[ID, "text"]
label = df_train.loc[ID, "label"]
print(sample)
print(label)
# Left as the last expression so the notebook displays the model output.
inference(model, sample, device=device, processor=(tokenizer, vocab))

Barbara Walters is retired, Oprah Winfrey is running a network and Megyn Kelly sees an opening. “It’s there for the taking right now,” she said in a recent interview. And what is there for the taking? What those famous hosts had accomplished: conducting the sort of interviews that could transfix a nation. “Those were the biggest spots to go for an interview if you had something you wanted to get off your chest, if you were in the middle of a scandal or a major news story and you wanted to do a     to get past it or to go on the record,” she said. She quickly added: “And I’m here!” Making the Oprah or Barbara Walters leap is a remarkably tricky business. Many have tried before, with daytime shows or   specials, only to run into a wall and return to a more comfortable corner of television. And the interview special is a relic from a time on television when what was broadcast on the Big Four networks was what mattered most. But on Tuesday, Ms. Kelly, the Fox News anchor and host of “The K

array([0.97796891, 0.02203109])

In [None]:
# LIME text explainer: n_samples=5000 with a MaskingTextSampler, seeded (random_state=42) for repeatability.
te = TextExplainer(n_samples=5000, random_state=42, sampler=eli5.lime.samplers.MaskingTextSampler())

In [None]:
def model_adapter(input_strs):
    """Run the current model on a batch of strings for the LIME explainer.

    Args:
        input_strs: Iterable of text strings to score.

    Returns:
        A NumPy array that stacks, along axis 0, the result of ``inference``
        for each input string (in input order).
    """
    per_text_scores = [
        inference(model, text, device=device, processor=(tokenizer, vocab))
        for text in input_strs
    ]
    return np.stack(per_text_scores, axis=0)

In [None]:
# Fit the explainer around `sample`, with model_adapter as the prediction function.
te.fit(sample, model_adapter)

The loss 'log' was deprecated in v1.1 and will be removed in version 1.3. Use `loss='log_loss'` which is equivalent.


In [None]:
# Render the explanation; output index 0 is displayed as "Real" and index 1 as "Fake".
te.show_prediction(target_names=["Real", "Fake"], feature_names=te.vec_.get_feature_names_out())

Contribution?,Feature
4.978,Highlighted in text (sum)
-0.044,<BIAS>


In [None]:
# Quality metrics recorded by the explainer during fit (mean_KL_divergence and score).
te.metrics_

{'mean_KL_divergence': 0.1549419048865857, 'score': 0.9551988880623424}

### Dataset 2

In [10]:
# Dataset 2 ships as two CSV files, one per class; the "text" column is used below.
fake_df = pd.read_csv("./data/Dset2/Fake.csv")
true_df = pd.read_csv("./data/Dset2/True.csv")

In [11]:
# Score one article taken from True.csv.
# isot=True is forwarded to inference; presumably it enables the ISOT-specific
# text cleaning — confirm in models/lstm.
sample = true_df["text"][100]
print(sample)
inference(model, sample, isot=True, device=device, processor=(tokenizer, vocab))

WASHINGTON (Reuters) - Democratic Senator Elizabeth Warren is taking aim at budget chief Mick Mulvaney’s plan to fill the ranks of the U.S. consumer financial watchdog with political allies, according to letters seen by Reuters, the latest salvo in a broader battle over who should run the bureau. President Donald Trump last month appointed Mulvaney as acting director of the Consumer Financial Protection Bureau (CFPB), though the decision is being legally challenged by the agency’s deputy director, Leandra English, who says she is the rightful interim head. Mulvaney told reporters earlier this month he planned to bring in several political appointees to help overhaul the agency, but Warren warned in a pair of letters sent Monday to Mulvaney and the Office of Personnel Management (OPM), which oversees federal hiring, that doing so was inappropriate and potentially illegal. The CFPB is meant to be an independent agency staffed primarily by non-political employees. Hiring political appoint

array([0.99375183, 0.00624817])

In [12]:
# Score one article taken from Fake.csv. `sample` is overwritten here, so any
# cell run afterwards that reads `sample` sees this article.
sample = fake_df["text"][100]
print(sample)
inference(model, sample, isot=True, device=device, processor=(tokenizer, vocab))

Former Vice President Joe Biden was asked on Monday by Matt Lauer on NBC s  Today  to name something specific that Donald Trump has been  doing well. Well, that seems like a trick question since Trump has passed no major legislation and reaches across the aisle only to take shots at Democrats in his Twitter timeline during his morning rage-tweets, so Biden struggled to find something, anything, that Trump has done well since taking office. I think there s a number of things he s doing well. But even the things he s doing well, it s how he does them,  Biden said. It s more the tone of this administration that bothers me,  he continued. With all due respect, you haven t come up with one thing you think he s doing well,  Lauer said. Well, I think he married very well,  Biden joked.Although, Biden didn t mention which of Trump s three marriages he s speaking of. Trump s first marriage to Ivana ended after he had an affair with Marla Maples. Trump went on to marry Maples, then they divorced

array([0.00953132, 0.99046868])

In [None]:
# Fresh LIME text explainer for Dataset 2: n_samples=5000 with a MaskingTextSampler, seeded (random_state=42).
te = TextExplainer(n_samples=5000, random_state=42, sampler=eli5.lime.samplers.MaskingTextSampler())

In [None]:
def model_adapter(input_strs):
    """Run the current model on a batch of strings for the LIME explainer.

    Args:
        input_strs: Iterable of text strings to score.

    Returns:
        A NumPy array that stacks, along axis 0, the result of ``inference``
        for each input string (in input order).
    """
    per_text_scores = [
        inference(model, text, device=device, processor=(tokenizer, vocab))
        for text in input_strs
    ]
    return np.stack(per_text_scores, axis=0)

In [None]:
# Fit the explainer on the isot_clean-ed text. model_adapter calls inference
# without isot=True, so presumably the cleaning has to be applied once here
# rather than to every perturbed string — confirm.
te.fit(isot_clean(sample), model_adapter)

The loss 'log' was deprecated in v1.1 and will be removed in version 1.3. Use `loss='log_loss'` which is equivalent.


In [None]:
# Render the explanation; output index 0 is displayed as "Real" and index 1 as "Fake".
te.show_prediction(target_names=["Real", "Fake"], feature_names=te.vec_.get_feature_names_out())

Contribution?,Feature
5.833,Highlighted in text (sum)
0.238,<BIAS>


In [None]:
# Quality metrics recorded by the explainer during fit (mean_KL_divergence and score).
te.metrics_

{'mean_KL_divergence': 0.03152380202562918, 'score': 0.9867842505941959}

In [None]:
# NOTE(review): same call as the earlier explanation cell but with different
# output, so `te` was presumably re-fit on another sample in between — confirm
# and make that step explicit.
te.show_prediction(target_names=["Real", "Fake"], feature_names=te.vec_.get_feature_names_out())

Contribution?,Feature
9.698,Highlighted in text (sum)
-0.61,<BIAS>


In [None]:
# Fit-quality metrics for whichever sample `te` was most recently fitted on.
te.metrics_

{'mean_KL_divergence': 0.05386959668705684, 'score': 0.9486396453819218}

# Kaggle submission (Dataset 1, evaluation only)

In [None]:
# Row ids and predicted labels, collected batch by batch in matching order.
submission_ids = []
submission_labels = []
# Predict with the weights held in best_state_dict, in evaluation mode and
# without tracking gradients.
model.load_state_dict(best_state_dict)
model.eval()
with torch.no_grad():
    for batch in test_loader:
        # The second batch element is not needed for prediction
        # (presumably the label slot — confirm in data_pipeline).
        text, _, text_lengths, ids = batch
        text = text.to(device)
        text_lengths = text_lengths.to(device)
        submission_ids.extend(ids.tolist())
        # Round the model output to a hard 0/1 label. reshape(-1) is used
        # instead of squeeze() because it always yields a 1-D tensor: with
        # squeeze() a final batch of one example collapses to a 0-dim tensor,
        # tolist() returns a bare int, and extend() raises TypeError.
        predictions = model(text, text_lengths).round().int().reshape(-1).detach().cpu().tolist()
        submission_labels.extend(predictions)

In [None]:
df_test = pd.read_csv("./data/Dset1/test.csv")
# A set gives constant-time membership checks; testing against the growing
# list would re-scan it for every id in the file.
seen_ids = set(submission_ids)
for idx in df_test["id"]:
    if idx not in seen_ids:
        # This id got no prediction from the loader (presumably a row the data
        # pipeline dropped — confirm); submit the fallback label 1 for it.
        seen_ids.add(idx)
        submission_ids.append(idx)
        submission_labels.append(1)

In [None]:
# Two-column frame pairing each test id with its predicted label.
submission_df = pd.DataFrame(
    {
        "id": submission_ids,
        "label": submission_labels,
    }
)

In [None]:
# Write the submission file; index=False leaves out the DataFrame index column.
submission_df.to_csv("submission.csv",index=False)

In [None]:
# Opens Colab's file-upload prompt. kaggle.json is expected here, since a
# later cell copies that file into ~/.kaggle.
from google.colab import files
files.upload()

In [None]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
# Submit submission.csv to the "fake-news" competition through the Kaggle CLI.
!kaggle competitions submit -c fake-news -f submission.csv -m "Uploaded from Google Colab"

100% 40.6k/40.6k [00:02<00:00, 19.6kB/s]
Successfully submitted to Fake News