In [1]:
import sys
sys.path.insert(1, '../..')

import torch
import torch.nn as nn
import random
import pandas as pd
import numpy as np
import time

random.seed(33)

from library.evaluation import ConfusionMatrix

model_name = "ReinforcementLearning"
unique_name = "BERT_Finetuned"

In [2]:
vectors = np.loadtxt("../../data/processed/vectors/Phemernr2_BERT_base_finetuned_vectors.txt", delimiter=",")
vectors.shape

(6425, 768)

In [3]:
data = pd.read_csv("../../data/processed/phemernr2_dataset_with_tvt.csv", lineterminator="\n")
data.head()

Unnamed: 0,tweet_id,tweet_text,label,label2,topic,tvt,cv_fold,tt,tvt2
0,552833795142209536,The East London Mosque would like to offer its...,non-rumours,non-rumours,charliehebdo-all-rnr-threads,test,2,test,training
1,580318210609696769,BREAKING - A Germanwings Airbus A320 plane rep...,rumours,true,germanwings-crash-all-rnr-threads,training,3,training,validation
2,552798891994009601,Reports that two of the dead in the #CharlieHe...,rumours,true,charliehebdo-all-rnr-threads,test,2,test,validation
3,576790814942236672,After #Putin disappeared Russian TV no longer ...,non-rumours,non-rumours,putinmissing-all-rnr-threads,test,2,test,validation
4,499678822598340608,Saw #Ferguson for myself. #justiceformichaelbr...,non-rumours,non-rumours,ferguson-all-rnr-threads,training,3,training,training


In [4]:
labels_str = data['label2'].unique().tolist()
labels_str

['non-rumours', 'true', 'unverified', 'false']

In [5]:
labels = []
for i, d in data.iterrows():
    lab = labels_str.index(d['label2'])
#     labels.append([1 if j == lab else 0 for j in range(len(labels_str))])
    labels.append(lab)
labels[:10]

[0, 1, 1, 0, 0, 0, 0, 2, 0, 0]

In [6]:
train_vectors = np.array([vectors[i] for i, d in data.iterrows() if d['tvt2'] == 'training'])
val_vectors = np.array([vectors[i] for i, d in data.iterrows() if d['tvt2'] == 'validation'])
test_vectors = np.array([vectors[i] for i, d in data.iterrows() if d['tvt2'] == 'testting'])

train_labels = np.array([labels[i] for i, d in data.iterrows() if d['tvt2'] == 'training'])
val_labels = np.array([labels[i] for i, d in data.iterrows() if d['tvt2'] == 'validation'])
test_labels = np.array([labels[i] for i, d in data.iterrows() if d['tvt2'] == 'testting'])

In [7]:
print(train_vectors.shape)
print(val_vectors.shape)
print(test_vectors.shape)

print(train_labels.shape)
print(val_labels.shape)
print(test_labels.shape)

(4326, 768)
(1463, 768)
(636, 768)
(4326,)
(1463,)
(636,)


In [8]:
from collections import deque

n_actions = 4

class Simulation:
    
    def __init__(self, vectors, labels):
        self.vectors = vectors
        self.labels = labels
        
        self.n_step = 0
        self.done = False
    
    def step(self, action):
        if action == self.labels[self.n_step]:
            reward = 2
        else:
            reward = 0

        self.n_step += 1
        if self.n_step >= self.vectors.shape[1]:
            self.done = True
            
        info = {
            "target": self.labels[self.n_step]
        }
        return torch.Tensor(self.vectors[self.n_step]), reward, self.done, info
    
    def reset(self):
        self.n_step = 0
        return torch.Tensor(self.vectors[self.n_step])
    
env = Simulation(train_vectors, train_labels)
next_state, reward, done, info = env.step(action=0)
print(f"{next_state.shape},\n {reward},\n {done},\n {info}")

torch.Size([768]),
 2,
 False,
 {'target': 0}


In [9]:
class ClassifierNet(nn.Module):

    def __init__(self, n_input, n_output):
        super().__init__()
        self.online = nn.Sequential(
            nn.Linear(n_input, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, n_output),
        )

        self.target = copy.deepcopy(self.online)

        # Q_target parameters are frozen.
        for p in self.target.parameters():
            p.requires_grad = False

    def forward(self, input, model):
        if model == "online":
            return self.online(input)
        elif model == "target":
            return self.target(input)

In [10]:
class Classifier:
    def __init__(self, n_vectors, n_action, save_dir):
        self.n_vectors = n_vectors
        self.n_action = n_action
        self.save_dir = save_dir

        self.use_cuda = torch.cuda.is_available()

        # Mario's DNN to predict the most optimal action - we implement this in the Learn section
        self.net = ClassifierNet(self.n_vectors, self.n_action).float()
        if self.use_cuda:
            self.net = self.net.to(device="cuda")

        self.exploration_rate = 1
        self.exploration_rate_decay = 0.99999975
        self.exploration_rate_min = 0.1
        self.curr_step = 0

        self.save_every = 5e5  # no. of experiences between saving Mario Net
        self.memory = deque(maxlen=100000)
        self.batch_size = 32
        
        self.gamma = 0.7

        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=0.00025)
        self.loss_fn = torch.nn.SmoothL1Loss()

        self.burnin = 1e4  # min. experiences before training
        self.learn_every = 3  # no. of experiences between updates to Q_online
        self.sync_every = 1e4  # no. of experiences between Q_target & Q_online sync

    def act(self, state, prediction=False):
        """
            Given a state, choose an epsilon-greedy action
            
            Inputs:
            state(LazyFrame): A single observation of the current state, dimension is (state_dim)
    
            Outputs:
            action_idx (int): An integer representing which action Mario will perform
        """
        # EXPLORE
        if not prediction and np.random.rand() < self.exploration_rate:
            action_idx = np.random.randint(self.n_action)

        # EXPLOIT
        else:
            state = state.__array__()
            if self.use_cuda:
                state = torch.tensor(state).cuda()
            else:
                state = torch.tensor(state)
            state = state.unsqueeze(0)
            action_values = self.net(state, model="online")
            action_idx = torch.argmax(action_values, axis=1).item()

        # decrease exploration_rate
        self.exploration_rate *= self.exploration_rate_decay
        self.exploration_rate = max(self.exploration_rate_min, self.exploration_rate)

        # increment step
        self.curr_step += 1
        return action_idx

    def cache(self, state, next_state, action, reward, done):
        """
        Store the experience to self.memory (replay buffer)

        Inputs:
        state (LazyFrame),
        next_state (LazyFrame),
        action (int),
        reward (float),
        done(bool))
        """
        state = state.__array__()
        next_state = next_state.__array__()

        if self.use_cuda:
            state = torch.tensor(state).cuda()
            next_state = torch.tensor(next_state).cuda()
            action = torch.tensor([action]).cuda()
            reward = torch.tensor([reward]).cuda()
            done = torch.tensor([done]).cuda()
        else:
            state = torch.tensor(state)
            next_state = torch.tensor(next_state)
            action = torch.tensor([action])
            reward = torch.tensor([reward])
            done = torch.tensor([done])

        self.memory.append((state, next_state, action, reward, done,))

    def recall(self):
        """
        Retrieve a batch of experiences from memory
        """
        batch = random.sample(self.memory, self.batch_size)
        state, next_state, action, reward, done = map(torch.stack, zip(*batch))
        return state, next_state, action.squeeze(), reward.squeeze(), done.squeeze()

    def learn(self):
        """Update online action value (Q) function with a batch of experiences"""
        if self.curr_step % self.sync_every == 0:
            self.sync_Q_target()

        if self.curr_step % self.save_every == 0:
            self.save()

        if self.curr_step < self.burnin:
            return None, None

        if self.curr_step % self.learn_every != 0:
            return None, None

        # Sample from memory
        state, next_state, action, reward, done = self.recall()

        # Get TD Estimate
        td_est = self.td_estimate(state, action)

        # Get TD Target
        td_tgt = self.td_target(reward, next_state, done)

        # Backpropagate loss through Q_online
        loss = self.update_Q_online(td_est, td_tgt)

        return (td_est.mean().item(), loss)

    def td_estimate(self, state, action):
        current_Q = self.net(state, model="online")[
            np.arange(0, self.batch_size), action
        ]  # Q_online(s,a)
        return current_Q

    @torch.no_grad()
    def td_target(self, reward, next_state, done):
        next_state_Q = self.net(next_state, model="online")
        best_action = torch.argmax(next_state_Q, axis=1)
        next_Q = self.net(next_state, model="target")[
            np.arange(0, self.batch_size), best_action
        ]
        return (reward + (1 - done.float()) * self.gamma * next_Q).float()

    def update_Q_online(self, td_estimate, td_target):
        loss = self.loss_fn(td_estimate, td_target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.item()

    def sync_Q_target(self):
        self.net.target.load_state_dict(self.net.online.state_dict())

    def save(self):
        save_path = (
            self.save_dir / f"mario_net_{int(self.curr_step // self.save_every)}.chkpt"
        )
        torch.save(
            dict(model=self.net.state_dict(), exploration_rate=self.exploration_rate),
            save_path,
        )
        print(f"MarioNet saved to {save_path} at step {self.curr_step}")

In [11]:
import time, datetime, copy
import matplotlib.pyplot as plt
from pathlib import Path


class MetricLogger:
    def __init__(self, save_dir):
        self.save_log = save_dir / "log"
        with open(self.save_log, "w") as f:
            f.write(
                f"{'Episode':>8}{'Step':>8}{'Epsilon':>10}{'MeanReward':>15}"
                f"{'MeanLength':>15}{'MeanLoss':>15}{'MeanQValue':>15}"
                f"{'TimeDelta':>15}{'Time':>20}\n"
            )
        self.ep_rewards_plot = save_dir / "reward_plot.jpg"
        self.ep_lengths_plot = save_dir / "length_plot.jpg"
        self.ep_avg_losses_plot = save_dir / "loss_plot.jpg"
        self.ep_avg_qs_plot = save_dir / "q_plot.jpg"

        # History metrics
        self.ep_rewards = []
        self.ep_lengths = []
        self.ep_avg_losses = []
        self.ep_avg_qs = []

        # Moving averages, added for every call to record()
        self.moving_avg_ep_rewards = []
        self.moving_avg_ep_lengths = []
        self.moving_avg_ep_avg_losses = []
        self.moving_avg_ep_avg_qs = []

        # Current episode metric
        self.init_episode()

        # Timing
        self.record_time = time.time()

    def log_step(self, reward, loss, q):
        self.curr_ep_reward += reward
        self.curr_ep_length += 1
        if loss:
            self.curr_ep_loss += loss
            self.curr_ep_q += q
            self.curr_ep_loss_length += 1

    def log_episode(self):
        "Mark end of episode"
        self.ep_rewards.append(self.curr_ep_reward)
        self.ep_lengths.append(self.curr_ep_length)
        if self.curr_ep_loss_length == 0:
            ep_avg_loss = 0
            ep_avg_q = 0
        else:
            ep_avg_loss = np.round(self.curr_ep_loss / self.curr_ep_loss_length, 5)
            ep_avg_q = np.round(self.curr_ep_q / self.curr_ep_loss_length, 5)
        self.ep_avg_losses.append(ep_avg_loss)
        self.ep_avg_qs.append(ep_avg_q)

        self.init_episode()

    def init_episode(self):
        self.curr_ep_reward = 0.0
        self.curr_ep_length = 0
        self.curr_ep_loss = 0.0
        self.curr_ep_q = 0.0
        self.curr_ep_loss_length = 0

    def record(self, episode, epsilon, step):
        mean_ep_reward = np.round(np.mean(self.ep_rewards[-100:]), 3)
        mean_ep_length = np.round(np.mean(self.ep_lengths[-100:]), 3)
        mean_ep_loss = np.round(np.mean(self.ep_avg_losses[-100:]), 3)
        mean_ep_q = np.round(np.mean(self.ep_avg_qs[-100:]), 3)
        self.moving_avg_ep_rewards.append(mean_ep_reward)
        self.moving_avg_ep_lengths.append(mean_ep_length)
        self.moving_avg_ep_avg_losses.append(mean_ep_loss)
        self.moving_avg_ep_avg_qs.append(mean_ep_q)

        last_record_time = self.record_time
        self.record_time = time.time()
        time_since_last_record = np.round(self.record_time - last_record_time, 3)

        print(
            f"Episode {episode} - "
            f"Step {step} - "
            f"Epsilon {epsilon} - "
            f"Mean Reward {mean_ep_reward} - "
            f"Mean Length {mean_ep_length} - "
            f"Mean Loss {mean_ep_loss} - "
            f"Mean Q Value {mean_ep_q} - "
            f"Time Delta {time_since_last_record} - "
            f"Time {datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')}"
        )

        with open(self.save_log, "a") as f:
            f.write(
                f"{episode:8d}{step:8d}{epsilon:10.3f}"
                f"{mean_ep_reward:15.3f}{mean_ep_length:15.3f}{mean_ep_loss:15.3f}{mean_ep_q:15.3f}"
                f"{time_since_last_record:15.3f}"
                f"{datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S'):>20}\n"
            )

        for metric in ["ep_rewards", "ep_lengths", "ep_avg_losses", "ep_avg_qs"]:
            plt.plot(getattr(self, f"moving_avg_{metric}"))
            plt.savefig(getattr(self, f"{metric}_plot"))
            plt.clf()

In [12]:
import time

use_cuda = torch.cuda.is_available()
print(f"Using CUDA: {use_cuda}")
print()

save_dir = Path("checkpoints") / datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
save_dir.mkdir(parents=True)

env = Simulation(train_vectors, train_labels)
agent = Classifier(train_vectors.shape[1], n_action=4, save_dir=save_dir)

logger = MetricLogger(save_dir)

start = time.time()
episodes = 50000
for e in range(episodes):
    state = env.reset()
    acts = []

    # Play the game!
    while True:

        # Run agent on the state
        action = agent.act(state)
        acts.append(action)

        # Agent performs action
        next_state, reward, done, info = env.step(action)

        # Remember
        agent.cache(state, next_state, action, reward, done)

        # Learn
        q, loss = agent.learn()

        # Logging
        logger.log_step(reward, loss, q)

        # Update state
        state = next_state

        # Check if end of game
        if done:
            break

    logger.log_episode()

    if e % 3000 == 0:
        logger.record(episode=e, epsilon=agent.exploration_rate, step=agent.curr_step)
        print("Actions taken :", set(acts))
        
print(f"Execution Time : {round(time.time() - start, 2)} seconds")

Using CUDA: True

Episode 0 - Step 768 - Epsilon 0.9998080184067972 - Mean Reward 402.0 - Mean Length 768.0 - Mean Loss 0.0 - Mean Q Value 0.0 - Time Delta 0.13 - Time 2021-10-20T10:51:13
Actions taken : {0, 1, 2, 3}
Episode 3000 - Step 3768 - Epsilon 0.9990584434249461 - Mean Reward 0.44 - Mean Length 1.0 - Mean Loss 0.0 - Mean Q Value 0.0 - Time Delta 0.98 - Time 2021-10-20T10:51:14
Actions taken : {1}
Episode 6000 - Step 6768 - Epsilon 0.9983094304136343 - Mean Reward 0.52 - Mean Length 1.0 - Mean Loss 0.0 - Mean Q Value 0.0 - Time Delta 0.93 - Time 2021-10-20T10:51:14
Actions taken : {2}
Episode 9000 - Step 9768 - Epsilon 0.9975609789515425 - Mean Reward 0.6 - Mean Length 1.0 - Mean Loss 0.0 - Mean Q Value 0.0 - Time Delta 1.013 - Time 2021-10-20T10:51:15
Actions taken : {2}
Episode 12000 - Step 12768 - Epsilon 0.9968130886176716 - Mean Reward 0.58 - Mean Length 1.0 - Mean Loss 0.0 - Mean Q Value 0.162 - Time Delta 4.703 - Time 2021-10-20T10:51:20
Actions taken : {3}
Episode 15000 

<Figure size 432x288 with 0 Axes>

In [13]:
idx = 345

example = torch.Tensor(val_vectors[idx])
prediction = agent.act(example, prediction=True)
print("Agent Prediction : ", prediction)
print("Correct Answer : ", val_labels[idx])

Agent Prediction :  0
Correct Answer :  0


In [14]:
def predictions(agent, vectors):
    results = []
    for vec in vectors:
        results.append(agent.act(torch.Tensor(vec), prediction=True))
    return np.array(results)

In [15]:
print("Multiclass Classification using Reinforcement Learning")
print("\nValidation Set")
preds = predictions(agent, val_vectors)
print(f"Predictions : {preds.shape}")

# preds = preds.cpu().numpy()
print(np.unique(preds))

conf_mat = ConfusionMatrix(
    labels=np.array([[1 if j == v else 0 for j in range(len(labels_str))] for v in val_labels]),
    predictions=np.array([[1 if j == p else 0 for j in range(len(labels_str))] for p in preds]),
    binary=False,
    model_name=f"{model_name} Validation"
)
conf_mat.evaluate(classes=labels_str)

print("\nTest Set")
preds = predictions(agent, test_vectors)
print(f"Predictions : {preds.shape}")

# preds = preds.cpu().numpy()
print(np.unique(preds))

conf_mat = ConfusionMatrix(
    labels=np.array([[1 if j == v else 0 for j in range(len(labels_str))] for v in test_labels]),
    predictions=np.array([[1 if j == p else 0 for j in range(len(labels_str))] for p in preds]),
    binary=False,
    model_name=f"{model_name} Test"
)
conf_mat.evaluate(classes=labels_str)

Multiclass Classification using Reinforcement Learning

Validation Set
Predictions : (1463,)
[0 1 2 3]
1463 vs 1463
Multi Class Evaluation

Class non-rumours Evaluation
- Precision : 81.426 %
- Recall : 95.49 %
- F1 : 0.87899

Class true Evaluation
- Precision : 82.308 %
- Recall : 44.398 %
- F1 : 0.57682

Class unverified Evaluation
- Precision : 71.538 %
- Recall : 55.689 %
- F1 : 0.62626

Class false Evaluation
- Precision : 83.942 %
- Recall : 78.767 %
- F1 : 0.81272

Combined Evaluation
- Accuracy : 80.861 %
- Precision : 79.803 %
- Recall : 68.586 %
- F1 : 0.73771

- Average Confidence : 100.0 %
Model, Combined,,,,non-rumours,,,true,,,unverified,,,false,,,
ReinforcementLearning Validation, 80.861, 79.803, 68.586, 0.73771, 81.426, 95.49, 0.87899, 82.308, 44.398, 0.57682, 71.538, 55.689, 0.62626, 83.942, 78.767, 0.81272, 

Test Set
Predictions : (636,)
[0 1 2 3]
636 vs 636
Multi Class Evaluation

Class non-rumours Evaluation
- Precision : 84.096 %
- Recall : 95.309 %
- F1 : 0.89352