# Installs

## wandb

You will need to fetch your api key from wandb.ai

In [1]:
!pip install wandb -q

## Misc

This may take a while

In [2]:
!pip install torchsummaryX
!pip install slugify
!pip install pytorch_pretrained_bert

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## imports

In [3]:
import torch
import time as Time
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM
from pytorch_pretrained_bert import BertConfig
import random
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torchsummaryX import summary
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from PIL import Image
import pandas as pd
import torchvision
from torchvision import datasets, models, transforms

import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score
import gc

import zipfile
import pandas as pd
from tqdm import tqdm
import os
import datetime
import wandb
import copy
from pathlib import Path
import sys

import warnings
warnings.filterwarnings('ignore')

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device: ", device)

Device:  cuda


# Google Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!mkdir '/content/glove'
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip -qo 'glove.6B.zip' -d '/content/glove'

# Setting

In [4]:
config = {
    "num_labels" : 2, # True or False Classification
    "lr" : 2e-3,
    "epochs" : 50,
    'batch_size' : 8,
    #'LR scheduler': 'CosineAnnealingLR',
    'LR scheduler': 'ReduceLROnPlateau',
                'scheduler factor': 0.5,
    'scheduler threshold': 0.01,
            'scheduler patience': 5,
   #'scheduler Tmax': 0,
    } # Feel free to add more items here

# Read Data

In [5]:
# remeber to change the path if you are not on google colab and directly uploading
train_path = '/content/train2.tsv'
test_path = '/content/test2.tsv'
val_path = '/content/val2.tsv'

In [6]:
train_df = pd.read_csv(train_path, sep="\t", header=None)
test_df = pd.read_csv(test_path, sep="\t", header=None)
val_df = pd.read_csv(val_path, sep="\t", header=None)

# Fill nan (empty boxes) with 0
train_df = train_df.fillna(0)
test_df = test_df.fillna(0)
val_df = val_df.fillna(0)

train = train_df.values
test = test_df.values
val = val_df.values

In [7]:
# label: ground truth results from politifacts
labels = {'train':[train[i][2] for i in range(len(train))], 'test':[test[i][2] for i in range(len(test))], 'val':[val[i][2] for i in range(len(val))]}
# Short Statement
statements = {'train':[train[i][3] for i in range(len(train))], 'test':[test[i][3] for i in range(len(test))], 'val':[val[i][3] for i in range(len(val))]}
# Topic of Statement
subjects = {'train':[train[i][4] for i in range(len(train))], 'test':[test[i][4] for i in range(len(test))], 'val':[val[i][4] for i in range(len(val))]}
# Speaker
speakers = {'train':[train[i][5] for i in range(len(train))], 'test':[test[i][5] for i in range(len(test))], 'val':[val[i][5] for i in range(len(val))]}
# Speaker job or title
jobs = {'train':[train[i][6] for i in range(len(train))], 'test':[test[i][6] for i in range(len(test))], 'val':[val[i][6] for i in range(len(val))]}
# State of Relevance
states = {'train':[train[i][7] for i in range(len(train))], 'test':[test[i][7] for i in range(len(test))], 'val':[val[i][7] for i in range(len(val))]}
# party affiliation
affiliations = {'train':[train[i][8] for i in range(len(train))], 'test':[test[i][8] for i in range(len(test))], 'val':[val[i][8] for i in range(len(val))]}
# total history of speaker(count of barely true, false, half true, mostly true, pants on fire respectively)
credits = {'train':[train[i][9:14] for i in range(len(train))], 'test':[test[i][9:14] for i in range(len(test))], 'val':[val[i][9:14] for i in range(len(val))]}
# venue of statement
contexts = {'train':[train[i][14] for i in range(len(train))], 'test':[test[i][14] for i in range(len(test))], 'val':[val[i][14] for i in range(len(val))]}
# verdict justification from politifacts
justification = {'train':[train[i][15] for i in range(len(train))], 'test':[test[i][15] for i in range(len(test))], 'val':[val[i][15] for i in range(len(val))]}

In [8]:
# currently only do 2 way classfication & simplify 6 way label into true and false
# convert label to 2 hot based on verdict label
if config["num_labels"] ==2:
  def onehot(label):
    label_onehot = [0]*len(label)
    for i in range(len(label)):
      if label[i] =='true' or label[i] =='mostly-true' or label[i] =='half-true':
        label_onehot[i] = [1,0]
      elif label[i] =='barely-true' or label[i] =='false' or label[i] =='pants-fire':
        label_onehot[i] = [0,1]
      else:
        print('Unexpected Label. Set vector to [0]')
    return label_onehot

In [9]:
# Convert to one hot
label_onehot = {'train':onehot(labels['train']), 'test':onehot(labels['test']), 'val':onehot(labels['val'])}

In [10]:
# Meta data
metadata = {'train':[0]*len(train), 'val':[0]*len(val), 'test':[0]*len(test)}

for i in range(len(train)):
    subject = subjects['train'][i]
    if subject == 0:
        subject = 'None'

    speaker = speakers['train'][i]
    if speaker == 0:
        speaker = 'None'

    job = jobs['train'][i]
    if job == 0:
        job = 'None'

    state = states['train'][i]
    if state == 0:
        state = 'None'

    affiliation = affiliations['train'][i]
    if affiliation == 0:
        affiliation = 'None'

    context = contexts['train'][i]
    if context == 0 :
        context = 'None'
    if i == 0:
      print(subject)
    meta = subject + ' ' + speaker + ' ' + job + ' ' + state + ' ' + affiliation + ' ' + context
    if i == 0:
      print(meta)
    metadata['train'][i] = meta

for i in range(len(val)):
    subject = subjects['val'][i]
    if subject == 0:
        subject = 'None'

    speaker = speakers['val'][i]
    if speaker == 0:
        speaker = 'None'

    job = jobs['val'][i]
    if job == 0:
        job = 'None'

    state = states['val'][i]
    if state == 0:
        state = 'None'

    affiliation = affiliations['val'][i]
    if affiliation == 0:
        affiliation = 'None'

    context = contexts['val'][i]
    if context == 0 :
        context = 'None'

    meta = subject + ' ' + speaker + ' ' + job + ' ' + state + ' ' + affiliation + ' ' + context

    metadata['val'][i] = meta

for i in range(len(test)):
    subject = subjects['test'][i]
    if subject == 0:
        subject = 'None'

    speaker = speakers['test'][i]
    if speaker == 0:
        speaker = 'None'

    job = jobs['test'][i]
    if job == 0:
        job = 'None'

    state = states['test'][i]
    if state == 0:
        state = 'None'

    affiliation = affiliations['test'][i]
    if affiliation == 0:
        affiliation = 'None'

    context = contexts['test'][i]
    if context == 0 :
        context = 'None'

    meta = subject + ' ' + speaker + ' ' + job + ' ' + state + ' ' + affiliation + ' ' + context

    metadata['test'][i] = meta

abortion
abortion dwayne-bohac State representative Texas republican a mailer


In [11]:
# Credit score calculation
# barely true weighs 0.75, false weighs 0.9, half true weigh 0.5, mostly true weigh 0.2, pants on fire weigh 1
credit_score = {'train':[0]*len(train), 'val':[0]*len(val), 'test':[0]*len(test)}
for i in range(len(train)):
    credit = credits['train'][i]
    if sum(credit) == 0:
        score = 0.5
    else:
        score = (credit[3]*0.2 + credit[2]*0.5 + credit[0]*0.75 + credit[1]*0.9 + credit[4]*1)/(sum(credit))
    credit_score['train'][i] = [score for i in range(2304)]

for i in range(len(val)):
    credit = credits['val'][i]
    if sum(credit) == 0:
        score = 0.5
    else:
        score = (credit[3]*0.2 + credit[2]*0.5 + credit[0]*0.75 + credit[1]*0.9 + credit[4]*1)/(sum(credit))
    credit_score['val'][i] = [score for i in range(2304)]

for i in range(len(test)):
    credit = credits['test'][i]
    if sum(credit) == 0:
        score = 0.5
    else:
        score = (credit[3]*0.2 + credit[2]*0.5 + credit[0]*0.75 + credit[1]*0.9 + credit[4]*1)/(sum(credit))
    credit_score['test'][i] = [score for i in range(2304)]


# Dataset and Dataloader

In [12]:

# Loading the statements
X_train = statements['train']
y_train = label_onehot['train']

X_val = statements['val']
y_val = label_onehot['val']


X_test = statements['test']
y_test = label_onehot['test']

# Loading the justification
X_train_just = justification['train']

X_val_just = justification['val']


X_test_just = justification['test']


# Loading the meta data
X_train_meta = metadata['train']
X_val_meta = metadata['val']
X_test_meta = metadata['test']

# Loading Credit scores

X_train_credit = credit_score['train']
X_val_credit = credit_score['val']
X_test_credit = credit_score['test']

In [13]:
max_seq_length_stat = 64
max_seq_length_just = 256
max_seq_length_meta = 32

### Train Data

In [14]:
class TextDataset(torch.utils.data.Dataset):

    def __init__(self, xy_list ,transform=None): 
        '''
        Initializes the dataset.

        '''

        # Load the xy list

        self.x_y_list = xy_list
        self.length = len(xy_list[0])
        for i in range(self.length):
        #   Load in each statement and tokenize
            #print(self.x_y_list[0][i])
            tokenized_stat = tokenizer.tokenize(self.x_y_list[0][i])
            if len(tokenized_stat) > max_seq_length_stat:
              # clip if the statement is too long
                tokenized_stat = tokenized_stat[:max_seq_length_stat]

            # convert statement to ids
            ids_stat  = tokenizer.convert_tokens_to_ids(tokenized_stat)
            # pad the statement to given length
            padding = [0] * (max_seq_length_stat - len(ids_stat))

            ids_stat += padding
            # sanity check
            assert len(ids_stat) == max_seq_length_stat
            
            #if i == 1:
            #  print(ids_stat)
            ids_stat = torch.tensor(ids_stat)
            
            if self.x_y_list[1][i] == 0:
                self.x_y_list[1][i] = 'No justification'

            #print(self.x_y_list[1][i])
            tokenized_just = tokenizer.tokenize(self.x_y_list[1][i])
            if len(tokenized_just) > max_seq_length_just:
              # clip if the statement is too long
                tokenized_just = tokenized_just[:max_seq_length_just]

            # convert statement to ids
            ids_just  = tokenizer.convert_tokens_to_ids(tokenized_just)
            # pad the statement to given length
            padding = [0] * (max_seq_length_just - len(ids_just))

            ids_just += padding
            # sanity check
            assert len(ids_just) == max_seq_length_just

            #if i == 1:
            #  print(ids_just)
            ids_just = torch.tensor(ids_just)

            #print(self.x_y_list[2][i])
            tokenized_meta = tokenizer.tokenize(self.x_y_list[2][i])
            if len(tokenized_meta) > max_seq_length_meta:
              # clip if the statement is too long
                tokenized_meta = tokenized_meta[:max_seq_length_meta]

            # convert statement to ids
            ids_meta  = tokenizer.convert_tokens_to_ids(tokenized_meta)
            # pad the statement to given length
            padding = [0] * (max_seq_length_meta - len(ids_meta))

            ids_meta += padding
            # sanity check
            assert len(ids_meta) == max_seq_length_meta

            ids_meta = torch.tensor(ids_meta)
            
            credit_scr = torch.tensor(self.x_y_list[3][i]) # Credit score

            #if i == 1:
            #  print(credit_scr)

            label = torch.from_numpy(np.array(self.x_y_list[4][i]))

            #if i == 1:
            #  print(label)

            self.x_y_list[0][i] = ids_stat
            self.x_y_list[1][i] = ids_just
            self.x_y_list[2][i] = ids_meta
            self.x_y_list[3][i] = credit_scr
            self.x_y_list[4][i] = label

    def __len__(self):
        
        '''
        TODO: What do we return here?
        '''
        return self.length

    def __getitem__(self, ind):
        '''
        TODO: RETURN THE MFCC COEFFICIENTS AND ITS CORRESPONDING LABELS

        If you didn't do the loading and processing of the data in __init__,
        do that here.

        Once done, return a tuple of features and labels.
        '''
        
        ids_stat = self.x_y_list[0][ind] 
        ids_just = self.x_y_list[1][ind]
        ids_meta = self.x_y_list[2][ind] 
        credit_scr = self.x_y_list[3][ind] 
        label = self.x_y_list[4][ind]

        return ids_stat, ids_just, ids_meta, credit_scr, label



### Data - Hyperparameters

In [15]:
BATCH_SIZE = config['batch_size'] # Increase if your device can handle it

transforms = [] # set of tranformations
# You may pass this as a parameter to the dataset class above
# This will help modularize your implementation


### Data loaders

In [16]:
# get me RAMMM!!!! 
import gc 
gc.collect()
X_train_meta[0]

'abortion dwayne-bohac State representative Texas republican a mailer'

In [17]:
# Create objects for the dataset class
#train_data = TextDataset([X_train[:100], X_train_just[:100], X_train_meta[:100], X_train_credit[:100], y_train[:100]])
train_data = TextDataset([X_train, X_train_just, X_train_meta, X_train_credit, y_train])
val_data = TextDataset([X_val, X_val_just, X_val_meta, X_val_credit, y_val]) 
test_data = TextDataset([X_test, X_test_just, X_test_meta, X_test_credit, y_test]) 

# Do NOT forget to pass in the collate function as parameter while creating the dataloader
train_loader = torch.utils.data.DataLoader(train_data, num_workers= 4,
                                           batch_size=BATCH_SIZE, pin_memory= True,
                                           shuffle= True)
val_loader = torch.utils.data.DataLoader(val_data, num_workers= 2,
                                           batch_size=BATCH_SIZE, pin_memory= True,
                                           shuffle= True)
test_loader = torch.utils.data.DataLoader(test_data, num_workers= 2,
                                           batch_size=BATCH_SIZE, pin_memory= True,
                                           shuffle= True)

print("Batch size: ", BATCH_SIZE)
print("Train dataset samples = {}, batches = {}".format(train_data.__len__(), len(train_loader)))
print("Val dataset samples = {}, batches = {}".format(val_data.__len__(), len(val_loader)))
print("Test dataset samples = {}, batches = {}".format(test_data.__len__(), len(test_loader)))

Batch size:  8
Train dataset samples = 10240, batches = 1280
Val dataset samples = 1284, batches = 161
Test dataset samples = 1267, batches = 159


In [18]:
# sanity check
for data in train_loader:
    x, y, z, m, n = data
    print(x.shape, y.shape, z.shape)
    x, y, z, m, n = x.to(device), y.to(device), z.to(device), m.to(device), n.to(device) 
    break 

torch.Size([8, 64]) torch.Size([8, 256]) torch.Size([8, 32])


# Model Config

## Basic

In [19]:
config_bert = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
        num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072)

In [20]:
class BertLayerNorm(nn.Module):
        def __init__(self, hidden_size, eps=1e-12):
            """Construct a layernorm module in the TF style (epsilon inside the square root).
            """
            super(BertLayerNorm, self).__init__()
            self.weight = nn.Parameter(torch.ones(hidden_size))
            self.bias = nn.Parameter(torch.zeros(hidden_size))
            self.variance_epsilon = eps

        def forward(self, x):
            u = x.mean(-1, keepdim=True)
            s = (x - u).pow(2).mean(-1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.variance_epsilon)
            return self.weight * x + self.bias

In [21]:
class BertForSequenceClassification(nn.Module):
    def __init__(self, num_labels=2): # Change number of labels here.
        super(BertForSequenceClassification, self).__init__()
        self.num_labels = num_labels
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(config_bert.hidden_dropout_prob)
        self.classifier = nn.Linear(config_bert.hidden_size*3, num_labels)
        #self.fc1 = nn.Linear(config_bert.hidden_size*2, 512)
        nn.init.xavier_normal_(self.classifier.weight)

    '''def forward_once(self, x):
        # Forward pass
        output = self.cnn1(x)
        output = output.view(output.size()[0], -1)
        output = self.fc1(output)
        return output'''

    def forward_once(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
        _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)
        pooled_output = self.dropout(pooled_output)
        #logits = self.classifier(pooled_output)

        return pooled_output

    def forward(self, input_ids1, input_ids2, input_ids3, credit_sc):
        # forward pass of input 1
        output1 = self.forward_once(input_ids1, token_type_ids=None, attention_mask=None, labels=None)
        # forward pass of input 2
        output2 = self.forward_once(input_ids2, token_type_ids=None, attention_mask=None, labels=None)

        output3 = self.forward_once(input_ids3, token_type_ids=None, attention_mask=None, labels=None)

        out = torch.cat((output1, output2, output3), 1)
        #print(out.shape)

        # Multiply the credit score with the output after concatnation

        out = torch.add(credit_sc, out)

        #out = self.fc1(out)
        logits = self.classifier(out)

        return logits

    def freeze_bert_encoder(self):
        for param in self.bert.parameters():
            param.requires_grad = False

    def unfreeze_bert_encoder(self):
        for param in self.bert.parameters():
            param.requires_grad = True

## INIT

In [22]:
torch.cuda.empty_cache()
model = BertForSequenceClassification().to(device)

In [23]:
summary(model,x,y,z,m)

                                                    Kernel Shape  \
Layer                                                              
0_bert.embeddings.Embedding_word_embeddings         [768, 30522]   
1_bert.embeddings.Embedding_position_embeddings       [768, 512]   
2_bert.embeddings.Embedding_token_type_embeddings       [768, 2]   
3_bert.embeddings.BertLayerNorm_LayerNorm                  [768]   
4_bert.embeddings.Dropout_dropout                              -   
5_bert.encoder.layer.0.attention.self.Linear_query    [768, 768]   
6_bert.encoder.layer.0.attention.self.Linear_key      [768, 768]   
7_bert.encoder.layer.0.attention.self.Linear_value    [768, 768]   
8_bert.encoder.layer.0.attention.self.Dropout_d...             -   
9_bert.encoder.layer.0.attention.output.Linear_...    [768, 768]   
10_bert.encoder.layer.0.attention.output.Dropou...             -   
11_bert.encoder.layer.0.attention.output.BertLa...         [768]   
12_bert.encoder.layer.0.intermediate.Linear_dens

Unnamed: 0_level_0,Kernel Shape,Output Shape,Params,Mult-Adds
Layer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0_bert.embeddings.Embedding_word_embeddings,"[768, 30522]","[8, 64, 768]",23440896.0,23440896.0
1_bert.embeddings.Embedding_position_embeddings,"[768, 512]","[8, 64, 768]",393216.0,393216.0
2_bert.embeddings.Embedding_token_type_embeddings,"[768, 2]","[8, 64, 768]",1536.0,1536.0
3_bert.embeddings.BertLayerNorm_LayerNorm,[768],"[8, 64, 768]",1536.0,768.0
4_bert.embeddings.Dropout_dropout,-,"[8, 64, 768]",,
...,...,...,...,...
416_bert.encoder.layer.11.output.BertLayerNorm_LayerNorm,[768],"[8, 32, 768]",,768.0
417_bert.pooler.Linear_dense,"[768, 768]","[8, 768]",,589824.0
418_bert.pooler.Tanh_activation,-,"[8, 768]",,
419_dropout,-,"[8, 768]",,


# Training Config

In [24]:
gc.collect() # These commands help you when you face CUDA OOM error
torch.cuda.empty_cache()
scaler = torch.cuda.amp.GradScaler()

In [25]:
train_acc = []
val_acc = []
train_loss = []
val_loss = []

In [26]:
def train(model, criterion, optimizer, scheduler):
    since = Time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 100
    best_acc = 0

    scheduler.step()
    model.train()  # Set model to training mode
    running_loss = 0.0
    fakeness_corrects = 0
     # Progress Bar 
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train', ncols=5) 
    # Iterate over data.
    for data in train_loader:
        inputs = data[0:4]
        fakeness = data[-1]

        inputs1 = inputs[0] # News statement input
        inputs2 = inputs[1] # Justification input
        inputs3 = inputs[2] # Meta data input
        inputs4 = inputs[3] # Credit scores input

        inputs1 = inputs1.to(device)
        inputs2 = inputs2.to(device)
        inputs3 = inputs3.to(device)
        inputs4 = inputs4.to(device)

        fakeness = fakeness.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward
        # track history if only in train
        with torch.cuda.amp.autocast():
            outputs = model(inputs1, inputs2, inputs3, inputs4)
            outputs = F.softmax(outputs,dim=1)
            loss = criterion(outputs, torch.max(fakeness.float(), 1)[1])

        # statistics
        running_loss += loss.item() * inputs1.size(0)


        fakeness_corrects += torch.sum(torch.max(outputs, 1)[1] == torch.max(fakeness, 1)[1])
        batch_bar.set_postfix(
            acc="{:.04f}%".format(100 * fakeness_corrects / (config['batch_size']*(i + 1))),
            loss="{:.04f}".format(float(running_loss / (i + 1))),
            num_correct=fakeness_corrects.item(),
            lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
        
        # loss.backward()
        scaler.scale(loss).backward()
        # optimizer.step()
        scaler.step(optimizer)
        scaler.update()


        batch_bar.update()

    batch_bar.close()
    epoch_loss = running_loss / len(X_train)


    fakeness_acc = fakeness_corrects.double() / len(X_train)

    print('Train total loss: {:.4f} '.format(epoch_loss))
    print('Train fakeness_acc: {:.4f}'.format(fakeness_acc))

    # Saving training acc and loss for each epoch
    fakeness_acc1 = fakeness_acc.data
    fakeness_acc1 = fakeness_acc1.cpu()
    fakeness_acc1 = fakeness_acc1.numpy()
    train_acc.append(fakeness_acc1)

    #epoch_loss1 = epoch_loss.data
    #epoch_loss1 = epoch_loss1.cpu()
    #epoch_loss1 = epoch_loss1.numpy()
    train_loss.append(epoch_loss)

    return train_acc, train_loss

In [27]:
def validation(model, criterion, optimizer, scheduler):
    since = Time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 100
    best_acc = 0
    # Each epoch has a training and validation phase

    model.eval()   # Set model to evaluate mode
    batch_bar = tqdm(total=len(val_loader), dynamic_ncols=True, position=0, leave=False, desc='Val', ncols=5)

    running_loss = 0.0

    fakeness_corrects = 0

    # Iterate over data.
    for data in val_loader:
        inputs = data[0:4]
        fakeness = data[-1]
        inputs1 = inputs[0] # News statement input
        inputs2 = inputs[1] # Justification input
        inputs3 = inputs[2] # Meta data input
        inputs4 = inputs[3] # Credit scores input

        inputs1 = inputs1.to(device)
        inputs2 = inputs2.to(device)
        inputs3 = inputs3.to(device)
        inputs4 = inputs4.to(device)

        fakeness = fakeness.to(device)

        # forward
        # track history if only in train
        outputs = model(inputs1, inputs2, inputs3, inputs4)

        outputs = F.softmax(outputs,dim=1)

        loss = criterion(outputs, torch.max(fakeness.float(), 1)[1])
        # backward + optimize only if in training phase
        # statistics
        running_loss += loss.item() * inputs1.size(0)


        fakeness_corrects += torch.sum(torch.max(outputs, 1)[1] == torch.max(fakeness, 1)[1])

        batch_bar.set_postfix(
            acc="{:.04f}%".format(100 * fakeness_corrects / (config['batch_size']*(i + 1))),
            loss="{:.04f}".format(float(running_loss / (i + 1))),
            num_correct=fakeness_corrects.item(),
            lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))

        batch_bar.update()

    batch_bar.close()


    epoch_loss = running_loss / len(X_val)
    fakeness_acc = fakeness_corrects.double() / len(X_val)

    print('Validation total loss: {:.4f} '.format(epoch_loss ))
    print('Validation fakeness_acc: {:.4f}'.format(fakeness_acc))
    best_acc = fakeness_acc

    # Saving val acc and loss for each epoch
    fakeness_acc1 = fakeness_acc.data
    fakeness_acc1 = fakeness_acc1.cpu()
    fakeness_acc1 = fakeness_acc1.numpy()
    val_acc.append(fakeness_acc1)

    #epoch_loss1 = epoch_loss.data
    #epoch_loss1 = epoch_loss1.cpu()
    #epoch_loss1 = epoch_loss1.numpy()
    val_loss.append(epoch_loss)

    print('Best val Acc: {:4f}'.format(float(best_acc)))

    return val_acc, val_loss

In [28]:
wandb.login(key="3c0882202a0a1f93d55e16a0e94007adf1a84943")

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mngaddam[0m ([33mthe-spinning-top[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [29]:
run = wandb.init(
    name = "project-1", ## Wandb creates random run names if you skip this field
    reinit = True, ### Allows reinitalizing runs when you re-run this cell
    #id = '2lq40515', #Insert specific run id here if you want to resume a previous run
    #resume = "must", ### You need this to resume previous runs, but comment out reinit = True when using this
    #project="hw3p2", ### Project should be created in your wandb account 
    config=config, ### Wandb Config for your run
    entity="the-spinning-top"
)

# Training

In [30]:
lrlast = .0001
lrmain = .00001
optim1 = torch.optim.Adam(
    [
        {"params":model.bert.parameters(),"lr": lrmain},
        {"params":model.classifier.parameters(), "lr": lrlast},

   ])

#optim1 = optim.Adam(model.parameters(), lr=0.001)#,momentum=.9)
# Observe that all parameters are being optimized
optimizer_ft = optim1
criterion = nn.CrossEntropyLoss()

'''import focal_loss
loss_args = {"alpha": 0.5, "gamma": 2.0}
criterion = focal_loss.FocalLoss(*loss_args)'''

# Decay LR by a factor of 0.1 every 3 epochs
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft, step_size=3, gamma=0.1)


### Training Setup

In [31]:
#sanity check
train_acc, train_loss = train(model, criterion, optimizer_ft, exp_lr_scheduler)



Train total loss: 0.5802 
Train fakeness_acc: 0.7137


In [32]:
#sanity check
val_acc, val_loss = validation(model, criterion, optimizer_ft, exp_lr_scheduler)

starting




Validation total loss: 0.5769 
Validation fakeness_acc: 0.7103
Best val Acc: 0.710280


In [35]:
val_acc

[array(0.71028037), array(0.72897196), array(0.72352025)]

In [None]:
# Iterate over number of epochs to train and evaluate your model
torch.cuda.empty_cache()

best_acc = 0.0 ### Monitor best accuracy in your run

for epoch in range(config['epochs']):
    print("\nEpoch {}/{}".format(epoch+1, config['epochs']))
    #t0 = time.time()

    train_acc, train_loss = train(model, criterion, optimizer_ft, exp_lr_scheduler)
    accuracy, val_loss = validation(model, criterion, optimizer_ft, exp_lr_scheduler)

    #scheduler.step(accuracy) #ReduceLRonPlateau
    exp_lr_scheduler.step() #StepLR

    ### Log metrics at each epoch in your run - Optionally, you can log at each batch inside train/eval functions (explore wandb documentation/wandb recitation)
    wandb.log({"train loss": train_loss, "validation accuracy": accuracy})

    ### Save checkpoint at each epoch
    ### Save checkpoint with information you want
    torch.save({'epoch': epoch,
              'model_state_dict': model.state_dict(),
              'optimizer_state_dict': optimizer_ft.state_dict(),
              'loss': train_loss,
              'acc': accuracy}, 
        './model_checkpoint.pth')
      
      ### Save checkpoint in wandb
    wandb.save('checkpoint.pth')

    # Is your training time very high? Look into mixed precision training if your GPU (Tesla T4, V100, etc) can make use of it 
    # Refer - https://pytorch.org/docs/stable/notes/amp_examples.html
    #print('Duration:',time.time() - t0)
### Finish your wandb run
run.finish()


Epoch 1/50




Train total loss: 0.5308 
Train fakeness_acc: 0.7706
starting




Validation total loss: 0.5602 
Validation fakeness_acc: 0.7282
Best val Acc: 0.728193

Epoch 2/50




Train total loss: 0.5302 
Train fakeness_acc: 0.7716
starting




Validation total loss: 0.5616 
Validation fakeness_acc: 0.7251
Best val Acc: 0.725078

Epoch 3/50




Train total loss: 0.5281 
Train fakeness_acc: 0.7745
starting




Validation total loss: 0.5616 
Validation fakeness_acc: 0.7243
Best val Acc: 0.724299

Epoch 4/50




Train total loss: 0.5278 
Train fakeness_acc: 0.7773
starting




Validation total loss: 0.5616 
Validation fakeness_acc: 0.7243
Best val Acc: 0.724299

Epoch 5/50


Train:   6%|▌         | 79/1280 [00:23<05:57,  3.36it/s, acc=5.0809%, loss=0.2491, lr=0.0000, num_correct=515]