In [1]:
# %load_ext autoreload
# %autoreload 2

import os, sys
# os.environ["CUDA_VISIBLE_DEVICES"]="1"

# Make the locally checked-out repositories importable ahead of anything
# already on sys.path (bertviz ends up first, CMSC773 second).
home_dir = os.environ['HOME']
sys.path[:0] = [
    os.path.join(home_dir, 'Dropbox/git/bertviz'),
    os.path.join(home_dir, 'Dropbox/git/CMSC773'),
]
from bertviz import head_view

import dataloader
import tomotopy as tp
from itertools import chain
import tqdm
import pandas as pd
import numpy as np
import slda
import post_classifier
import bow

from matplotlib import pyplot as plt
import seaborn as sns
sns.set_style('dark')

### Load Model

In [2]:
import torch
from transformers import BertForSequenceClassification, BertTokenizer

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [4]:
# Root directory that holds the saved model and tokenizer checkpoints.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
base_save_dir = '/home/hadi/Documents/CL2/deep_stuff/saved_models'
model_save_dir = os.path.join(base_save_dir, 'model')
# Original (misspelled) name, kept as an alias for any cell that still uses it.
mdoel_save_dir = model_save_dir
tokenizer_save_dir = os.path.join(base_save_dir, 'tokenizer')

# os.listdir() returns entries in arbitrary order, so sort before taking the
# first one; otherwise the checkpoint that gets loaded can vary between runs.
model_load_dir = os.path.join(model_save_dir, sorted(os.listdir(model_save_dir))[0])
tokenizer_load_dir = os.path.join(tokenizer_save_dir, sorted(os.listdir(tokenizer_save_dir))[0])

In [5]:
# Restore the saved tokenizer and classifier, then move the classifier
# onto the selected device.
tokenizer = BertTokenizer.from_pretrained(tokenizer_load_dir)
model = BertForSequenceClassification.from_pretrained(model_load_dir)
model = model.to(device)

### Load Data

In [6]:
# Directory with the preprocessed dataset, given relative to the home directory.
base_dir = 'Documents/CL2/umd_reddit_suicidewatch_dataset_v2/processed_data'
load_dir = os.path.join(home_dir, base_dir)

# allow_pickle=True lets NumPy unpickle object arrays, which can execute
# arbitrary code — only load files from a trusted source.
_test_arrays = np.load(os.path.join(load_dir, 'test_data.npy'), allow_pickle=True)
# The file holds exactly two entries: the model inputs and the labels.
x_np, y_np = _test_arrays

In [7]:
# Convert each input array and the labels to torch.long tensors on `device`.
_tensor_kwargs = {'dtype': torch.long, 'device': device}
x_test = [torch.tensor(arr, **_tensor_kwargs) for arr in x_np]
y_test = torch.tensor(y_np, **_tensor_kwargs)

In [8]:
def valid(model, x, y, batch_size=8):
    """Run `model` over `(x, y)` in mini-batches and report the error rate.

    Parameters
    ----------
    model : callable with an ``eval()`` method
        Invoked as ``model(*batch_inputs)``; element ``[0]`` of its result is
        taken as the logits, with classes along dim 1.
    x : sequence of tensors
        Model inputs. Every tensor is sliced along its first dimension with
        the same batch indices.
    y : tensor
        Class labels, one per sample (``len(y)`` samples).
    batch_size : int, default 8
        Number of samples per forward pass.

    Returns
    -------
    list
        Predicted class index for every sample, in input order.

    Side effects
    ------------
    Switches `model` to eval mode (not restored afterwards) and prints the
    fraction of misclassified samples (``nan`` when there are no samples).
    """
    model.eval()

    num_samples = len(y)
    num_incorrect = 0
    y_pred_all = []

    # Step through the data by batch start offset; the final batch may be short.
    for start in tqdm.tqdm(range(0, num_samples, batch_size)):
        indices = slice(start, start + batch_size)

        batch_inputs = [z[indices] for z in x]
        batch_labels = y[indices].reshape(-1).tolist()

        with torch.no_grad():
            logits = model(*batch_inputs)[0]

        # softmax is monotonic, so argmax over the raw logits selects the
        # same class without the extra computation.
        y_pred = torch.argmax(logits, dim=1).tolist()

        # Count mismatches directly. Summing |label - pred| only equals the
        # number of errors for 0/1 labels and over-counts for any other labels.
        num_incorrect += sum(t != p for t, p in zip(batch_labels, y_pred))
        y_pred_all.extend(y_pred)

    # Guard against division by zero on an empty dataset.
    error_rate = num_incorrect / num_samples if num_samples else float('nan')
    print(error_rate)

    return y_pred_all

In [9]:
# Run inference over the whole test split; `valid` prints the error rate and
# returns the per-sample predicted classes.
y_pred_all = valid(model, x_test, y_test, batch_size=256)

100%|██████████| 57/57 [06:35<00:00,  6.94s/it]

0.3372826162267027





In [18]:
# Accuracy: share of test samples whose predicted class equals the label.
labels_flat = y_test.view(-1).tolist()

num_correct = 0
for true, pred in zip(labels_flat, y_pred_all):
    num_correct += int(true == pred)

accuracy = num_correct / len(y_pred_all)
accuracy

0.6627173837732973

In [10]:
from sklearn.metrics import precision_recall_fscore_support

In [11]:
# Per-class precision, recall, F-score and support (no averaging).
precision_recall_fscore_support(y_test.view(-1).tolist(), y_pred_all)

(array([0.72737251, 0.36693704]),
 array([0.84015994, 0.22732711]),
 array([0.77970857, 0.28073286]),
 array([10254,  4179]))

In [12]:
# Precision / recall / F-score averaged over classes, weighted by class support.
precision_recall_fscore_support(y_test.view(-1).tolist(), y_pred_all, average='weighted')

(0.6230102962617377, 0.6627173837732973, 0.6352327520431591, None)

In [18]:
# Micro-averaged precision / recall / F-score (computed from global counts).
# NOTE(review): for single-label classification the micro average should equal
# plain accuracy, yet the recorded output here differs from the accuracy cell's
# output — this output looks stale (the execution counts are also out of
# order). Re-run with Restart & Run All to confirm.
precision_recall_fscore_support(y_test.view(-1).tolist(), y_pred_all, average='micro')

(0.6234324118339916, 0.6234324118339916, 0.6234324118339916, None)

In [19]:
# Macro average: unweighted mean of the per-class precision / recall / F-score.
precision_recall_fscore_support(y_test.view(-1).tolist(), y_pred_all, average='macro')

(0.5305416665469647, 0.5287787820374686, 0.5291677101732248, None)