In [0]:
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from pytorch_pretrained_bert import BertTokenizer, BertConfig
from pytorch_pretrained_bert import BertAdam, BertForSequenceClassification
from tqdm import tqdm, trange
import pandas as pd
import io
import numpy as np
import matplotlib.pyplot as plt

import nltk
from nltk.stem import LancasterStemmer, SnowballStemmer, WordNetLemmatizer
from nltk.corpus import stopwords 
from nltk.tokenize import sent_tokenize
from nltk.tokenize import RegexpTokenizer
nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('punkt')

import os
import re
import json
import csv

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import pairwise_distances,cosine_similarity
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import f1_score
from pathlib import Path

import random
# Called without an argument, so the seed comes from the OS / system time:
# anything drawn from `random` differs from run to run.
random.seed()
import warnings
# Hide the FutureWarning and ConvergenceWarning categories from cell output.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning) 

# specify GPU device
# Falls back to the CPU when CUDA is not available; `n_gpu` is 0 in that case.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
[nltk_data] Downloading package wordnet to
[nltk_data]     /home/huang627158768/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/huang627158768/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /home/huang627158768/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Helper Functions

In [0]:
# Shared BERT tokenizer ('bert-base-uncased', lower-casing enabled); the helper
# functions below read it as a module-level global.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
def fromat_and_tokenize(queries):
    """Wrap each query in BERT's ``[CLS]`` / ``[SEP]`` markers and tokenize it.

    Args:
        queries: iterable of text strings.

    Returns:
        A list holding one token list per query, produced by the module-level
        ``tokenizer``.
    """
    tokenized_texts = []
    for query in queries:
        marked_sentence = "[CLS] " + query + " [SEP]"
        # Tokenize with the BERT tokenizer
        tokenized_texts.append(tokenizer.tokenize(marked_sentence))
    return tokenized_texts

def pad_sentences(texts,max_length):
    """Convert token lists to vocabulary ids and force them to a fixed length.

    Args:
        texts: iterable of token lists (as returned by ``fromat_and_tokenize``).
        max_length: number of ids every returned row will have.

    Returns:
        The array produced by ``pad_sequences`` with dtype "long": shorter
        rows are padded at the end, longer rows are cut at the end.
    """
    id_sequences = []
    for tokens in texts:
        id_sequences.append(tokenizer.convert_tokens_to_ids(tokens))
    # NOTE(review): truncating="post" removes tokens from the end, so an
    # over-long input loses its trailing [SEP] marker.
    return pad_sequences(
        id_sequences,
        maxlen=max_length,
        dtype="long",
        truncating="post",
        padding="post",
    )

def create_attention_masks(input_ids):
    """Build an attention mask for every id sequence.

    Args:
        input_ids: iterable of id sequences (rows of ids).

    Returns:
        A list of lists of floats, one inner list per sequence: 1.0 where the
        id is greater than zero and 0.0 elsewhere (i.e. on zero padding).
    """
    return [
        [float(token_id > 0) for token_id in id_row]
        for id_row in input_ids
    ]
def setup_dataloader(inputs,labels,batch_size,max_length=512):
    """Turn raw texts and labels into a sequential ``DataLoader`` for the model.

    Each text is wrapped in [CLS]/[SEP] and tokenized, converted to ids that
    are padded or truncated to ``max_length``, and paired with an attention
    mask that is 1.0 on real tokens and 0.0 on padding.

    Args:
        inputs: iterable of text strings.
        labels: label values, one per input, accepted by ``torch.tensor``.
        batch_size: number of examples per batch.
        max_length: length every id sequence is padded/truncated to. Defaults
            to 512, the value that used to be hard-coded here.
            NOTE(review): the loaded model's repr shows 512 position
            embeddings, so larger values presumably fail in the forward
            pass - verify before raising it.

    Returns:
        A ``DataLoader`` yielding ``(input_ids, attention_mask, labels)``
        batches in the original input order (``SequentialSampler``).
    """
    sentences = fromat_and_tokenize(inputs)
    input_ids = pad_sentences(sentences,max_length)
    attention_masks = create_attention_masks(input_ids)

    prediction_inputs = torch.tensor(input_ids)
    prediction_masks = torch.tensor(attention_masks)
    prediction_labels = torch.tensor(labels)

    prediction_data = TensorDataset(prediction_inputs, prediction_masks, prediction_labels)
    # Sequential (not random) sampling keeps batches aligned with the inputs.
    prediction_sampler = SequentialSampler(prediction_data)
    prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)
    return prediction_dataloader
def flat_accuracy(preds, labels, verbose=True):
    """Return the fraction of rows whose arg-max class equals the label.

    Args:
        preds: 2-D array of per-class scores; the predicted class of a row is
            the index of its largest score.
        labels: array of true class ids, flattened before comparison.
        verbose: when True (the default, matching the previous behaviour)
            print the predicted and actual class ids. Pass False to keep
            long evaluation runs from flooding the cell output.

    Returns:
        Number of matching positions divided by the number of labels.
    """
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    if verbose:
        print("predicted: ",pred_flat,"actual: ",labels_flat)
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

## Load the claims and related sentences from disk

In [0]:
import json

# Related sentences, looked up below by the keys this object yields when iterated.
with open('mie1624/related_sentences.json', 'r') as f:
    related_sentences = json.load(f)

# Claim records; each one provides 'claim', 'label' and 'id'.
with open('mie1624/train.json', 'r') as f:
    claims = json.load(f)

labels, inputs, ids = [], [], []
# NOTE(review): zip() pairs the i-th claim with the i-th key of
# related_sentences, so this presumably relies on both files listing the
# claims in the same order - confirm.
for claim, index in zip(claims, related_sentences):
    # Claim text followed by its related sentences, separated by single spaces.
    concat_sentence = " ".join([claim['claim']] + list(related_sentences[index]))
    # Cap at 512 *characters* (not tokens); slicing leaves shorter strings unchanged.
    concat_sentence = concat_sentence[:512]
    inputs.append(concat_sentence)
    labels.append(claim['label'])
    ids.append(claim['id'])

## Load the previously trained model

In [0]:
# NOTE: torch.load unpickles the file, which can execute arbitrary code - only
# load checkpoints that come from a trusted source.
# map_location=device lets a checkpoint that was saved on a GPU be restored on
# a CPU-only machine too, matching the CPU fallback used when `device` is set.
model = torch.load('mie1624/states/best_model.pt.tar', map_location=device)

In [0]:
# Put the model in evaluation mode (turns off its Dropout layers) before inference.
model.eval()

In [0]:
# Move the model to the same `device` that predict() moves each batch to;
# unlike a hard-coded .cuda() call this also works on a CPU-only machine.
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
   

In [0]:
def predict(dataloader):
    """Run the global ``model`` over ``dataloader`` and score its predictions.

    Args:
        dataloader: iterable of ``(input_ids, attention_mask, labels)`` tensor
            batches, e.g. the object returned by ``setup_dataloader``.

    Returns:
        A tuple ``(accuracy, matthews_score, f1_score_r)`` computed over all
        examples at once: plain accuracy, the Matthews correlation
        coefficient, and the macro-averaged F1 score.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    # Imported locally so the metric functions cannot be shadowed by notebook
    # variables (a later cell rebinds the global name `f1_score` to a float).
    from sklearn.metrics import f1_score, matthews_corrcoef

    predictions , true_labels = [], []
    for batch in dataloader:
        # Add batch to the compute device
        batch = tuple(t.to(device) for t in batch)
        # Unpack the inputs from our dataloader
        b_input_ids, b_input_mask, b_labels = batch
        # No gradients are needed for inference; skipping them saves memory and time
        with torch.no_grad():
            # Forward pass, calculate logit predictions
            logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        # Store predictions and true labels
        predictions.append(logits)
        true_labels.append(label_ids)
        # Still called for its per-batch "predicted / actual" log line; the
        # reported accuracy is computed over all examples below.
        flat_accuracy(logits, label_ids)

    if not predictions:
        raise ValueError("predict() received an empty dataloader")

    flat_predictions = np.argmax(np.concatenate(predictions, axis=0), axis=1).flatten()
    flat_true_labels = np.concatenate(true_labels, axis=0).flatten()

    # Accuracy over every example. Averaging the per-batch accuracies instead
    # would over-weight a final batch that is smaller than the others.
    accuracy = np.mean(flat_predictions == flat_true_labels)
    # The metrics are only computed on the full set: a per-batch Matthews
    # coefficient is undefined when a batch contains a single class and only
    # produced a divide-by-zero RuntimeWarning.
    matthews_score = matthews_corrcoef(flat_true_labels, flat_predictions)
    f1_score_r = f1_score(flat_true_labels, flat_predictions, average='macro')

    return accuracy, matthews_score, f1_score_r
    

## Split into training and testing data (using the same random_state = 2018 as in the previous training)

In [0]:
# 90/10 split with a fixed random_state so the same examples land in the test
# fold on every run. The misspelled name `test_inptuts` is kept because the
# next cell reads it.
train_inputs, test_inptuts, train_labels, test_labels = train_test_split(inputs, labels, random_state=2018, test_size=0.1)

In [0]:
# Test-fold loader, batch size 4; setup_dataloader samples sequentially, so
# batches follow the order of `test_inptuts`.
dataloader = setup_dataloader(test_inptuts,test_labels,4)

## Get prediction for testing data

In [0]:
# Scores the test fold; predict() calls flat_accuracy for every batch, which
# by default prints one "predicted / actual" line per batch.
accuracy, matthews_score, f1 = predict(dataloader)

predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [1 1 1 0] actual:  [0 1 0 1]
predicted:  [1 0 0 0] actual:  [0 1 0 0]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [0 0 1 1] actual:  [0 0 2 2]
predicted:  [0 0 1 0] actual:  [1 0 1 2]
predicted:  [0 2 1 1] actual:  [0 0 0 0]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [0 0 2 0] actual:  [1 0 1 0]
predicted:  [0 1 0 0] actual:  [0 2 0 1]
predicted:  [1 1 0 0] actual:  [1 1 0 1]
predicted:  [0 0 2 0] actual:  [2 2 2 0]
predicted:  [2 0 0 0] actual:  [2 0 0 1]
predicted:  [1 2 0 1] actual:  [1 1 0 0]
predicted:  [0 0 0 0] actual:  [1 2 0 2]
predicted:  [0 1 0 1] actual:  [0 1 0 1]
predicted:  [0 1 1 0] actual:  [0 1 1 0]
predicted:  [1 0 0 1] actual:  [0 0 0 2]
predicted:  [2 1 1 1] actual:  [0 2 2 1]
predicted:  [0 0 0 1] actual:  [1 0 0 0]
predicted:  [0 0 1 1] actual:  [0 1 1 1]
predicted:  [0 0 1 1] actual:  [2 2 0 0]
predicted:  [1 1

predicted:  [0 1 1 1] actual:  [1 2 1 1]
predicted:  [0 1 0 0] actual:  [0 0 1 0]
predicted:  [1 0 0 2] actual:  [1 1 0 1]
predicted:  [0 0 1 2] actual:  [2 0 1 2]
predicted:  [0 1 1 0] actual:  [2 1 1 0]
predicted:  [0 0 0 0] actual:  [0 0 0 1]
predicted:  [0 0 0 1] actual:  [2 0 0 0]
predicted:  [1 0 2 1] actual:  [2 0 1 0]
predicted:  [0 0 1 1] actual:  [1 1 1 2]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [0 0 1 1] actual:  [1 2 0 0]
predicted:  [1 2 0 0] actual:  [1 2 1 1]
predicted:  [0 0 1 1] actual:  [1 0 0 1]
predicted:  [2 2 0 1] actual:  [0 1 0 1]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [0 1 2 0] actual:  [0 0 0 0]
predicted:  [2 0 1 0] actual:  [1 0 2 1]
predicted:  [1 0 1 0] actual:  [1 0 1 1]
predicted:  [0 0 1 2] actual:  [0 1 1 1]
predicted:  [0 1 0 0] actual:  [0 1 0 1]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [1 1 0 2] actual:  [0 1 0 0]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [1 1 1 1] actual:  [1 2 0 1]
predicted:  [0 0

## Print the accuracy, Matthews correlation coefficient, and F1 score

In [0]:
# '{0:0.2%}' renders each fractional score as a percentage with two decimals.
print("Test Accuracy: {0:0.2%}".format(accuracy))
print('Test Matthews_corrcoef score: {0:0.2%}'.format(matthews_score))
print('Test f1 score: {0:0.2%}'.format(f1))

Test Accuracy: 59.51%
Test Matthews_corrcoef score: 29.32%
Test f1 score: 48.19%


## Get predictions for training data

In [0]:
# Rebinds `dataloader` to the training fold (batch size 4, sequential order);
# the test-fold loader from the earlier cell is no longer reachable by name.
dataloader = setup_dataloader(train_inputs,train_labels,4)

In [0]:
# NOTE(review): unpacking into `f1_score` rebinds that global name - sklearn's
# metric function imported in the first cell - to a float. Any code that later
# looks up the sklearn function through the global `f1_score` will break until
# the import cell is re-run. The print cell below reads this variable.
accuracy, matthews_score, f1_score = predict(dataloader)

predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [1 1 0 1] actual:  [1 1 0 1]
predicted:  [0 0 0 0] actual:  [0 0 1 0]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [1 0 0 2] actual:  [1 0 0 2]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [0 2 1 1] actual:  [0 2 1 1]
predicted:  [0 1 1 0] actual:  [0 1 1 1]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [0 0 0 2] actual:  [0 0 0 2]
predicted:  [1 1 1 1] actual:  [1 1 1 1]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [0 1 2 1] actual:  [0 1 2 1]
predicted:  [0 1 0 1] actual:  [0 1 0 1]
predicted:  [1 1 1 0] actual:  [1 1 1 0]
predicted:  [2 1 1 1] actual:  [2 1 1 1]
predicted:  [0 0

predicted:  [0 1 0 0] actual:  [1 1 0 0]
predicted:  [0 1 0 1] actual:  [0 1 0 1]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [1 2 1 1] actual:  [1 2 1 1]
predicted:  [2 0 0 0] actual:  [2 0 0 0]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [0 1 1 0] actual:  [0 1 1 0]
predicted:  [1 1 1 1] actual:  [1 1 1 1]
predicted:  [1 0 0 2] actual:  [1 0 0 2]
predicted:  [2 0 0 0] actual:  [2 0 0 0]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [2 0 0 1] actual:  [2 0 0 1]
predicted:  [1 0 0 1] actual:  [1 0 0 1]
predicted:  [1 2 1 0] actual:  [1 2 1 0]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [1 1 1 0] actual:  [1 1 1 0]
predicted:  [2 1 0 0] actual:  [2 1 0 0]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [2 0 1 0] actual:  [2 0 1 0]
predicted:  [1 1 1 0] actual:  [1 1 1 0]
predicted:  [0 1 1 0] actual:  [0 1 1 0]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [1 1

predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [1 2 0 1] actual:  [1 2 0 1]
predicted:  [2 1 0 1] actual:  [2 1 0 1]
predicted:  [1 1 0 1] actual:  [1 1 0 1]
predicted:  [0 2 0 1] actual:  [0 2 0 1]
predicted:  [0 0 1 2] actual:  [0 0 1 2]
predicted:  [0 1 1 2] actual:  [0 1 1 2]
predicted:  [1 0 0 1] actual:  [1 0 0 1]
predicted:  [2 2 1 1] actual:  [2 2 1 1]
predicted:  [1 0 0 1] actual:  [1 1 0 1]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [1 1 1 0] actual:  [1 1 1 0]
predicted:  [1 2 1 0] actual:  [1 2 1 0]
predicted:  [0 1 0 1] actual:  [0 1 0 1]
predicted:  [0 1 2 0] actual:  [0 1 2 0]
predicted:  [0 0 2 0] actual:  [0 0 2 0]
predicted:  [2 1 1 0] actual:  [2 1 1 0]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [0 1 1 2] actual:  [0 1 1 2]
predicted:  [1 1 0 1] actual:  [1 1 0 1]
predicted:  [0 1

predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [1 0 0 1] actual:  [1 0 0 1]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [0 0 0 2] actual:  [0 0 0 2]
predicted:  [1 0 2 2] actual:  [1 0 2 2]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [1 2 1 1] actual:  [1 2 1 1]
predicted:  [0 1 1 0] actual:  [0 1 1 0]
predicted:  [1 1 1 0] actual:  [1 1 1 0]
predicted:  [0 1 2 0] actual:  [0 1 2 0]
predicted:  [1 2 0 0] actual:  [1 2 0 0]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [1 0 0 2] actual:  [1 0 0 2]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [1 1 0 1] actual:  [1 1 0 1]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [1 1 0 1] actual:  [1 1 0 1]
predicted:  [1 2 0 1] actual:  [1 2 0 1]
predicted:  [0 0 1 0] actual:  [0 1 1 0]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [1 1

predicted:  [0 2 1 0] actual:  [0 2 1 0]
predicted:  [1 0 0 2] actual:  [1 0 0 2]
predicted:  [1 0 0 2] actual:  [1 0 0 2]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [2 0 1 2] actual:  [2 0 1 2]
predicted:  [0 2 1 1] actual:  [0 2 1 1]
predicted:  [0 1 0 1] actual:  [0 1 0 1]
predicted:  [1 0 2 0] actual:  [1 0 2 0]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [0 2 0 2] actual:  [0 2 0 2]
predicted:  [0 2 0 0] actual:  [0 2 0 0]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [2 1 2 1] actual:  [2 1 2 1]
predicted:  [1 0 2 0] actual:  [1 0 2 0]
predicted:  [1 2 1 1] actual:  [1 2 1 1]
predicted:  [1 1 0 1] actual:  [1 1 0 1]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [1 1 2 0] actual:  [1 1 2 0]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [0 1

predicted:  [1 1 1 1] actual:  [1 1 1 1]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [0 2 2 0] actual:  [0 2 1 0]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [0 0 0 2] actual:  [0 0 0 2]
predicted:  [0 2 0 0] actual:  [0 2 0 0]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [0 2 1 2] actual:  [0 2 1 2]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [1 0 0 0] actual:  [1 2 0 0]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [0 1 2 1] actual:  [0 1 2 1]
predicted:  [2 0 1 0] actual:  [2 1 1 0]
predicted:  [1 1 1 1] actual:  [1 1 1 1]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [0 2 1 0] actual:  [0 2 1 0]
predicted:  [1 0 0 1] actual:  [1 0 0 1]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [0 1

predicted:  [1 0 2 0] actual:  [1 0 2 0]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [2 0 1 0] actual:  [2 0 1 0]
predicted:  [0 0 0 2] actual:  [0 0 0 2]
predicted:  [0 1 0 1] actual:  [0 1 0 1]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [0 0 1 2] actual:  [0 0 1 2]
predicted:  [1 2 0 0] actual:  [1 2 0 0]
predicted:  [0 1 1 0] actual:  [0 1 1 0]
predicted:  [0 0 2 1] actual:  [0 0 2 1]
predicted:  [1 0 2 0] actual:  [1 0 2 0]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [2 0 1 2] actual:  [2 0 1 2]
predicted:  [1 2 0 0] actual:  [1 2 0 0]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [2 0 0 0] actual:  [2 0 0 0]
predicted:  [1 0 0 1] actual:  [1 0 0 1]
predicted:  [2 0 1 0] actual:  [2 0 1 0]
predicted:  [1 0

predicted:  [2 0 1 0] actual:  [2 0 1 0]
predicted:  [0 0 2 2] actual:  [0 0 2 2]
predicted:  [2 0 0 0] actual:  [2 0 0 0]
predicted:  [0 1 0 1] actual:  [0 1 0 1]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [0 2 0 2] actual:  [0 2 0 2]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [1 0 0 2] actual:  [1 0 0 2]
predicted:  [1 1 0 2] actual:  [1 1 0 2]
predicted:  [1 1 0 1] actual:  [1 1 2 1]
predicted:  [1 2 0 1] actual:  [1 2 0 1]
predicted:  [2 2 1 0] actual:  [2 2 1 0]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [2 0 1 1] actual:  [2 0 1 1]
predicted:  [0 0 2 0] actual:  [0 0 2 0]
predicted:  [1 0 0 1] actual:  [1 0 0 1]
predicted:  [1 1 0 1] actual:  [1 1 0 1]
predicted:  [1 2 0 2] actual:  [1 2 0 2]
predicted:  [0 0

predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [1 0 2 1] actual:  [1 0 2 1]
predicted:  [1 1 1 1] actual:  [1 1 1 1]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [2 1 1 1] actual:  [2 1 1 1]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [2 0 0 0] actual:  [2 0 0 0]
predicted:  [1 2 0 0] actual:  [1 2 0 0]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [0 0 2 0] actual:  [0 0 2 0]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [1 1 2 0] actual:  [1 1 2 0]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [2 1 0 0] actual:  [2 1 0 0]
predicted:  [0 1 0 1] actual:  [0 1 0 1]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [1 1 1 0] actual:  [1 1 1 0]
predicted:  [2 1 0 1] actual:  [2 1 0 1]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [0 0

predicted:  [2 0 0 1] actual:  [2 0 0 1]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [2 0 1 1] actual:  [2 0 1 1]
predicted:  [1 1 0 2] actual:  [1 1 0 2]
predicted:  [0 2 0 1] actual:  [0 2 0 1]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [0 1 1 0] actual:  [0 1 1 0]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [0 1 1 0] actual:  [0 1 1 0]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [0 0 2 0] actual:  [0 0 2 0]
predicted:  [2 2 0 1] actual:  [2 2 0 1]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [1 0 0 1] actual:  [1 0 0 1]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [1 2 1 1] actual:  [1 2 1 1]
predicted:  [0 2 0 2] actual:  [0 2 0 2]
predicted:  [0 2 0 2] actual:  [2 2 0 2]
predicted:  [1 2 0 1] actual:  [1 2 2 1]
predicted:  [0 0

predicted:  [0 1 1 0] actual:  [0 1 1 0]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [0 2 0 0] actual:  [0 2 0 0]
predicted:  [0 1 0 1] actual:  [0 1 0 1]
predicted:  [2 0 2 0] actual:  [2 0 2 0]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [1 1 0 1] actual:  [1 1 0 1]
predicted:  [2 0 2 0] actual:  [2 0 2 0]
predicted:  [1 1 0 2] actual:  [1 1 0 2]
predicted:  [2 1 0 0] actual:  [2 1 0 0]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [0 1 2 1] actual:  [0 1 2 1]
predicted:  [1 1 2 0] actual:  [1 1 2 0]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [0 0 0 2] actual:  [0 0 0 2]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [2 0 1 0] actual:  [2 0 1 0]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [0 1

predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [0 1 0 1] actual:  [0 1 0 1]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [1 0 2 0] actual:  [1 0 2 0]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [2 0 2 2] actual:  [2 0 2 2]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [1 0 2 0] actual:  [1 0 2 0]
predicted:  [1 1 1 1] actual:  [1 1 1 1]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [1 1 0 1] actual:  [1 1 0 1]
predicted:  [0 0 2 0] actual:  [0 0 2 0]
predicted:  [1 0 0 0] actual:  [1 0 2 0]
predicted:  [0 2 0 1] actual:  [0 2 0 1]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [1 0 1 2] actual:  [1 0 1 2]
predicted:  [0 2 0 1] actual:  [0 2 0 1]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [1 0 0 1] actual:  [1 0 0 1]
predicted:  [0 0 0 2] actual:  [0 0 0 2]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [0 1

predicted:  [2 2 0 1] actual:  [2 2 0 1]
predicted:  [1 1 0 1] actual:  [1 1 0 1]
predicted:  [1 0 1 2] actual:  [1 0 1 2]
predicted:  [0 0 2 0] actual:  [0 0 2 0]
predicted:  [1 2 0 1] actual:  [1 2 0 1]
predicted:  [0 1 1 0] actual:  [0 1 1 0]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [0 2 1 1] actual:  [0 1 1 1]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [1 0 1 2] actual:  [1 0 1 2]
predicted:  [0 0 0 2] actual:  [0 0 0 2]
predicted:  [2 1 0 2] actual:  [2 1 0 2]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [0 1 0 2] actual:  [0 1 0 2]
predicted:  [1 2 0 0] actual:  [1 2 0 0]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [1 1 1 0] actual:  [1 1 1 0]
predicted:  [1 1 1 1] actual:  [1 1 1 1]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [0 1

predicted:  [2 0 2 2] actual:  [2 0 2 2]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [1 2 0 1] actual:  [1 2 0 1]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [1 1 1 0] actual:  [1 1 1 0]
predicted:  [0 1 1 0] actual:  [0 1 1 0]
predicted:  [0 1 2 0] actual:  [0 1 2 0]
predicted:  [1 1 1 1] actual:  [1 1 1 1]
predicted:  [2 1 0 0] actual:  [2 1 0 0]
predicted:  [1 0 0 1] actual:  [1 0 0 1]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [2 0 1 0] actual:  [2 0 1 0]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [1 1 1 0] actual:  [1 1 1 0]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [0 1 0 1] actual:  [0 1 0 1]
predicted:  [0 0 2 0] actual:  [0 0 2 0]
predicted:  [1 0 0 1] actual:  [1 0 0 1]
predicted:  [0 0

predicted:  [1 0 1 2] actual:  [1 0 1 2]
predicted:  [1 1 1 0] actual:  [1 1 1 0]
predicted:  [1 1 1 0] actual:  [1 1 1 0]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [0 0 0 1] actual:  [0 2 0 1]
predicted:  [0 1 0 0] actual:  [0 1 1 0]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [1 1 1 0] actual:  [1 1 1 0]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [1 2 0 2] actual:  [1 2 0 2]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [0 2 1 2] actual:  [0 2 1 2]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [0 0

predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [1 2 1 1] actual:  [1 2 1 1]
predicted:  [1 0 0 1] actual:  [1 0 0 1]
predicted:  [0 1 0 2] actual:  [0 1 0 2]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [1 0 1 0] actual:  [1 0 1 1]
predicted:  [1 2 2 1] actual:  [1 2 2 1]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [1 0 2 1] actual:  [1 0 2 1]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [1 0 2 0] actual:  [1 1 2 0]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [1 1 2 0] actual:  [1 1 2 0]
predicted:  [1 1 2 0] actual:  [1 1 2 0]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [1 1 1 1] actual:  [1 1 1 1]
predicted:  [1 2 2 0] actual:  [1 2 2 0]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [2 0 0 0] actual:  [2 0 0 0]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [0 2 1 0] actual:  [0 2 1 0]
predicted:  [1 1

predicted:  [0 1 0 1] actual:  [0 1 0 1]
predicted:  [1 0 0 1] actual:  [1 0 0 1]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [0 2 1 2] actual:  [1 2 1 2]
predicted:  [1 1 0 1] actual:  [1 1 0 1]
predicted:  [0 0 0 0] actual:  [0 0 0 0]
predicted:  [0 0 2 1] actual:  [0 0 2 1]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [1 1 0 1] actual:  [1 1 0 1]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [1 1 0 1] actual:  [1 1 0 1]
predicted:  [0 1 0 1] actual:  [0 1 0 1]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [0 0 0 1] actual:  [0 0 0 2]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [1 0 2 0] actual:  [1 0 2 0]
predicted:  [2 0 1 1] actual:  [2 0 1 1]
predicted:  [0 1 1 1] actual:  [0 1 1 1]
predicted:  [0 1 2 1] actual:  [0 1 2 1]
predicted:  [0 1 1 0] actual:  [0 1 1 0]
predicted:  [2 1 1 2] actual:  [2 1 1 2]
predicted:  [1 0 2 0] actual:  [1 0 2 0]
predicted:  [0 2

predicted:  [1 2 0 0] actual:  [1 2 0 0]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [2 0 0 1] actual:  [2 0 0 1]
predicted:  [0 0 0 2] actual:  [0 0 0 2]
predicted:  [0 0 0 1] actual:  [0 0 0 1]
predicted:  [2 1 0 2] actual:  [2 1 0 2]
predicted:  [1 0 0 1] actual:  [1 0 0 1]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [0 1 0 0] actual:  [0 1 0 0]
predicted:  [1 1 1 1] actual:  [1 1 1 1]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [1 0 1 0] actual:  [1 0 1 0]
predicted:  [0 0 2 1] actual:  [0 0 2 1]
predicted:  [0 1 1 0] actual:  [0 1 1 0]
predicted:  [0 0 1 1] actual:  [0 0 1 1]
predicted:  [1 1 0 0] actual:  [1 1 0 0]
predicted:  [0 2 0 1] actual:  [0 2 0 1]
predicted:  [1 0 0 0] actual:  [1 0 0 0]
predicted:  [1 0 1 1] actual:  [1 0 1 1]
predicted:  [0 2 1 1] actual:  [0 2 1 1]
predicted:  [0 0 1 0] actual:  [0 0 1 0]
predicted:  [0 0

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


In [0]:
# `f1_score` here is the float returned by predict() in the previous cell, not
# sklearn's metric function of the same name.
print("Train Accuracy: {0:0.2%}".format(accuracy))
print('Train Matthews_corrcoef score: {0:0.2%}'.format(matthews_score))
print('Train f1 score: {0:0.2%}'.format(f1_score))

Train Accuracy: 98.95%
Train Matthews_corrcoef score: 98.22%
Train f1 score: 98.59%
