### Install dependencies and load common Variables
<font color='Red' size="3"><i>IMPORTANT: </i></font><font color='Black' size="3"><i>(Always execute these two cells at the beginning.)</i></font>

**Dependencies:**

<font color='Red' size="3"><i>Tested in Kaggle and Google Colab</i></font>

---------

ipython==5.5.0

pandas==1.3.3

numpy==1.19.5

seaborn==0.11.2

matplotlib==3.2.2

datasets==1.14.0

tensorflow==2.6.0

scikit-learn==0.22.2

transformers==4.5.1 or 4.12.2

sentence-transformers==2.1.0

---------


In [None]:
# IMPORT LIBRARIES

import os

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from pprint import pprint
from sklearn.metrics import classification_report
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import confusion_matrix, plot_confusion_matrix

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import CategoricalCrossentropy
from tensorflow.keras.layers import GlobalMaxPool1D, concatenate
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization

from datasets import load_dataset

from transformers import AutoTokenizer, TFAutoModel
from sentence_transformers import SentenceTransformer


In [None]:
###      WRITE HERE THE MAIN FOLDER      ###

# Root folder for every artefact written by this notebook.
# It is concatenated as a plain string below, so it must end with '/'.
BASE_FOLDER = './'

###  ---------------------------------------------------------------------------------  ###



###  OTHER DIRECTORIES, PATHS AND NAMES  ###

TEST_DATA_DIR = BASE_FOLDER+'test-data/'
TRAIN_DATA_DIR = BASE_FOLDER+'train-val-data/'
CHECKPOINT_DIR = BASE_FOLDER + 'model-checkpoint/'
CHECKPOINT_PATH = CHECKPOINT_DIR + "TransferLearning_HEALTH_FACT_weights.ckpt"

# Make the directories.
# The checkpoint folder is created under BASE_FOLDER so that it always matches
# CHECKPOINT_PATH; makedirs(exist_ok=True) also creates missing parents and is
# safe to re-run.
for _directory in (TRAIN_DATA_DIR, TEST_DATA_DIR, CHECKPOINT_DIR):
    os.makedirs(_directory, exist_ok=True)

# model names
MODEL1_NAME = 'deepset/sentence_bert'  # sentence encoder used to rank article sentences
MODEL2_NAME = "bert-base-cased"        # transformer used inside the classifier
# MODEL2_NAME = "giacomomiolo/scibert_reupload"

###  ---------------------------------------------------------------------------------  ###


### Load dataset and start Preprocessing

In [None]:
# LOAD DATASET 'HEALTH_FACT'
# Downloads (or reuses the local cache of) the 'health_fact' dataset via `datasets.load_dataset`.
# If this raises --> ConnectionError: Couldn't reach https://drive.google.com/uc?export=download&id=1eTtRs5cUlBP5dXsx-FTAlmXuB6JQi2qj
# just run the cell again.

dataset = load_dataset('health_fact')
# Last expression of the cell: displays the loaded object.
dataset


In [None]:

# Pretty-print one raw training example (index 1) to inspect the available fields.
pprint(dataset['train'][1])


In [None]:
# Convert the TRAIN, TEST and VAL splits into pandas DataFrames, keeping only
# the columns 'claim', 'main_text' and 'label'.

df_train = pd.DataFrame(dataset['train'], columns=['claim', 'main_text', 'label'])
df_test = pd.DataFrame(dataset['test'], columns=['claim', 'main_text', 'label'])
df_val = pd.DataFrame(dataset['validation'], columns=['claim', 'main_text', 'label'])

# Concatenate train, test and val.
# ignore_index=True gives the result a fresh unique 0..n-1 index. Without it each
# split keeps its own 0..k labels, so the same label appears up to three times and
# any later label-based operation (e.g. df.drop(index_labels)) hits rows from
# several splits at once.
df = pd.concat([df_train, df_test, df_val], ignore_index=True)

# Delete the raw dataset and the per-split frames to free memory.
del dataset, df_train, df_test, df_val

df.head()


In [None]:

# Total number of rows after concatenating the three splits.
len(df)


In [None]:
# The dataset viewer (https://huggingface.co/datasets/viewer/?dataset=health_fact)
# notes: "There are missing labels in the dataset and we have replaced them with -1."

# Boolean Series flagging the rows whose label is the -1 placeholder.
mask = df['label'].eq(-1)

print('Missing labels:', int(mask.sum()), sep='\t')


In [None]:
# DROP MISSING DATA

print('Previous size: '+str(len(df)))

# Keep only rows with a real label (-1 marks a missing one).
# The filter is positional (plain boolean array) rather than label-based:
# df.drop(df[mask].index) removes *every* row that shares an index label with a
# flagged row, which silently deletes valid rows when the concatenated frame
# carries duplicated index labels.
_has_label = (df['label'] != -1).to_numpy()
df = df.loc[_has_label].reset_index(drop=True)  # fresh 0..n-1 index

print('Current size: '+str(len(df)))


In [None]:
# SPLIT MAIN TEXT

# Split main_text each Semicolon, Comma and Period
def split_text(text):
    """Split ``text`` into fragments at every period, semicolon and comma.

    Returns the list produced by ``str.split`` (delimiters removed, surrounding
    whitespace and empty fragments kept).
    """
    # Map '.' and ';' onto ',' in one pass, then cut on ','.
    unified = text.translate(str.maketrans(".;", ",,"))
    return unified.split(",")

# Replace every article string by its list of fragments (split_text applied row by row).
df['main_text'] = df['main_text'].apply(split_text)
# Sanity check: first two fragments of the first row.
print(df['main_text'][0][:2])


### First transformer "sentence_bert"

If we want to download or clone the transformer 'sentence_bert':

```
git lfs install
git clone https://huggingface.co/deepset/sentence_bert
```

but if we are going to download it on the go, then just continue

In [None]:
# LOAD MODEL SENTENCE_BERT
# NOTE: get_top_k_similar_sentences reads this global `model`. The same name is
# rebound to the Keras classifier further down the notebook, so re-run this cell
# before ranking sentences again.

model = SentenceTransformer(MODEL1_NAME)


In [None]:
# CHECK GPU
# Lists the GPU devices TensorFlow can see (the result is shown as the cell output).

tf.config.experimental.list_physical_devices("GPU")


In [None]:
# FUNCTIONS TO GET THE TOP K SIMILAR SENTENCES


def list_to_string(list_main_text):
  """Join text fragments into one string, each preceded by exactly one separator space.

  Empty fragments are skipped. A fragment that already starts with a space is
  appended unchanged; any other fragment gets a single space in front of it.
  """
  pieces = (
      fragment if fragment[0] == " " else " " + fragment
      for fragment in list_main_text
      if fragment != ""
  )
  return "".join(pieces)
  


def get_top_k_similar_sentences(claim, list_main_text, k):
  """Return the ``k`` fragments of ``list_main_text`` most similar to ``claim``, joined.

  The claim and all fragments are encoded together with the global sentence
  encoder ``model``; fragments are ranked by cosine similarity to the claim and
  the top ``k`` are joined with ``list_to_string`` in ascending order of
  similarity (most similar last).

  Args:
      claim: the claim text.
      list_main_text: sequence of text fragments; it is NOT modified.
      k: number of fragments to keep. If ``k`` is larger than the number of
          fragments, all of them are used.

  Returns:
      The joined fragments as one string, or ``""`` when there are no fragments
      or ``k <= 0``.
  """
  # Work on a copy: inserting the claim into the caller's list would leave it
  # there for good if encoding raised part-way through.
  sentences = list(list_main_text)

  # Nothing to rank. A plain `[-k:]` slice with k == 0 would return everything.
  if not sentences or k <= 0:
      return ""

  # Encode the claim (position 0) together with the fragments.
  sentences_embeddings = model.encode([claim] + sentences)

  # Cosine similarity between the claim and every fragment.
  cos_sim = cosine_similarity([sentences_embeddings[0]], sentences_embeddings[1:])[0]

  # Indices of the k most similar fragments, ascending by similarity.
  indices = sorted(range(len(cos_sim)), key=lambda i: cos_sim[i])[-k:]

  return list_to_string([sentences[i] for i in indices])
  

In [None]:
# FUNCTION TO CLEAR OUTPUT

from IPython.display import clear_output # this is for notebooks

def clear(index=0, each=1):
    """Clear the notebook output and the terminal when ``index`` is a multiple of ``each``.

    Called from long loops so that the accumulated output is wiped every
    ``each`` iterations instead of growing without bound.
    """
    if index % each != 0:
        return

    # Notebook front-ends: drop the current cell output immediately.
    clear_output(wait=False)
    # Plain terminals: 'cls' on Windows (os.name == 'nt'), 'clear' elsewhere.
    _ = os.system('cls' if os.name == 'nt' else 'clear')
            

In [None]:
# GET THE TOP K SIMILAR SENTENCES OF THE COLUMN 'main_text'
### -------------------------------------------------- ###
# THIS STEP ENCODES EVERY ARTICLE, SO IT TAKES SOME TIME.
# To skip it, jump to the next cell and load the saved
# result from disk.
### -------------------------------------------------- ###

k = 7  # number of most-similar sentences kept per article

# Build the new column in a list and assign it once. This avoids the chained
# assignment `df['main_text'][i] = ...`, which needed the pandas warning to be
# disabled and may write to a temporary copy instead of `df`.
top_k_texts = []
failed_rows = []  # (row position, error) for rows that could not be ranked
for i, (claim, sentences) in enumerate(zip(df['claim'], df['main_text'])):
    try:
        top_k_texts.append(get_top_k_similar_sentences(claim, sentences, k))
    except Exception as err:  # narrow on purpose: KeyboardInterrupt still stops the loop
        # Best effort: keep the whole article so the column stays string-typed
        # (a leftover list would break the later `.split()` calls), and report it below.
        failed_rows.append((i, repr(err)))
        top_k_texts.append(list_to_string(sentences))
    clear(i, each=50)
clear()

df['main_text'] = top_k_texts

if failed_rows:
    print('Rows that could not be ranked (full text kept):', len(failed_rows))
    print('First failure:', failed_rows[0])
    print('\n')

# PRINT THE LAST DATA
print('Clain:', df['claim'].iloc[-1])
print('\n')
print('Main Text:', df['main_text'].iloc[-1]) # Top k Sentences Joined


# SAVE DATAFRAME WITH TOP 7 SENTENCES
df.to_pickle(BASE_FOLDER+"df_data.pkl")
# delete dataset from the memory
del df


----------------

----------------

In [None]:
### LOAD DATASET WITH THE TOP 7 SENTENCES ###
# NOTE: all previous preprocessing steps can be skipped by loading this file
# from disk. (This note used to be a string literal closed with four quotes,
# which made the whole cell a SyntaxError.)
# SECURITY: unpickling executes arbitrary code - only load a df_data.pkl that
# this notebook (or someone you trust) produced.

df = pd.read_pickle(BASE_FOLDER + 'df_data.pkl')
df.head()


### plot the distribution to choose a sequence length for main_text and claims

In [None]:
sns.set_style('darkgrid')

# Plot distPlot
def distplot(seq_len, title=None):
  """Plot the distribution (histogram + KDE curve) of a series of sequence lengths.

  Args:
      seq_len: 1-D collection of lengths, e.g. a pandas Series of word counts.
      title: optional title for the figure.
  """
  plt.figure(figsize=(16, 10))
  # sns.distplot is deprecated; histplot with kde=True and a density-normalised
  # histogram is its documented replacement.
  ax = sns.histplot(seq_len, kde=True, stat='density')
  if title is not None:
      ax.set_title(title)
  plt.show()
  

In [None]:

# Number of whitespace-separated words in every claim and in every main_text
claim_seqs_len = df['claim'].map(lambda text: len(text.split()))
main_text_seqs_len = df['main_text'].map(lambda text: len(text.split()))


In [None]:

# Plot the word-count distributions: first for the claims, then for the main texts.
distplot(claim_seqs_len)
distplot(main_text_seqs_len)


In [None]:

# Maximum sequence length for claims: cut off at 45 because the plot shows that
# there is almost nothing longer than 45.
# NOTE(review): the plots count whitespace-separated words, whereas these values
# are used as the tokenizer's max_length (tokens) - confirm the margin is enough.
CLAIM_SEQ_LEN = 45

# Maximum sequence length for main_text: cut off at 270 for the same reason.
M_TEXT_SEQ_LEN = 270


## main transformer "bert-base-cased"

If we want to download or clone the transformer 'bert-base-cased':

```
git lfs install
git clone https://huggingface.co/bert-base-cased
```

but if we are going to download it on the go, then just continue

In [None]:

# Load the tokenizer that matches the main transformer (MODEL2_NAME).
tokenizer = AutoTokenizer.from_pretrained(MODEL2_NAME)


In [None]:

# Create the arrays to store input_ids and attention_mask for claims and main_texts.
# One row per example, one column per token position. int32 is used because
# token ids and masks are integers and the model inputs are declared as int32;
# the NumPy default (float64) doubles the memory and the size of the saved files.
Xids_Claims = np.zeros((len(df), CLAIM_SEQ_LEN), dtype=np.int32)
Xmask_Claims = np.zeros((len(df), CLAIM_SEQ_LEN), dtype=np.int32)

Xids_Mtext = np.zeros((len(df), M_TEXT_SEQ_LEN), dtype=np.int32)
Xmask_Mtext = np.zeros((len(df), M_TEXT_SEQ_LEN), dtype=np.int32)


In [None]:

# Check the allocated shapes: (number of rows, sequence length).
print(Xids_Claims.shape)
print(Xids_Mtext.shape)


In [None]:

# Get input_ids and attention_masks

def _encode_texts(texts, seq_len):
    """Tokenize ``texts`` in one batch, padded/truncated to exactly ``seq_len`` tokens.

    Returns a pair ``(input_ids, attention_mask)`` of NumPy arrays with one row
    per text and ``seq_len`` columns.
    """
    texts = list(texts)
    if not texts:
        # Nothing to encode; keep the (rows, seq_len) layout.
        empty = np.zeros((0, seq_len), dtype=np.int32)
        return empty, empty.copy()

    encoded = tokenizer(texts, max_length=seq_len,
                        truncation=True, padding="max_length",
                        add_special_tokens=True, return_token_type_ids=False,
                        return_attention_mask=True, return_tensors='np')
    return encoded['input_ids'], encoded['attention_mask']


# One batched call per column replaces two tokenizer calls (each creating
# TensorFlow tensors) per row; the values written are the same.
# Slice assignment keeps the pre-allocated arrays (and their dtype).
Xids_Claims[:, :], Xmask_Claims[:, :] = _encode_texts(df['claim'], CLAIM_SEQ_LEN)
Xids_Mtext[:, :], Xmask_Mtext[:, :] = _encode_texts(df['main_text'], M_TEXT_SEQ_LEN)
    

In [None]:
# PRINT CLAIMS (IDS AND MASK)
# Visual check of the token ids and attention masks of the claims, with their shapes.

print(Xids_Claims)
print()
print(Xids_Claims.shape)

print('\n\n')

print(Xmask_Claims)
print()
print(Xmask_Claims.shape)


In [None]:
# PRINT MAIN TEXTS (IDS AND MASK)
# Visual check of the token ids and attention masks of the main texts, with their shapes.

print(Xids_Mtext)
print()
print(Xids_Mtext.shape)

print('\n\n')

print(Xmask_Mtext)
print()
print(Xmask_Mtext.shape)


#### Create One hot encoded Labels

In [None]:

# Get the integer class labels from the DataFrame.
arr_labels = df['label'].values

# Create a NumPy array of zeros with shape (number of rows, number of labels).
# The number of labels is taken as the largest label value + 1.
labels = np.zeros((arr_labels.size, arr_labels.max()+1)) 
labels.shape


In [None]:

# Inspect the row indices, the integer labels and the number of labels.
print(np.arange(arr_labels.size))
print(arr_labels)
print(arr_labels.size)


In [None]:

# One-hot encode: for every row i (0 .. arr_labels.size - 1) write a 1 in the
# column given by its label value arr_labels[i]; every other entry stays 0.
labels[np.arange(arr_labels.size), arr_labels] = 1
labels


In [None]:
# EXTRACT 10% OF THE DATA FOR TEST
# The first 10% of the rows become the hold-out test set; the rest stays for
# training/validation. NOTE: re-running this cell shrinks the arrays again.

test_size = int(len(Xids_Claims) * 0.1) # 10%

# Leading slice of every array -> test set
(Xids_Claims_test, Xmask_Claims_test,
 Xids_Mtext_test, Xmask_Mtext_test, labels_test) = [
    full_array[:test_size]
    for full_array in (Xids_Claims, Xmask_Claims, Xids_Mtext, Xmask_Mtext, labels)
]

# Remaining rows -> train/validation set
(Xids_Claims, Xmask_Claims,
 Xids_Mtext, Xmask_Mtext, labels) = [
    full_array[test_size:]
    for full_array in (Xids_Claims, Xmask_Claims, Xids_Mtext, Xmask_Mtext, labels)
]


print(Xids_Claims.shape)
print(Xids_Claims_test.shape)


#### Save all the data and Reload it

In [None]:
# SAVE ALL THE DATA

def _save_arrays(directory, named_arrays):
    """Write every array of ``named_arrays`` to ``directory + name + '.npy'``."""
    for file_stem, array in named_arrays.items():
        with open(directory + file_stem + '.npy', 'wb') as f:
            np.save(f, array)


# train and val
_save_arrays(TRAIN_DATA_DIR, {
    'Xids_Claims': Xids_Claims,
    'Xmask_Claims': Xmask_Claims,
    'Xids_Mtext': Xids_Mtext,
    'Xmask_Mtext': Xmask_Mtext,
    'labels': labels,
})

# test
_save_arrays(TEST_DATA_DIR, {
    'Xids_Claims_test': Xids_Claims_test,
    'Xmask_Claims_test': Xmask_Claims_test,
    'Xids_Mtext_test': Xids_Mtext_test,
    'Xmask_Mtext_test': Xmask_Mtext_test,
    'labels_test': labels_test,
})


# delete all the data from memory (it is reloaded from disk when needed)
del df, Xids_Claims, Xmask_Claims, Xids_Mtext, Xmask_Mtext, labels
del Xids_Claims_test, Xmask_Claims_test, Xids_Mtext_test, Xmask_Mtext_test, labels_test


---------------

In [None]:
# LOAD TRAIN AND VAL DATA (this data can simply be loaded from disk, skipping the previous preprocessing steps)

Xids_Claims, Xmask_Claims, Xids_Mtext, Xmask_Mtext, labels = (
    np.load(TRAIN_DATA_DIR + file_stem + '.npy')
    for file_stem in ('Xids_Claims', 'Xmask_Claims',
                      'Xids_Mtext', 'Xmask_Mtext', 'labels')
)
  

In [None]:

# Quick check of the reloaded claim token ids and their shape.
print(Xids_Claims)
print(Xids_Claims.shape)


#### Put our array into a tensorflow dataset object

In [None]:
# CREATE THE TENSORFLOW DATASET OBJECT
# Each dataset element is a 5-tuple holding one row of every array, in this order:
# (claim ids, claim mask, main_text ids, main_text mask, one-hot label).

dataset = tf.data.Dataset.from_tensor_slices((Xids_Claims, Xmask_Claims, 
                                              Xids_Mtext, Xmask_Mtext, labels))

# Print the first element as a sanity check.
for i in dataset.take(1): print(i)


In [None]:

# map the info with the data in a dic and the labels outside of the dic in the tuple
def map_func(input_ids_claims, masks_claims,input_ids_mtext, masks_mtext, labels):
  """Pack one dataset element as ``(features, labels)`` for training.

  ``features`` is a dict keyed by the names of the model's input layers.
  """
  features = {
      'input_ids_claims': input_ids_claims,
      'attention_mask_claims': masks_claims,
      'input_ids_mtext': input_ids_mtext,
      'attention_mask_mtext': masks_mtext,
  }
  return features, labels
          
# Re-shape every element from a flat 5-tuple into (features dict, labels).
dataset = dataset.map(map_func)
# Print the first mapped element as a sanity check.
for i in dataset.take(1): print(i)


In [None]:
# FUNCTION TO SPLIT, SHUFFLE AND CREATE BATCH WITH THE TENSORFLOW DATASET


def get_dataset_partitions_tf(ds, ds_size, train_split=0.8, val_split=0.1, 
                              test_split=0.1, seed=10, shuffle=True, 
                              shuffle_size=100000, batch=True, batch_size=32):
    """Shuffle, batch and split a ``tf.data.Dataset`` into train/val/test parts.

    Args:
        ds: the (unbatched) dataset to split.
        ds_size: number of elements in ``ds``.
        train_split, val_split, test_split: fractions of the data per part;
            they must sum to 1.
        seed: seed of the shuffles, so the split is the same between runs.
        shuffle: shuffle the elements before batching/splitting.
        shuffle_size: shuffle buffer size; use a value >= ``ds_size`` for a
            full shuffle.
        batch: group the elements into batches before splitting.
        batch_size: number of elements per batch.

    Returns:
        ``(train_ds, val_ds, test_ds)``. When ``batch`` is true the split is
        made over whole batches, so the sizes are counted in batches.

    Raises:
        AssertionError: if the three fractions do not sum to 1.
    """
    # Tolerance instead of `== 1`: e.g. 0.7 + 0.1 + 0.2 is 0.9999999999999999.
    split_total = train_split + test_split + val_split
    assert abs(split_total - 1.0) < 1e-9, \
        "train_split + val_split + test_split must sum to 1"

    if shuffle:
        # reshuffle_each_iteration=False freezes the order. With the default
        # (True) the data is re-shuffled on every pass, so take()/skip() select
        # different elements each epoch and the train and validation parts
        # overlap over the course of training.
        ds = ds.shuffle(shuffle_size, seed=seed, reshuffle_each_iteration=False)

    if batch:
        # Batching no longer depends on `shuffle` being enabled.
        ds = ds.batch(batch_size)
        # Number of batches = ceil(ds_size / batch_size); computed arithmetically
        # instead of materialising the whole dataset just to count it.
        ds_size = -(-ds_size // batch_size)

    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)

    train_ds = ds.take(train_size)
    val_ds = ds.skip(train_size).take(val_size)
    test_ds = ds.skip(train_size).skip(val_size)

    if shuffle and train_size > 0:
        # Keep per-epoch shuffling for training, but only *inside* the train
        # partition, so nothing leaks into the validation/test parts.
        train_ds = train_ds.shuffle(train_size, seed=seed)

    return train_ds, val_ds, test_ds
    

In [None]:
# SPLIT DATASET

# Length of the dataset. len() reads the dataset's known cardinality, so the
# elements do not have to be materialised into a Python list just to be counted.
DS_LEN = len(dataset)

# 90% train / 10% validation; the third returned part is unused here because the
# hold-out test set was already separated and saved earlier.
# shuffle_size only needs to be at least as large as the dataset.
train, val, _ = get_dataset_partitions_tf(dataset, DS_LEN, train_split=0.9, 
                                             val_split=0.1, test_split=0.0, seed=10, 
                                             shuffle=True, shuffle_size=100000, 
                                             batch=True, batch_size=32)


In [None]:

# Size of each partition. The partitions were built with batch=True, so these
# are numbers of batches, not numbers of examples.
print(len(train))
print(len(val))


In [None]:

# Delete the full dataset object; only the train/val partitions are used from here on.
del dataset


## Build Model Architecture

In [None]:

# CREATE THE MODEL "bert-base-cased"
# Loaded once at module level; create_model reads this global `bert`, so the
# claim branch and the main_text branch share the same transformer instance.

bert = TFAutoModel.from_pretrained(MODEL2_NAME)


# FUNCTIONS TO CREATE THE MAIN MODEL

def create_model(SEQ_LEN, input_ids_name, input_mask_name):
    """Build one transformer branch: two Keras inputs fed through the global ``bert``.

    Args:
        SEQ_LEN: number of token positions per example.
        input_ids_name: name of the token-id input layer; it has to match the
            corresponding key of the dataset's feature dict.
        input_mask_name: name of the attention-mask input layer (same rule).

    Returns:
        ``(input_ids, mask, embeddings)`` - the two ``Input`` tensors and the
        second output of ``bert`` for them.
    """
    token_ids = Input(shape=(SEQ_LEN,), name=input_ids_name, dtype='int32')
    attention_mask = Input(shape=(SEQ_LEN,), name=input_mask_name, dtype='int32')

    # Per the original author's note, bert returns two tensors: a 3-D last hidden
    # state and a 2-D pooled version of it intended for classification.
    # Only the second one (index 1) is used.
    bert_outputs = bert(token_ids, attention_mask=attention_mask)
    pooled = bert_outputs[1]

    return token_ids, attention_mask, pooled



def create_combined_model(CLAIM_SEQ_LEN, M_TEXT_SEQ_LEN, num_classes=4):
    """Build the two-branch classifier (claim + main_text) on top of the shared ``bert``.

    Both branches are created with ``create_model``; their outputs are
    concatenated and passed through a small dense head ending in a softmax.

    Args:
        CLAIM_SEQ_LEN: token length of the claim inputs.
        M_TEXT_SEQ_LEN: token length of the main_text inputs.
        num_classes: number of output classes (default 4, the value this
            function used to hard-code).

    Returns:
        An uncompiled ``tf.keras.Model`` with inputs ``input_ids_claims``,
        ``attention_mask_claims``, ``input_ids_mtext``, ``attention_mask_mtext``
        and output layer ``outputs``.
    """
    claim_input_ids, claim_mask, claim_output = create_model(CLAIM_SEQ_LEN, 
                                                             'input_ids_claims', 
                                                             'attention_mask_claims')
    mtext_input_ids, mtext_mask, mtext_output = create_model(M_TEXT_SEQ_LEN, 
                                                             'input_ids_mtext', 
                                                             'attention_mask_mtext')

    # combine both outputs
    combinedInput = concatenate([claim_output, mtext_output])

    X = BatchNormalization()(combinedInput) # normalize the data
    X = Dense(512, activation='relu')(X)
    X = Dropout(0.5)(X)
    X = Dense(128, activation='relu')(X)
    X = Dropout(0.1)(X)
    X = Dense(32, activation='relu')(X)
    outputs = Dense(num_classes, activation='softmax', name='outputs')(X)

    model = Model(inputs=[claim_input_ids, claim_mask, mtext_input_ids, mtext_mask], outputs=outputs)

    # Freeze BERT because the pretrained transformer is not retrained here.
    # The shared layer is addressed directly instead of through the positional
    # lookup model.layers[4], which silently freezes the wrong layer as soon as
    # the number or order of the inputs changes.
    bert.trainable = False

    return model
    

In [None]:
# CREATE MODEL
# NOTE: this rebinds the global name `model`, which earlier held the
# SentenceTransformer that get_top_k_similar_sentences relies on.

model = create_combined_model(CLAIM_SEQ_LEN, M_TEXT_SEQ_LEN)
model.summary()


In [None]:
# COMPILE MODEL
# Adam with an initial learning rate of 1e-3 and a decay of 1e-3 / 200;
# categorical cross-entropy matches the one-hot encoded labels.

optimizer = Adam(learning_rate=1e-3, decay=1e-3 / 200)
loss = 'categorical_crossentropy'
acc = 'accuracy'

model.compile(optimizer=optimizer, loss=loss, metrics=[acc])


In [None]:
# CALLBACKS

import gc # Garbage Collector: to free mem
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# This callback frees memory at the end of each epoch.
class GarbageCollectorCallback(Callback):
    """Keras callback that runs Python's garbage collector after every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        gc.collect() # free mem with garbage collector

# Stop training when the validation loss has not improved for 30 consecutive
# epochs, and restore the weights of the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)

# Only save the best model (weights only) based on the minimum val_loss.
mcp_save = ModelCheckpoint(CHECKPOINT_PATH, save_best_only=True, monitor='val_loss', 
                           mode='min', save_weights_only=True, verbose=1)

# Disabled: reduce the learning rate by a factor of 0.001 when val_loss stops improving.
# reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.001, patience=5, 
#                               min_delta=1e-4, mode='min')


In [None]:
# TRAIN THE MODEL
# Up to 150 epochs; early_stopping may end training sooner and mcp_save keeps
# the best weights on disk. H holds the training history used for plotting.

epochs = 150
H = model.fit(train, validation_data=val, epochs=epochs,
             callbacks=[GarbageCollectorCallback(), early_stopping, mcp_save])


In [None]:
# FUNCTION TO PLOT THE TRAINING HISTORY

def plot_training_history(H):
  """Plot training/validation loss and accuracy per epoch from a Keras ``History``."""
  plt.style.use("ggplot")
  plt.figure()

  epoch_axis = np.arange(0, len(H.epoch))
  # (key in H.history, legend label), drawn in this order
  curves = (
      ("loss", "train_loss"),
      ("val_loss", "val_loss"),
      ("accuracy", "train_acc"),
      ("val_accuracy", "val_acc"),
  )
  for history_key, legend_label in curves:
      plt.plot(epoch_axis, H.history[history_key], label=legend_label)

  plt.title("Training Loss and Accuracy")
  plt.xlabel("Epoch #")
  plt.ylabel("Loss/Accuracy")
  plt.legend()
  plt.show()

plot_training_history(H)


In [None]:

# Delete the trained model object; it is rebuilt and its best weights are
# reloaded from CHECKPOINT_PATH in the prediction section.
del model


-------------

## Preprocess Data, Load model and Make predictions

In [None]:
# LOAD TEST DATA

(Xids_Claims_test, Xmask_Claims_test,
 Xids_Mtext_test, Xmask_Mtext_test, labels_test) = (
    np.load(TEST_DATA_DIR + file_stem + '.npy')
    for file_stem in ('Xids_Claims_test', 'Xmask_Claims_test',
                      'Xids_Mtext_test', 'Xmask_Mtext_test', 'labels_test')
)
    

In [None]:
# Preview the loaded claim token ids of the test set (shown as the cell output).
Xids_Claims_test

In [None]:
# CONVERT DATA TO TENSORFLOW DATASET
# Each element is a 5-tuple: (claim ids, claim mask, main_text ids, main_text mask, one-hot label).

dataset_test = tf.data.Dataset.from_tensor_slices((Xids_Claims_test, Xmask_Claims_test, 
                                                  Xids_Mtext_test, Xmask_Mtext_test, labels_test))
# Print the first element as a sanity check.
for i in dataset_test.take(1): print(i)


In [None]:

# Map the test data into a dict of model inputs. For prediction only the
# features are kept; for evaluation the labels are returned next to the dict.
# The prediction mapper has its own name so that it does not silently replace
# the training `map_func` (which returns (features, labels)) when cells are
# re-run out of order.

def map_func_predict(input_ids_claims, masks_claims,input_ids_mtext, masks_mtext, labels):
  """Pack one element as the features dict only (``labels`` is dropped)."""
  return {'input_ids_claims': input_ids_claims, 'attention_mask_claims': masks_claims, 
          'input_ids_mtext': input_ids_mtext, 'attention_mask_mtext': masks_mtext}

def map_func_eval(input_ids_claims, masks_claims,input_ids_mtext, masks_mtext, labels):
  """Pack one element as ``(features dict, labels)``."""
  return {'input_ids_claims': input_ids_claims, 'attention_mask_claims': masks_claims, 
          'input_ids_mtext': input_ids_mtext, 'attention_mask_mtext': masks_mtext}, labels

dataset_test_eval = dataset_test.map(map_func_eval)
dataset_test = dataset_test.map(map_func_predict)

# Print the first prediction element as a sanity check.
for i in dataset_test.take(1): print(i)


In [None]:
# GROUP BOTH TEST DATASETS INTO BATCHES OF 32 ELEMENTS
# Batching keeps the element order, so predictions line up with labels_test.

batch_size=32
dataset_test = dataset_test.batch(batch_size)
dataset_test_eval = dataset_test_eval.batch(batch_size)


In [None]:
# LOAD MODEL AND WEIGHTS
# The sequence lengths are repeated here so that this section does not depend on
# the plotting cells; they must match the values used when the data was tokenized.
# `bert` and create_combined_model must already be defined (model-architecture cell).

CLAIM_SEQ_LEN = 45
M_TEXT_SEQ_LEN = 270

# Rebuild the architecture, then load the best weights saved during training.
model = create_combined_model(CLAIM_SEQ_LEN, M_TEXT_SEQ_LEN)
model.load_weights(CHECKPOINT_PATH)

model.summary()


In [None]:
# EVALUATE MODEL

# Class names in label-index order (index 0 -> "FALSE", ..., index 3 -> "UNPROVED").
l_names=["FALSE","MIXTURE","TRUE","UNPROVED"]

print("[INFO]: Evaluating the model...")

# Model prediction: one row of class probabilities per test example.
labels_pred = model.predict(dataset_test)
# Convert the one-hot ground truth and the predicted probabilities to class indices.
y_te, y_pred = labels_test.argmax(axis=1), labels_pred.argmax(axis=1)

# Classification report as printable text ...
c_report = classification_report(y_te, y_pred, 
                                 target_names=l_names)

# ... and as a dict, used later for the heatmap.
c_report_dic = classification_report(y_te, y_pred, 
                                     target_names=l_names, output_dict=True)

print("[INFO]: Done!")


### Show results

In [None]:
# PLOT CONFUSION MATRIX

# Rows are the real classes, columns the predicted ones.
cm = confusion_matrix(y_te, y_pred)

# Draw the matrix as an annotated heatmap.
fig, ax = plt.subplots(figsize=(9, 6))

sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    linewidths=.5,
    xticklabels=l_names,
    yticklabels=l_names,
    ax=ax,
)

ax.set_title('Confusion Matrix', size=20, pad=20)
ax.set_xlabel('Predictions', size=13)
ax.set_ylabel('Real', size=13)

plt.show()


In [None]:

# Text version of the classification report (the columns of the text table rely on print).
print(c_report)


In [None]:
# PLOT HEATMAP OF THE CLASSIFICATION REPORT

fig, ax = plt.subplots(figsize=(9, 6))

# Drop the last metric row of the report dict, then transpose so that every
# class/average is a row and every metric a column.
report = (
    pd.DataFrame(c_report_dic)
    .iloc[:-1, :]
    .T
    .rename(columns={"precision": "PRECISION",
                     "recall": "RECALL",
                     "f1-score": "F1-SCORE"})
)

sns.heatmap(report, annot=True, fmt='.2f', cmap='YlGnBu', linewidths=.5, ax=ax)

ax.set_title('Classification Report', size=20, pad=20)
ax.xaxis.set_ticks_position('top')
plt.show()
