# This notebook tests the accuracy of our model again with validation data

In [48]:
#import libraries
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification,TextClassificationPipeline
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc 
import numpy as np


In [49]:
# Load the training dataset into a pandas dataframe.
df = pd.read_csv("/Users/pelumioluwaabiola/Downloads/Researchwork/CSVfiles/validation.csv")

# Report the number of sentences.
print('Number of validation sentences: {:,}\n'.format(df.shape[0]))

# Display 10 random rows from the data.
df.sample(10)

Number of validation sentences: 438



Unnamed: 0,standard_type,document_title,document_text,label
314,sustainable standards,validation_esg,institutions and other corporates. The IIF has...,0
60,sustainable standards,validation_esg,"is needed for mitigation, because large-scale ...",0
131,sustainable standards,validation_esg,requirements. SFDR introduces three new concep...,0
319,sustainable standards,validation_esg,"to figure out what you need to do, and where, ...",0
127,sustainable standards,validation_esg,"Like the EU Taxonomy, a social taxonomy would ...",0
11,sustainable standards,validation_sustainable,across the sector are involved in the identifi...,0
342,non-sustainable standards,validation_nonsustainable,require some form of assurance when using ﬁnan...,1
128,sustainable standards,validation_esg,"discussion,” wrote Sarah Thompson of Arthur Co...",0
75,sustainable standards,validation_esg,needs for SDG investments are vast and urgent....,0
86,sustainable standards,validation_esg,"on March 4, 2020, and the European Parliament ...",0


In [50]:
# Get the lists of sentences and their standard type.
sentences = df.document_text.values
labels = df.label.values

In [51]:
#load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("Pelumioluwa/Sustainable-Finance-BERT")
model = AutoModelForSequenceClassification.from_pretrained("Pelumioluwa/Sustainable-Finance-BERT")
#pass both in a pipeline
pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)



In [52]:
#make predictions on validation set 
preds = []
max_score_item_list = []
for sentence in sentences:
    pred = pipe(sentence)
    # Flatten the list of lists.
    flat_data = [item for sublist in pred for item in sublist]

    # Find the dictionary with the highest score.
    max_score_item = max(flat_data, key=lambda x:x['score'])
    max_score_item_list.append(max_score_item)

    if max_score_item['label'] == 'LABEL_0':
        preds.append(0)
    else:
        preds.append(1)

In [53]:
# Confusion Matrix 
cm = confusion_matrix(labels, preds) 
# Accuracy 
accuracy = accuracy_score(labels, preds) 
# Precision 
precision = precision_score(labels, preds) 
# Recall 
recall = recall_score(labels, preds) 
# F1-Score 
f1 = f1_score(labels, preds) 
# ROC Curve and AUC 
fpr, tpr, thresholds = roc_curve(labels, preds) 
roc_auc = auc(fpr, tpr) 
  
print("Confusion Matrix:") 
print(cm) 
print("Accuracy:", accuracy) 
print("Precision:", precision) 
print("Recall:", recall) 
print("F1-Score:", f1) 
print("ROC AUC:", roc_auc)

Confusion Matrix:
[[333   1]
 [ 51  53]]
Accuracy: 0.8812785388127854
Precision: 0.9814814814814815
Recall: 0.5096153846153846
F1-Score: 0.6708860759493671
ROC AUC: 0.7533106863196684


In [54]:
# Count the number of 1s and 0s in labels
num_nonsustain = np.count_nonzero(labels == 1)
num_sustain = np.count_nonzero(labels == 0)

print('non_sustain is : ', num_nonsustain)
print('sustain is : ', num_sustain)


non_sustain is :  104
sustain is :  334


In [55]:
#count numbers of 1s and 0s in preds
num_nonsustain_pred = preds.count(1)
num_sustain_pred = preds.count(0)

print('non_sustain is : ', num_nonsustain_pred)
print('sustain is : ', num_sustain_pred)

non_sustain is :  54
sustain is :  384
