<a href="https://colab.research.google.com/github/GeorgeSherif/NLP-ChatEGP/blob/main/Testing_Without_Preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Testing with the Arabic Dataset


1.   Load the model.
2.   Load the dataset.
3.   Translate the dataset and preprocess it.
4.   Test using the model.



In [4]:
# Colab bootstrap: install the translation / classification dependencies into the runtime.
# Versions are not pinned; the recorded run of this cell reported transformers 4.29.2,
# tokenizers 0.13.3, sentencepiece 0.1.99, sacremoses 0.0.53, xformers 0.0.19 and pygal 3.0.0.
!pip install xformers transformers sentencepiece torch pygal torchvision sacremoses


Successfully installed huggingface-hub-0.14.1 mypy-extensions-1.0.0 pygal-3.0.0 pyre-extensions-0.0.29 sacremoses-0.0.53 sentencepiece-0.1.99 tokenizers-0.13.3 transformers-4.29.2 typing-inspect-0.8.0 xformers-0.0.19


In [5]:
import pandas as pd
import numpy as np
import os
import warnings
import csv
import re
import seaborn as sns
import pygal as py
import torch
import torchvision

from sklearn.feature_extraction.text import CountVectorizer

warnings.filterwarnings("ignore", category=FutureWarning)


In [None]:
from google.colab import drive
from IPython.display import display # Allows the use of display() for DataFrames
# Mount Google Drive under /content/gdrive; later cells read the labelled CSV and the
# fine-tuned checkpoint from /content/gdrive/MyDrive/NLP.
drive.mount('/content/gdrive')

In [6]:
# Prefer the GPU when the runtime exposes one; otherwise run everything on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Load the labelled Arabic test set from the mounted Drive.
# pandas/numpy are already imported in the notebook's import cell, so they are not
# re-imported here.
dfTest = pd.read_csv('/content/gdrive/MyDrive/NLP/labelled.csv', engine="python")

# Later cells read 'Sentence' and 'label' and drop 'id'; fail here with a clear message
# instead of a KeyError several cells further down.
missing_columns = {'Sentence', 'label', 'id'} - set(dfTest.columns)
assert not missing_columns, f"labelled.csv is missing expected columns: {sorted(missing_columns)}"

In [8]:
from transformers import MarianTokenizer, MarianMTModel
# Arabic -> English Marian MT checkpoint used to translate the test sentences.
mname = "Helsinki-NLP/opus-mt-tc-big-ar-en"
# NOTE: the name `tokenizer` is rebound to a BertTokenizer in a later cell; re-run this
# cell before re-running the translation loop.
tokenizer = MarianTokenizer.from_pretrained(mname)
model_translate = MarianMTModel.from_pretrained(mname)

Downloading (…)neration_config.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

In [9]:
# Translate every Arabic sentence to English, one sentence per generate() call, and store
# the result in a new 'English' column.
# NOTE: `tokenizer` must still be the MarianTokenizer here; a later cell rebinds the name
# to a BertTokenizer.
translated = []
# Iterate over the values rather than indexing with dfTest['Sentence'][i], which is a
# label lookup and breaks when the frame does not have a default 0..n-1 index.
for arabic_sentence in dfTest['Sentence'].tolist():
    # Calling the tokenizer directly replaces the deprecated prepare_seq2seq_batch();
    # padding/truncation mirror that helper's defaults.
    encoded = tokenizer([arabic_sentence], return_tensors="pt", padding="longest", truncation=True)
    translated_tokens = model_translate.generate(**encoded)
    translated.append(tokenizer.decode(translated_tokens[0], skip_special_tokens=True))
dfTest['English'] = translated



In [11]:
# Swap the Arabic text for its English translation and encode the sentiment labels as
# integer class ids (0 = negative, 1 = neutral, 2 = positive).
label_to_id = {'negative': 0, 'neutral': 1, 'positive': 2}

# Only transform while the untranslated layout is present. After the first run 'Sentence'
# holds the English text, so running the drop again would throw the translations away.
if 'English' in dfTest.columns:
    dfTest = dfTest.drop(["Sentence", "id"], axis=1)
    dfTest = dfTest.rename(columns={'label': 'Sentiment', 'English': 'Sentence'})
    # map() yields an integer column directly (no reliance on replace() downcasting) and
    # turns unknown labels into NaN, which is reported instead of silently passed through.
    sentiment_ids = dfTest['Sentiment'].map(label_to_id)
    unknown_labels = dfTest.loc[sentiment_ids.isna(), 'Sentiment'].unique()
    assert len(unknown_labels) == 0, f"Unexpected sentiment labels: {list(unknown_labels)}"
    dfTest['Sentiment'] = sentiment_ids.astype('int64')
dfTest

Unnamed: 0,Sentiment,Sentence
0,0,The profits of the Gulf Livestock Company fell...
1,1,Aramex to discuss cash dividend of 16% for 2016
2,0,Al Ain Al Ahlia Insurance achieved a profit of...
3,2,Umm Al Quwain Cement’s profit increased by 30%...
4,0,The General Assembly of Fujairah Cement Indust...
...,...,...
63,1,The Dow Jones quoted an official of Ajman Isla...
64,1,Depa said in a statement on Nasdaq Dubai that ...
65,2,Dubai Islamic Insurance and Reinsurance Compan...
66,2,Gulf Investment Company’s profit in 2016 was A...


In [None]:
from transformers import BertTokenizer,BertForSequenceClassification, TextClassificationPipeline

# NOTE: this rebinds `tokenizer`, which previously held the Marian translation tokenizer.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
# bert-base-uncased with a 3-class classification head, moved to the selected device.
# The fine-tuned weights are loaded into this model in the following cell.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased',
                                                      num_labels = 3,
                                                      id2label={0: 'negative', 1: 'neutral', 2: 'positive'},
                                                      output_attentions = False,
                                                      output_hidden_states = False).to(device)

# Collects the overall accuracy values appended by the evaluation cell further down.
accuracy_per_epoch = []

In [64]:
# Restore the fine-tuned weights into the BERT classifier.
path = "/content/gdrive/MyDrive/NLP/BERT_ft_epoch8.model"
# map_location remaps tensors that were saved from a GPU, so the checkpoint also loads
# on a CPU-only runtime.
model.load_state_dict(torch.load(path, map_location=device))
# Build the pipeline on the same device as the model so its inputs are placed there too.
pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True, device=device)

In [65]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

# Tokenize the translated test sentences: pad to the longest sentence in the set and
# truncate anything longer than 200 tokens.
encoded_data_val_Arabic = tokenizer.batch_encode_plus(dfTest.Sentence.tolist(),
                                                      add_special_tokens = True,
                                                      return_attention_mask = True,
                                                      return_tensors = 'pt',
                                                      truncation = True,
                                                      padding = True,
                                                      max_length = 200)

input_ids_val_Arabic = encoded_data_val_Arabic['input_ids']
attention_masks_val_Arabic = encoded_data_val_Arabic['attention_mask']
labels_val_Arabic = torch.tensor(dfTest.Sentiment.values)

# Test dataset: (input_ids, attention_mask, label) per sentence.
dataset_val_Arabic = TensorDataset(input_ids_val_Arabic,
                                   attention_masks_val_Arabic,
                                   labels_val_Arabic)

batch_size = 8

# Evaluation needs no shuffling; a sequential sampler keeps the returned predictions in
# the same order as the rows of dfTest and makes runs reproducible.
dataloader_val_Arabic = DataLoader(dataset_val_Arabic,
                                   sampler = SequentialSampler(dataset_val_Arabic),
                                   batch_size = batch_size)



In [66]:
#accuracy score
def accuracy_per_class(preds, labels):
    """Print the accuracy of each class and return the overall accuracy.

    Args:
        preds: 2-D array of per-class scores; the predicted class of a row is its argmax.
        labels: array of true class ids (flattened before use).

    Returns:
        Fraction of samples whose predicted class equals the true class.
    """
    predicted = np.argmax(preds, axis=1).flatten()
    expected = labels.flatten()

    hits_overall = 0
    samples_overall = 0
    for label in np.unique(expected):
        in_class = expected == label
        class_size = int(np.count_nonzero(in_class))
        class_hits = int(np.count_nonzero(predicted[in_class] == label))
        print(f'Accuracy:{class_hits}/{class_size}\n -> {class_hits / class_size}')
        hits_overall += class_hits
        samples_overall += class_size
    return hits_overall / samples_overall

def evaluate(dataloader_val):
    """Run the global `model` over `dataloader_val` without gradient tracking.

    Each batch is read as (input_ids, attention_mask, labels) and moved to the global
    `device` before the forward pass.

    Args:
        dataloader_val: iterable of batches with the layout described above.

    Returns:
        Tuple of (loss averaged over batches, logits of all batches concatenated along
        axis 0, labels of all batches concatenated along axis 0), the latter two as
        numpy arrays.
    """
    model.eval()

    running_loss = 0
    logits_per_batch = []
    labels_per_batch = []

    for batch in tqdm(dataloader_val):
        on_device = tuple(tensor.to(device) for tensor in batch)

        # Labels are passed in, so the model returns the loss first and the logits second.
        with torch.no_grad():
            outputs = model(input_ids=on_device[0],
                            attention_mask=on_device[1],
                            labels=on_device[2])

        running_loss += outputs[0].item()
        logits_per_batch.append(outputs[1].detach().cpu().numpy())
        labels_per_batch.append(on_device[2].cpu().numpy())

    mean_loss = running_loss / len(dataloader_val)
    all_logits = np.concatenate(logits_per_batch, axis=0)
    all_labels = np.concatenate(labels_per_batch, axis=0)

    return mean_loss, all_logits, all_labels



In [67]:
# tqdm is used inside evaluate(); it has to be in scope before evaluate() is called.
from tqdm import tqdm

# Evaluate on the translated test set; the average loss is discarded.
_,predictions, true_vals = evaluate(dataloader_val_Arabic)

100%|██████████| 9/9 [00:35<00:00,  3.91s/it]


In [68]:
# Per-class and overall accuracy of the weights currently loaded in `model`.
# (The former bare `true_vals.shape` expression had no effect; check alignment instead.)
assert len(predictions) == len(true_vals), "evaluate() returned misaligned predictions and labels"
accuracy = accuracy_per_class(predictions, true_vals)
# Appending makes this cell cumulative: every run adds one more entry to the list.
accuracy_per_epoch.append(accuracy)

Accuracy:11/13
 -> 0.8461538461538461
Accuracy:24/27
 -> 0.8888888888888888
Accuracy:19/28
 -> 0.6785714285714286


In [69]:
# Overall accuracies accumulated by the evaluation cell, one entry per time it was run.
accuracy_per_epoch

[0.7205882352941176,
 0.8382352941176471,
 0.8088235294117647,
 0.8088235294117647,
 0.8088235294117647,
 0.8088235294117647,
 0.7941176470588235,
 0.7941176470588235]

In [41]:
# Per-sentence check of the classifier: tally the gold-label distribution and count how
# many predictions match the gold label.
# NOTE(review): run this only after the fine-tuned checkpoint has been loaded into
# `model`; otherwise the classification head is still untrained.
correct = 0
false = 0
positive = 0
negative = 0
neutral = 0

model.eval()  # make sure dropout is disabled before predicting
for i in range(len(dfTest)):
    row = dfTest.iloc[i]
    sentence = row['Sentence']
    label = int(row['Sentiment'])

    # Same 200-token limit as the batched evaluation; tensors are created on the model's
    # device so the forward pass does not fail with a device mismatch on GPU.
    token_ids = tokenizer.encode(sentence, add_special_tokens=True, truncation=True, max_length=200)
    input_ids = torch.tensor(token_ids, device=device).unsqueeze(0)
    attention_mask = torch.ones_like(input_ids)

    # No labels are passed, so the first output holds the logits.
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    predicted_label = int(outputs[0].argmax(dim=1).item())
    print(sentence, predicted_label, label)

    if label == 0:
        negative = negative + 1
    elif label == 1:
        neutral = neutral + 1
    elif label == 2:
        positive = positive + 1

    if predicted_label == label:
        correct = correct + 1
    else:
        false = false + 1
    print(correct, false)

The profits of the Gulf Livestock Company fell to 12.1 million dirhams in 2010, compared to 27.6 million dirhams in 2009. tensor([1]) 0
0 1
Error
Aramex to discuss cash dividend of 16% for 2016 tensor([1]) 1
1 1
Error
Al Ain Al Ahlia Insurance achieved a profit of AED 2.50 million in 2012 compared to AED 2.53 million in 2011. tensor([1]) 0
1 2
Error
Umm Al Quwain Cement’s profit increased by 30% to AED 7.22 million in the first quarter of this year tensor([1]) 2
1 3
Error
The General Assembly of Fujairah Cement Industries decided not to distribute cash dividends or shares for the year 2010. tensor([1]) 0
1 4
Error
Next Tuesday, ADCB’s Board of Directors will meet to discuss some general matters. tensor([1]) 1
2 4
Error
Dubai Development Company’s profit last year was AED 735,000 compared to AED 444,000 in 2013 tensor([1]) 2
2 5
Error
Al Ain Al Ahlia Insurance Company announced that it will disclose its financial results for the third quarter of 2011 on 31/10/2011 tensor([1]) 1
3 5
Erro

In [43]:
# Overall accuracy, in percent, from the `correct` / `false` counters.
print("Accuracy of testing = ", 100 * correct / (correct + false)) 
# Gold-label counts, printed in the order: negative, positive, neutral.
print(negative,positive,neutral) 

Accuracy of testing =  36.76470588235294
13 28 27
