# Preparation

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score,\
 accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay
from sklearn.svm import SVC
import transformers

In [2]:
# Excel tables holding the train / test sets. The "w_emmbd" file names suggest the
# text embeddings are already stored in them (TODO confirm); later cells read the
# integer-named columns 0..767 as features and the "Fake" column as the target.
train_data_path = "/content/drive/MyDrive/FE_models/Data/train_data_w_emmbd.xlsx"
test_data_path = "/content/drive/MyDrive/FE_models/Data/test_data_w_emmbd.xlsx"

# Read both workbooks, train first and then test.
train_data, test_data = (
    pd.read_excel(path) for path in (train_data_path, test_data_path)
)

# Names of the two label columns.
target_list = ['Fake', 'Truthful']


In [3]:
# train_data_path = "/content/drive/MyDrive/FE_models/Data/Train_data.xlsx"
# test_data_path = "/content/drive/MyDrive/FE_models/Data/Test_data.xlsx"

# train_data = pd.read_excel(train_data_path)
# test_data = pd.read_excel(test_data_path)

# target_list = ['Fake', 'Truthful']

In [4]:
# Token length used by emmbding(): every text is padded or truncated to exactly
# this many tokens before it is passed to the BERT model.
MAX_LEN = 256

In [5]:
# Load the tokenizer and the BERT encoder from checkpoints stored on Google Drive
# (nothing is downloaded here; both arguments are local paths).
# The model path name suggests a fine-tuned checkpoint - TODO confirm.
b_tokenizer = transformers.AutoTokenizer.from_pretrained(
    "/content/drive/MyDrive/model-bert-base-arabertv2/Models/tokenizers-bert-base-arabertv2"
)
b_model = transformers.AutoModel.from_pretrained(
    "/content/drive/MyDrive/FE_models/Fine-tuned_BERT/FineTunedBERT",
    return_dict=True,
)

# Data

In [6]:
def emmbding(text):
  """Embed one text with the BERT encoder and return its pooled output.

  The text is tokenized with ``b_tokenizer`` (special tokens added, then padded
  or truncated to exactly ``MAX_LEN`` tokens) and passed through ``b_model``.

  Args:
    text: The string to embed.

  Returns:
    numpy.ndarray: the model's ``pooler_output`` for this text. The leading
    dimension is the batch dimension (one text -> one row); presumably the
    shape is (1, 768), matching the 768 feature columns the classifier is
    trained on - confirm against the model config.
  """
  # Local import: the notebook's import cell does not import torch itself.
  import torch

  # Named `encoded` rather than `input` so the built-in input() is not shadowed.
  # Calling the tokenizer directly is the documented replacement for encode_plus.
  encoded = b_tokenizer(text,
                        add_special_tokens=True,
                        max_length=MAX_LEN,
                        padding="max_length",
                        truncation=True,
                        return_attention_mask=True,
                        return_tensors="pt"
                        )

  # Inference only: skip autograd bookkeeping so no graph is built or kept alive.
  with torch.no_grad():
    pooled = b_model(input_ids=encoded["input_ids"],
                     attention_mask=encoded["attention_mask"],
                     token_type_ids=encoded["token_type_ids"]
                     ).pooler_output

  return pooled.numpy()

In [7]:
# # Embed the Text in the train set
# embd_texts = (emmbding( train_data['Text'][0]))

# for text in train_data['Text'][1:]:
#   embd_texts = np.vstack([embd_texts ,emmbding(text)])

# embd_texts = pd.DataFrame(embd_texts)


# # Embed the Text in the test set
# embd_texts = (emmbding( test_data['Text'][0]))

# for text in test_data['Text'][1:]:
#   embd_texts = np.vstack([embd_texts ,emmbding(text)])

# embd_texts = pd.DataFrame(embd_texts)

In [8]:
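# # Adding the Text embedding as new features
# # NOTE(review): the cell above stores the train and the test embeddings in the same
# # `embd_texts` variable, so running all of it before this cell would attach the
# # test-set embeddings to train_data as well. Presumably the two halves were run one
# # at a time; use separate variables if this is ever re-enabled.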
# train_data.drop(labels=['Text','Truthful'], axis=1, inplace=True)
# test_data.drop(labels=['Text','Truthful'], axis=1, inplace=True)

# train_data = pd.concat((train_data, embd_texts), axis=1)
# test_data = pd.concat((test_data, embd_texts), axis=1)


In [9]:
# Labels of the feature columns: the integers 0 through 767 (768 columns in total).
col = [*range(768)]

# The Model

In [10]:
# Support-vector classifier created with no arguments, i.e. scikit-learn's default settings.
svc = SVC()

# Training

In [11]:
# Fit on the feature columns listed in `col`; the target is the "Fake" column
# (interface() below reports label 1 as Fake and 0 as Truthful).
svc.fit(train_data[col], train_data["Fake"])

# Testing  

In [12]:
# Ground-truth labels of the held-out test set.
y_test = test_data['Fake']
# Predictions of the fitted SVC on the same feature columns used for training.
pred_y_test = svc.predict(test_data[col])

In [13]:
# Headline scores on the test set, collected in one dict (same keys and order as
# the function names). precision/recall/f1 are called with their defaults.
metrics = {
    "accuracy_score": accuracy_score(y_test, pred_y_test),
    "precision_score": precision_score(y_test, pred_y_test),
    "recall_score": recall_score(y_test, pred_y_test),
    "f1_score": f1_score(y_test, pred_y_test),
}

print(f"metrics: {metrics}")

# Per-class breakdown. target_names are listed in label order, so 'Truthful' names
# label 0 and 'Fake' names label 1 (the same mapping interface() uses).
print(classification_report(y_test, pred_y_test, target_names=['Truthful', 'Fake']))


metrics: {'accuracy_score': 0.9393063583815029, 'precision_score': 0.9411764705882353, 'recall_score': 0.946236559139785, 'f1_score': 0.9436997319034853}
              precision    recall  f1-score   support

    Truthful       0.94      0.93      0.93       160
        Fake       0.94      0.95      0.94       186

    accuracy                           0.94       346
   macro avg       0.94      0.94      0.94       346
weighted avg       0.94      0.94      0.94       346



# Interface For testing

In [14]:
def interface(model):
  """Prompt for an Arabic review on stdin and print the model's verdict.

  The entered text is embedded with emmbding() and the embedding is passed to
  ``model.predict``. A predicted label of 1 is reported as Fake and 0 as
  Truthful; any other label prints nothing.

  Args:
    model: A fitted classifier exposing ``predict`` that accepts the array
      returned by emmbding().
  """
  # Label -> message lookup, replacing an if/elif chain.
  verdicts = {1: 'The review is Fake', 0: 'The review is Truthful'}

  review = input("Enter an Arabic text:                              \n ")
  # .item() turns the single-element prediction array into a plain Python scalar.
  label = model.predict(emmbding(review)).item()

  message = verdicts.get(label)
  if message is not None:
    print(message)

In [15]:
# Interactive check: waits for a review typed on stdin, then prints the SVC's verdict.
interface(svc)

Enter an Arabic text:                              
 اكره كل هذه الفنادق الكريهة والقذرة اتمنا ان يموت جميع من في العالم لهذا السبب 
The review is Fake


In [16]:
# Second interactive check with another review typed on stdin.
interface(svc)

Enter an Arabic text:                              
 كان الفندق ذو جودة متوسطة كما هو متوقع ولكن الموظفون كانوا لطيفين جدا 
The review is Truthful


In [17]:
# اكره كل هذه الفنادق الكريهة والقذرة اتمنا ان يموت جميع من في العالم لهذا السبب => FAKE
# كان الفندق ذو جودة متوسطة كما هو متوقع ولكن الموظفون كانوا لطيفين جدا => Truthful

# Exit

In [18]:
# Ends the session once the run is finished.
# NOTE(review): in a hosted notebook this presumably stops the kernel and discards
# all in-memory state (loaded model, data, fitted SVC) - confirm this is intended.
exit()