In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
import pickle
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

In [3]:
# Load the consolidated feature table of the Pakistani media dataset.
# NOTE(review): the file name ends in .xlsx but it is parsed with read_csv —
# confirm the file really contains CSV text (a true Excel workbook would need
# pd.read_excel instead).
main_path = '/content/drive/My Drive/Fake news detection/Pakistani Media Dataset/'
dataset = pd.read_csv(f"{main_path}pakistani_dataset_consolidated_features.xlsx")

# Columns used as the feature matrix; the names must match the file's header
# exactly (including the existing spelling of 'Exclaimation').
feature_columns = [
    'Article Length',
    'Average Word Length',
    'Count of Numbers',
    'Count of Exclaimation Marks',
    'Count of Adjectives',
    'Word Count',
    'Negative Sentiment',
    'Positive Sentiment',
    'Neutral Sentiment',
]
X = dataset[feature_columns]

# Target labels.
Y = dataset['Label']

In [5]:
# Hold out 20% of the rows for testing (80/20 split). The split is stratified
# on the label so both parts keep the same class proportions, and the seed is
# fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=123,
    stratify=Y,
)

In [6]:
# creating model: AdaBoost with 200 boosting rounds. The seed is pinned
# (same value as the train/test split) so that re-running the notebook
# trains the same model and reports the same metrics.
adaboost_classifier = AdaBoostClassifier(n_estimators=200, random_state=123)

# training the model
adaboost_classifier.fit(X_train, y_train)

# saving the model file; the context manager makes sure the file handle is
# flushed and closed even if pickling raises, instead of leaking an open handle.
model_path = '/content/drive/My Drive/Fake news detection/Traditional methods/AdaBoost/Custom Dataset/AdaBoost.sav'
with open(model_path, 'wb') as model_file:
    pickle.dump(adaboost_classifier, model_file)

In [7]:
# Predict labels for the held-out test features with the fitted AdaBoost
# classifier; `pred` is compared against y_test when computing the metrics.
pred = adaboost_classifier.predict(X_test)

In [8]:
# Report accuracy, then precision / recall / F1 (each averaged across classes
# weighted by class support), and finally the confusion matrix. Every metric
# is printed as a heading line followed by its value.
print("Accuracy: ")
print(accuracy_score(y_test, pred))

# The three class-averaged metrics share the same call shape, so they are
# driven from a single table of (heading, scoring function) pairs.
weighted_metrics = (
    ("Precision: ", precision_score),
    ("Recall: ", recall_score),
    ("F1 Score ", f1_score),
)
for heading, scorer in weighted_metrics:
    print(heading)
    print(scorer(y_test, pred, average="weighted"))

print("Confusion Matrix ")
print(confusion_matrix(y_test, pred))

Accuracy: 
0.9032527105921602
Precision: 
0.9026582416667224
Recall: 
0.9032527105921602
F1 Score 
0.902946847567445
Confusion Matrix 
[[ 268  119]
 [ 113 1898]]
