In [1]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import pickle
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix


In [2]:
# Load the consolidated feature table and separate predictors from the target.
main_path = '/content/drive/My Drive/Fake news detection/Pakistani Media Dataset/'
dataset_file = 'pakistani_dataset_consolidated_features.xlsx'
# NOTE(review): the file name ends in .xlsx but it is parsed with read_csv --
# confirm the on-disk format really is CSV.
dataset = pd.read_csv(main_path + dataset_file)

# Columns used as model inputs (names must match the file's header exactly).
feature_columns = [
    'Article Length',
    'Average Word Length',
    'Count of Numbers',
    'Count of Exclaimation Marks',
    'Count of Adjectives',
    'Word Count',
    'Negative Sentiment',
    'Positive Sentiment',
    'Neutral Sentiment',
]
X = dataset[feature_columns]

# Target column the classifier is trained to predict.
Y = dataset['Label']

In [3]:
# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# class proportions approximately equal in both partitions, and the fixed
# random_state makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=123,
    stratify=Y,
)

In [5]:
# Build a k-nearest-neighbours classifier that votes over the 5 closest
# training samples.
# NOTE(review): the features are passed in as loaded, with no scaling step in
# this notebook; KNN distances are scale-sensitive -- confirm the columns are
# already on comparable scales.
KNN_classifier = KNeighborsClassifier(n_neighbors=5)

# Fit on the training partition only; the test partition stays unseen.
KNN_classifier.fit(X_train, y_train)

# Persist the fitted model. The context manager guarantees the file handle is
# flushed and closed even if pickling raises, instead of leaking the handle
# returned by a bare open() call.
model_path = '/content/drive/My Drive/Fake news detection/Traditional methods/KNN/Custom Dataset/KNN.sav'
with open(model_path, 'wb') as model_file:
    pickle.dump(KNN_classifier, model_file)

In [6]:
# Predict a label for every row of the held-out test partition using the
# fitted KNN model.
pred = KNN_classifier.predict(X=X_test)

In [7]:
# Report accuracy, precision, recall, F1 and the confusion matrix for the
# test-set predictions. Precision/recall/F1 use the "weighted" average.
weighted_avg = {"average": "weighted"}

# (heading, metric function, extra keyword arguments) in display order.
metric_report = [
    ("Accuracy: ", accuracy_score, {}),
    ("Precision: ", precision_score, weighted_avg),
    ("Recall: ", recall_score, weighted_avg),
    ("F1 Score ", f1_score, weighted_avg),
    ("Confusion Matrix ", confusion_matrix, {}),
]

# Each metric is evaluated right after its heading is printed, so the output
# order matches a sequence of individual print calls.
for heading, metric_fn, metric_kwargs in metric_report:
    print(heading)
    print(metric_fn(y_test, pred, **metric_kwargs))


Accuracy: 
0.8957464553794829
Precision: 
0.8970915314618311
Recall: 
0.8957464553794829
F1 Score 
0.8963862936232944
Confusion Matrix 
[[ 268  119]
 [ 131 1880]]
