In [1]:
# !pip install --upgrade scikit-learn
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import svm
import pickle
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

In [2]:
# loading dataset
main_path = '/content/drive/My Drive/Fake news detection/Pakistani Media Dataset/'
dataset =  pd.read_csv(main_path+'pakistani_dataset_consolidated_features.xlsx')
X = dataset[['Article Length','Average Word Length','Count of Numbers',
             'Count of Exclaimation Marks','Count of Adjectives','Word Count',
             'Negative Sentiment','Positive Sentiment','Neutral Sentiment']]
Y = dataset['Label']

In [3]:
# scaling the input
X = preprocessing.scale(X)

In [4]:
# splitting data into train and test 80/20
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

In [5]:
# creating model 
svm_classifier = svm.LinearSVC(verbose=True)

# training the model
svm_classifier.fit(X_train, y_train)

# saving the model file
pickle.dump(svm_classifier, open('/content/drive/My Drive/Fake news detection/Traditional methods/SVM_Lexical_and_Sentiment/Custom Dataset/svm_lexical_sentiment.sav', 'wb'))

[LibLinear]



In [6]:
# test SVM model
pred = svm_classifier.predict(X_test)

In [7]:
# checking accuracy, precision, recall and F1 scores
print ("Accuracy: ")
print (accuracy_score(y_test, pred))

print ("Precision: ")
print (precision_score(y_test, pred, average="weighted"))

print ("Recall: ")
print (recall_score(y_test, pred, average="weighted"))

print ("F1 Score ")
print (f1_score(y_test, pred, average="weighted"))

print ("Confusion Matrix ")
print (confusion_matrix(y_test, pred))

Accuracy: 
0.8465387823185988
Precision: 
0.8227473616855734
Recall: 
0.8465387823185988
F1 Score 
0.7915571809938609
Confusion Matrix 
[[  30  354]
 [  14 2000]]
