# Importing libraries

In [44]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score

# Importing file

In [45]:
# Lift pandas' display limits first so the preview below is not truncated
# column- or row-wise.
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, None)

# Load the labelled news articles from the CSV sitting next to the notebook.
df = pd.read_csv('news.csv')

# Quick overview: dimensions, column names, then the first ten rows.
print('Total size of the dataset', df.shape, '\n')
print(df.columns, '\n\n')
print(df.head(10))

Total size of the dataset (6335, 4) 

Index(['Unnamed: 0', 'title', 'text', 'label'], dtype='object') 


   Unnamed: 0                                              title  \
0        8476                       You Can Smell Hillary’s Fear   
1       10294  Watch The Exact Moment Paul Ryan Committed Pol...   
2        3608        Kerry to go to Paris in gesture of sympathy   
3       10142  Bernie supporters on Twitter erupt in anger ag...   
4         875   The Battle of New York: Why This Primary Matters   
5        6903                                        Tehran, USA   
6        7341  Girl Horrified At What She Watches Boyfriend D...   
7          95                  ‘Britain’s Schindler’ Dies at 106   
8        4869  Fact check: Trump and Clinton at the 'commande...   
9        2909  Iran reportedly makes new push for uranium con...   

                                                text label  
0  Daniel Greenfield, a Shillman Journalism Fello...  FAKE  
1  Google Pinterest Digg

# Creating training and testing datasets

In [46]:
# Article bodies are the model input; the 'label' column is the target.
article_texts = df['text']
article_labels = df['label']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    article_texts,
    article_labels,
    test_size=0.2,
    random_state=7,
)

# Vectorizing the text with TF-IDF

In [47]:
# TF-IDF vectorizer configured with the built-in English stop-word list and a
# document-frequency ceiling of 0.7.
tfidf_vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')

# The vocabulary is learned from the training texts only; the test texts are
# projected onto that same vocabulary.
tfidf_train = tfidf_vectorizer.fit_transform(x_train)
tfidf_test = tfidf_vectorizer.transform(x_test)

# Report the resulting matrix dimensions for both splits.
train_shape, test_shape = tfidf_train.shape, tfidf_test.shape
print('Size of training dataset:', train_shape)
print('Size of testing dataset:', test_shape)

Size of training dataset: (5068, 61651)
Size of testing dataset: (1267, 61651)


# Building the model

In [48]:
# Passive-Aggressive linear classifier trained on the TF-IDF features.
# random_state pins the per-epoch shuffling of the training data so that a
# fresh Restart & Run All reproduces the same fitted model and metrics.
pac = PassiveAggressiveClassifier(max_iter=50, random_state=7)
pac.fit(tfidf_train, y_train)

# Making Predictions

In [49]:
# Predict labels for the held-out articles.
y_pred = pac.predict(tfidf_test)

# Accuracy is computed once and kept in `score` for the report below.
score = accuracy_score(y_test, y_pred)

print('Accuracy', round(100*score, 2), '%')

# Precision, recall and F1 are reported with 'REAL' treated as the positive class.
print('Precision is given by', round(100*(precision_score(y_test, y_pred, pos_label='REAL')),2))
print('Recall is given by', round(100*(recall_score(y_test, y_pred, pos_label='REAL')),2))
print('F1 score is given by', round(100*(f1_score(y_test, y_pred, pos_label='REAL')), 2))

Accuracy 92.58 %
Precision is given by 92.13
Recall is given by 93.0
F1 score is given by 92.56


# Evaluating Performance

In [50]:
# Compute the confusion matrix once, with rows/columns ordered FAKE then REAL
# (rows are the true labels, columns the predicted labels).
conf_matrix = confusion_matrix(y_test, y_pred, labels=['FAKE', 'REAL'])

print('Confusion Matrix is given by:\n', conf_matrix)

Confusion Matrix is given by:
 [[588  50]
 [ 44 585]]
