### import libraries


In [1]:
import numpy as np
import pandas as pd

### read the data

In [2]:
df = pd.read_csv('news.csv')
df.shape
df.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL


In [3]:
df

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL
...,...,...,...,...
6330,4490,State Department says it can't find emails fro...,The State Department told the Republican Natio...,REAL
6331,8062,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,FAKE
6332,8622,Anti-Trump Protesters Are Tools of the Oligarc...,Anti-Trump Protesters Are Tools of the Oligar...,FAKE
6333,4021,"In Ethiopia, Obama seeks progress on peace, se...","ADDIS ABABA, Ethiopia —President Obama convene...",REAL


### Labels of DataFrame

In [4]:
labels = df.label
labels

0       FAKE
1       FAKE
2       REAL
3       FAKE
4       REAL
        ... 
6330    REAL
6331    FAKE
6332    FAKE
6333    REAL
6334    REAL
Name: label, Length: 6335, dtype: object

### import Scikit libraries

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.feature_extraction.text import TfidfVectorizer

### Split the data sets

In [6]:
x_train,x_test,y_train,y_test = train_test_split(df['text'],labels,test_size=0.2,random_state=7)

In [7]:
x_train

6237    The head of a leading survivalist group has ma...
3722    ‹ › Arnaldo Rodgers is a trained and educated ...
5774    Patty Sanchez, 51, used to eat 13,000 calories...
336     But Benjamin Netanyahu’s reelection was regard...
3622    John Kasich was killing it with these Iowa vot...
                              ...                        
5699                                                     
2550    It’s not that Americans won’t elect wealthy pr...
537     Anyone writing sentences like ‘nevertheless fu...
1220    More Catholics are in Congress than ever befor...
4271    It was hosted by CNN, and the presentation was...
Name: text, Length: 5068, dtype: object

### Intilalize Tfidf Vectorizer

In [8]:
tfidf_vectorizer = TfidfVectorizer(stop_words='english',max_df = 0.7)
tfidf_train = tfidf_vectorizer.fit_transform(x_train)
tfidf_test = tfidf_vectorizer.transform(x_test)

### Passive Aggressive Classifier (PAC)

In [9]:
pac = PassiveAggressiveClassifier(max_iter=50)
pac.fit(tfidf_train,y_train)
y_pred = pac.predict(tfidf_test)

### Accuracy score

In [10]:
score = accuracy_score(y_pred,y_test)
print(f'Accuracy= {round(score*100,2)}%')

Accuracy= 93.13%


### Confusion Matrix

In [11]:
cm=confusion_matrix(y_test,y_pred,labels=['FAKE','REAL'])
print(cm)

[[593  45]
 [ 42 587]]


### Logistic Regression

In [12]:
from sklearn.linear_model import LogisticRegression
Logreg = LogisticRegression()
Logreg.fit(tfidf_train,y_train)
y_pred = Logreg.predict(tfidf_test)
log_acc = accuracy_score(y_pred,y_test)
print(f'Accuracy={round(log_acc*100,2)}%')

Accuracy=91.71%


### Support Vector machine

In [13]:
from sklearn.svm import SVC
Svm = SVC()
Svm.fit(tfidf_train,y_train)
y_pred = Svm.predict(tfidf_test)
svc_score = accuracy_score(y_pred,y_test)
print(f'Accuracy= {round(svc_score*100,2)}%')

Accuracy= 92.9%


### Decision Tree

In [14]:
from sklearn.tree import DecisionTreeClassifier
tree = DecisionTreeClassifier()
tree.fit(tfidf_train,y_train)
tree_pred = tree.predict(tfidf_test)
tree_score = accuracy_score(tree_pred,y_test)
print(f'Accuracy = {round(tree_score*100,2)}%')

Accuracy = 80.35%


### Random Forest Classifier 

In [15]:
from sklearn.ensemble import RandomForestClassifier
rf_clf = RandomForestClassifier()
rf_clf.fit(tfidf_train,y_train)
rf_pred = rf_clf.predict(tfidf_test)
rf_score = accuracy_score(rf_pred,y_test)
print(f'Accuracy = {round(rf_score*100,2)}%')

Accuracy = 90.61%


### KNN


In [24]:
from sklearn.neighbors import KNeighborsClassifier
knn_clf = KNeighborsClassifier()
knn_clf.fit(tfidf_train,y_train)
knn_pred = knn_clf.predict(tfidf_test)
knn_score = accuracy_score(knn_pred,y_test)
print(f'Accuracy = {round(knn_score*100,2)}%')

Accuracy = 56.12%


### Average accuracy 

In [31]:
avg = (score+log_acc+svc_score+tree_score+rf_score+knn_score)/6
print(f'Average = {round(avg*100,2)}%')

Average = 84.14%
