In [1]:
import pandas as pd

In [2]:
# Load the two source CSVs: genuine articles first, fabricated ones second.
true_data, false_data = (
    pd.read_csv(csv_name) for csv_name in ('True.csv', 'Fake.csv')
)

In [3]:
# Attach the target label to every row: 1 for real articles, 0 for fake ones.
true_data['class'], false_data['class'] = 1, 0

In [4]:
# Sanity check: (rows, columns) of the real-news frame, label column included.
true_data.shape

(21417, 5)

In [5]:
# Stack the real and fake articles into one frame. Each source keeps its own
# row labels, so index values repeat in the merged frame.
data_merge = pd.concat([true_data, false_data], axis=0)
# A cell renders only its final expression, so preview just the tail here
# (a head(10) line placed before it would be evaluated and silently discarded).
data_merge.tail(20)

Unnamed: 0,title,text,subject,date,class
23461,REPORT: ‘Federal Government Escalated the Viol...,KILLED: Rancher and protest spokesman Robert ...,Middle-east,"January 28, 2016",0
23462,"BOILER ROOM – Oregon Standoff, Cuddle Parties,...",Tune in to the Alternate Current Radio Network...,Middle-east,"January 28, 2016",0
23463,"Eyewitness Says Feds Ambushed Bundys, 100 Shot...",Patrick Henningsen 21st Century Wire UPDATE: 1...,Middle-east,"January 27, 2016",0
23464,Episode #119 – SUNDAY WIRE: ‘You Know the Dril...,Episode #119 of SUNDAY WIRE SHOW finally resum...,Middle-east,"January 24, 2016",0
23465,‘There’ll be boots on the ground’: US making n...,21st Century Wire says Various parties in Wash...,Middle-east,"January 23, 2016",0
23466,Boston Brakes? How to Hack a New Car With Your...,21st Century Wire says For those who still ref...,Middle-east,"January 22, 2016",0
23467,Oregon Governor Says Feds ‘Must Act’ Against P...,"21st Century Wire says So far, after nearly 20...",Middle-east,"January 21, 2016",0
23468,Ron Paul on Burns Oregon Standoff and Jury Nul...,21st Century Wire says If you ve been followin...,Middle-east,"January 21, 2016",0
23469,BOILER ROOM: As the Frogs Slowly Boil – EP #40,Tune in to the Alternate Current Radio Network...,Middle-east,"January 20, 2016",0
23470,Arizona Rancher Protesting in Oregon is Target...,RTOne of the most visible members of the armed...,Middle-east,"January 20, 2016",0


In [6]:
# List the merged frame's column names.
data_merge.columns

Index(['title', 'text', 'subject', 'date', 'class'], dtype='object')

In [7]:
# Keep only the article body and its label; the metadata columns are not used
# as model features.
data = data_merge.drop(columns=['title', 'subject', 'date'])

In [8]:
# Show which columns remain in the modelling frame.
data.columns

Index(['text', 'class'], dtype='object')

In [9]:
# Count missing values per column.
data.isna().sum()

text     0
class    0
dtype: int64

In [10]:
# Rebuild a clean 0..n-1 index. drop=True discards the old row labels directly
# instead of materialising them as an 'index' column that then has to be
# dropped, and avoids mutating the frame in place.
data = data.reset_index(drop=True)

data.columns

Index(['text', 'class'], dtype='object')

In [11]:
# Features (raw article text) and target (class label).
x, y = data['text'], data['class']

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [14]:
# TF-IDF features with English stop words removed and max_df capped at 0.7.
tfvect = TfidfVectorizer(
    stop_words='english',
    max_df=0.7,
)
# Fit on the training texts only, so nothing from the test split leaks into
# the learned vocabulary or weights.
tfid_x_train = tfvect.fit_transform(x_train)
# The test texts are only transformed with the already-fitted vectoriser.
tfid_x_test = tfvect.transform(x_test)

In [15]:
# Seed the estimator: it shuffles the training data between epochs, so without
# random_state the fitted weights (and the metrics reported below) can differ
# from run to run.
classifier = PassiveAggressiveClassifier(max_iter=50, random_state=0)
classifier.fit(tfid_x_train,y_train)

In [16]:
# Predict labels for the held-out TF-IDF matrix.
y_pred = classifier.predict(tfid_x_test)

In [17]:
# Compute the evaluation metrics on the held-out split first, then report them.
conf_mat = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print("Confusion Matrix: \n", conf_mat)
print("Classification Report: \n", report)
print("Accuracy: \n", accuracy)


Confusion Matrix: 
 [[4647   23]
 [  21 4289]]
Classification Report: 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      4670
           1       0.99      1.00      0.99      4310

    accuracy                           1.00      8980
   macro avg       1.00      1.00      1.00      8980
weighted avg       1.00      1.00      1.00      8980

Accuracy: 
 0.9951002227171493


In [18]:
def fake_news_det(news):
    """Classify one news text with the in-memory model and print the verdict.

    The text is vectorised with the notebook-level ``tfvect`` and scored by
    the notebook-level ``classifier``.

    Parameters
    ----------
    news : str
        Raw article text to classify.

    Returns
    -------
    None
        The verdict is printed, not returned: ``FALSE`` when the predicted
        class is 0 (the label given to the fake-news rows), ``TRUE`` otherwise.
    """
    # The vectoriser works on a collection of documents, so wrap the one text.
    vectorized_input_data = tfvect.transform([news])
    # predict() yields one label per input row; pick the only one explicitly
    # rather than truth-testing the whole array.
    predicted_class = classifier.predict(vectorized_input_data)[0]

    print("FALSE" if predicted_class == 0 else "TRUE")

In [19]:
# Sample snippet; the apostrophe in "Sunday’s" was stored as mis-decoded bytes
# and is restored here so the tokeniser sees the intended word.
fake_news_det('U.S. Secretary of State John F. Kerry said Monday that he will stop in Paris later this week, amid criticism that no top American officials attended Sunday’s unity march against terrorism.')

TRUE


In [20]:
# Sample snippet; the apostrophe in "she’s" was stored as mis-decoded bytes
# and is restored here so the tokeniser sees the intended word.
fake_news_det("""Go to Article 
President Barack Obama has been campaigning hard for the woman who is supposedly going to extend his legacy four more years. The only problem with stumping for Hillary Clinton, however, is she’s not exactly a candidate easy to get too enthused about.  """)

FALSE


In [24]:
import pickle

# Persist the fitted classifier. The context manager guarantees the file is
# flushed and closed before it is read back; a bare open() leaves that to the
# garbage collector.
# NOTE: only `classifier` is saved. The fitted `tfvect` vectoriser is not, so
# model.pkl on its own cannot score raw text in a fresh session.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(classifier, model_file)

# load the model from disk
# pickle.load can execute arbitrary code; only load files you produced yourself.
with open('model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [22]:
def testing_fun(news):
    input_data = [news]
    vectorized_input_data = tfvect.transform(input_data)
    prediction = loaded_model.predict(vectorized_input_data)

    if(prediction == 0):
        print("FALSE")
    else:
        print("TRUE")
    # print(prediction)

In [23]:
# Sample snippet; the apostrophe in "she’s" was stored as mis-decoded bytes
# and is restored here so the tokeniser sees the intended word.
testing_fun("""Go to Article 
President Barack Obama has been campaigning hard for the woman who is supposedly going to extend his legacy four more years. The only problem with stumping for Hillary Clinton, however, is she’s not exactly a candidate easy to get too enthused about.  """)

FALSE
