In [1]:
# Importing libraries
import pandas as pd
import pickle
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import re
# Shared Porter stemmer instance; stemming() calls port_stem.stem() on every kept token.
port_stem=PorterStemmer()

In [2]:
# Load dataset and feature engineering
# Read the CSV, replace missing values with empty strings, and discard the
# 'id', 'title' and 'author' columns in a single chained expression.
df = (
    pd.read_csv("dataset.csv")
    .fillna('')
    .drop(columns=['id', 'title', 'author'])
)

In [18]:
# Preview the working DataFrame.
# NOTE(review): this cell's execution count (18) is later than the preprocessing
# cell's (14), so the rendered preview shows the already-stemmed text, not the raw CSV text.
df

Unnamed: 0,text,label
0,hous dem aid even see comey letter jason chaff...,1
1,ever get feel life circl roundabout rather hea...,0
2,truth might get fire octob tension intellig an...,1
3,video civilian kill singl us airstrik identifi...,1
4,print iranian woman sentenc six year prison ir...,1
...,...,...
20795,rapper unload black celebr met donald trump el...,0
20796,green bay packer lost washington redskin week ...,0
20797,maci today grew union sever great name america...,0
20798,nato russia hold parallel exercis balkan press...,1


In [14]:
# Preprocessing functions
_STOPWORDS_CACHE = {}


def _english_stopwords():
    """Return the NLTK English stop words as a frozenset, loading them only once."""
    if 'english' not in _STOPWORDS_CACHE:
        # Loaded lazily (on first use) so the corpus only needs to be available
        # by the time stemming() is actually called, not when it is defined.
        _STOPWORDS_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOPWORDS_CACHE['english']


def stemming(content):
    """Normalise a piece of text into a space-separated string of stemmed tokens.

    Steps: every character that is not an ASCII letter is replaced by a space,
    the text is lower-cased and split on whitespace, English stop words are
    dropped, and each remaining token is reduced with the Porter stemmer.

    Parameters
    ----------
    content : str
        Raw text to clean.

    Returns
    -------
    str
        The stemmed tokens joined by single spaces (empty string if no token
        survives).
    """
    # The stop-word list used to be re-read for every single token; fetch it
    # once and use set membership instead of a linear list scan.
    stop_words = _english_stopwords()
    letters_only = re.sub('[^a-zA-Z]', ' ', content)
    tokens = letters_only.lower().split()
    return ' '.join(port_stem.stem(word) for word in tokens if word not in stop_words)
# Make sure the NLTK stop-word corpus is available before stemming() is first called.
nltk.download('stopwords')
# Overwrites df['text'] with its stemmed form.
# NOTE(review): this is not idempotent - re-running the cell stems the already-stemmed text again.
df['text']=df['text'].apply(stemming)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\anubh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [15]:
# Train-test split
# 80/20 split of the cleaned text (features) and the label column (target).
# random_state is fixed so the split, and therefore every accuracy computed
# from it, is the same on each Restart & Run All.
x = df['text']
y = df['label']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)

In [16]:
# Vectorization
# The TF-IDF vectorizer is fitted on the training texts only; the already-fitted
# vectorizer is then used to transform the test texts, so the test split does
# not take part in fitting.
vect = TfidfVectorizer()
x_train_vect = vect.fit_transform(x_train)
x_test_vect = vect.transform(x_test)

In [17]:
# Train models
# Candidate classifiers, keyed by the display name used in the printed report.
# The decision tree and the random forest are seeded so that their accuracies
# are reproducible between runs; the other two estimators use their defaults.
models = {
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'SVM': SVC(),
    'Multinomial Naive Bayes': MultinomialNB()
}

# Fit each candidate on the TF-IDF training features and report its accuracy
# on the held-out test features. Note that `name` and `model` stay bound to the
# last dictionary entry once the loop has finished.
for name, model in models.items():
    model.fit(x_train_vect, y_train)
    y_pred = model.predict(x_test_vect)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy}")

Decision Tree Accuracy: 0.8829326923076923
Random Forest Accuracy: 0.9100961538461538
SVM Accuracy: 0.9514423076923076
Multinomial Naive Bayes Accuracy: 0.8826923076923077


In [19]:
# Pickle models
# Write every fitted candidate to '<lower_snake_case_name>_model.pkl'.
# A context manager guarantees each file is flushed and closed, instead of
# leaving the handles open until garbage collection.
for name, model in models.items():
    model_path = f'{name.lower().replace(" ", "_")}_model.pkl'
    with open(model_path, 'wb') as model_file:
        pickle.dump(model, model_file)

Conclusion: SVM achieved the highest test accuracy of the four models (about 0.951), so an SVM is retrained in the next cell.

In [20]:
# Train SVM model
# Fits a fresh default SVC on the same TF-IDF training features used in the comparison loop.
# NOTE(review): models['SVM'] is already a fitted default SVC trained on the same data,
# so this refit repeats that work - consider reusing it.
svm_model = SVC()
svm_model.fit(x_train_vect, y_train)

In [22]:
# Persist the fitted vectorizer and the chosen classifier, then load them back
# under the names used by the prediction helper (vector_form / load_model).
with open('vector.pkl', 'wb') as vector_file:
    pickle.dump(vect, vector_file)

# Save `svm_model` explicitly. The previous code dumped `model`, which is only
# the leftover loop variable from the loops over `models` and therefore refers
# to the last dictionary entry (Multinomial Naive Bayes), not the SVM.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(svm_model, model_file)

# SECURITY: unpickling executes code stored in the file. These files were just
# written by this notebook; never load pickles from an untrusted source.
with open('vector.pkl', 'rb') as vector_file:
    vector_form = pickle.load(vector_file)
with open('model.pkl', 'rb') as model_file:
    load_model = pickle.load(model_file)

In [23]:
def fake_news(news):
    """Classify a single piece of news text with the reloaded vectorizer and model.

    The text is cleaned with stemming(), wrapped in a one-element list,
    transformed by `vector_form`, and passed to `load_model.predict`.

    Parameters
    ----------
    news : str
        Raw article text.

    Returns
    -------
    The value returned by `load_model.predict` for the single input.
    """
    cleaned = stemming(news)
    features = vector_form.transform([cleaned])
    return load_model.predict(features)

In [24]:
# Run the classifier on one sample article; `val` holds whatever fake_news() returns for it.
val=fake_news("""In these trying times, Jackie Mason is the Voice of Reason. [In this week’s exclusive clip for Breitbart News, Jackie discusses the looming threat of North Korea, and explains how President Donald Trump could win the support of the Hollywood left if the U. S. needs to strike first.  “If he decides to bomb them, the whole country will be behind him, because everybody will realize he had no choice and that was the only thing to do,” Jackie says. “Except the Hollywood left. They’ll get nauseous. ” “[Trump] could win the left over, they’ll fall in love with him in a minute. If he bombed them for a better reason,” Jackie explains. “Like if they have no transgender toilets. ” Jackie also says it’s no surprise that Hollywood celebrities didn’t support Trump’s strike on a Syrian airfield this month. “They were infuriated,” he says. “Because it might only save lives. That doesn’t mean anything to them. If it only saved the environment, or climate change! They’d be the happiest people in the world. ” Still, Jackie says he’s got nothing against Hollywood celebs. They’ve got a tough life in this country. Watch Jackie’s latest clip above.   Follow Daniel Nussbaum on Twitter: @dznussbaum """)

In [25]:
# Report the verdict: a prediction equal to [0] prints 'reliable', anything else prints 'unreliable'.
print('reliable' if val==[0] else 'unreliable')

reliable
