In [1]:
import re
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import pandas as pd
# plotting
import seaborn as sns
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# nltk
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
# sklearn
# from sklearn.svm import LinearSVC
# from sklearn.naive_bayes import BernoulliNB
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix, classification_report,accuracy_score

In [2]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
print(stopwords.words('english'))

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

In [4]:
newsdata = pd.read_csv('Fake_news.csv',encoding= "ISO-8859-1")
newsdata.head()

Unnamed: 0,id,title,text,label
0,0,LAW ENFORCEMENT ON HIGH ALERT Following Threat...,No comment is expected from Barack Obama Membe...,1
1,1,,Did they post their votes for Hillary already?,1
2,2,UNBELIEVABLE! OBAMAâS ATTORNEY GENERAL SAYS ...,"Now, most of the demonstrators gathered last ...",1
3,3,"Bobby Jindal, raised Hindu, uses story of Chri...",A dozen politically active pastors came here f...,0
4,4,SATAN 2: Russia unvelis an image of its terrif...,"The RS-28 Sarmat missile, dubbed Satan 2, will...",1


In [5]:
print(len(newsdata))

3867


In [6]:
newsdata['id'] = range(1, len(newsdata) + 1)   # correcting the order of id 

In [7]:
newsdata.iloc[189:350]

Unnamed: 0,id,title,text,label
189,190,Places to Put Your Hands Together - The New Yo...,"Whether the music to your ears is pop, classic...",0
190,191,United resumes Newark-Delhi flights after halt...,NEW YORK/SINGAPORE (Reuters) - United Airlines...,0
191,192,Gallery Hopes to Sell Kanye Westâs âFamous...,LOS ANGELES â Four million dollars. Thatâ...,0
192,193,RUDY GIULIANI SLAMS HILLARY After She Refuses ...,What in the world has become of the Democrat P...,1
193,194,Mistrial for 2 Ex-Officers in Albuquerque Kill...,ALBUQUERQUE â After two full days of deli...,0
...,...,...,...,...
345,346,"Ecuador Votes as Latin Americaâs Left Sags, ...","QUITO, Ecuador â For most of Luis GarcÃ­a...",0
346,347,Gavel Battle: Boehner resignation sparks House...,House Speaker John Boehnerâs stunning announ...,0
347,348,OBAMA RACES TO SET GITMO TERRORISTS FREEâ¦Lea...,It s hard to imagine a President who could be ...,1
348,349,NEW POLL Asks Trump Voters If Theyâd Vote Fo...,"One year later, and Donald Trump s supporters ...",1


In [15]:
port_stem= PorterStemmer()
def stemming(content):
    if isinstance(content, (float, int)):  # Check if content is a float or integer
        content = str(content)  # Convert float or integer to string
    elif content is None:  # Check if content is None
        content = ''  # Convert None to empty string
    else:
        content = str(content)
    stemmed_content = re.sub('[^a-zA-Z]'," ",content)
    stemmed_content = stemmed_content.lower()
    stemmed_content=stemmed_content.split()
    stemmed_content = [port_stem.stem(word) for word in stemmed_content if not word in stopwords.words('english')]
    stemmed_content = ' '.join(stemmed_content)
    
    return stemmed_content

In [16]:
newsdata['stemmed_content'] = newsdata['text'].apply(stemming)

In [17]:
print(newsdata['stemmed_content'])

0       comment expect barack obama member fyf fukyofl...
1                               post vote hillari alreadi
2       demonstr gather last night exercis constitut p...
3       dozen polit activ pastor came privat dinner fr...
4       rs sarmat missil dub satan replac ss fli mile ...
                              ...                        
3862    jona e alexi novemb putin result eros tradit n...
3863    austin texa reuter former baylor univers presi...
3864    atlanta exactli work art mythmak civil war his...
3865    ladi liberti depict black women coin first tim...
3866    solv problem like donald trump hillari clinton...
Name: stemmed_content, Length: 3867, dtype: object


In [18]:
X= newsdata['stemmed_content'].values
Y=newsdata['label'].values
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,stratify=Y,random_state=2)
print(X_train.size,X_test.size,Y_train.size,Y_test.size)

3093 774 3093 774


In [19]:
X_train_texts = [str(x) for x in X_train]
vectorizer = TfidfVectorizer()
X_train_transformed = vectorizer.fit_transform(X_train_texts)
X_test_transformed= vectorizer.transform(X_test)

In [20]:
model = RandomForestClassifier(n_estimators=30)
model.fit(X_train_transformed,Y_train)
X_train_predicted = model.predict(X_train_transformed)
accuracy = accuracy_score(Y_train,X_train_predicted)
print(accuracy)
X_test_predicted = model.predict(X_test_transformed)
accuracy = accuracy_score(Y_test,X_test_predicted)
print(accuracy)

0.9996766892984158
0.872093023255814


In [26]:
new_input_text = "WASHINGTON (Reuters) - Charles Schumer, the top Democrat in the U.S. Senate, called on President Donald Trump on Sunday to name a single official to oversee and coordinate relief efforts in hurricane-ravaged Puerto Rico. Schumer, along with Representatives Nydia VelÃ zquez and Jose Serrano, said a â€œCEO of response and recoveryâ€ is needed to manage the complex and ongoing federal response in the territory, where millions of Americans remain without power and supplies. In a statement, Schumer said the current federal response to Hurricane Mariaâ€™s impact on the island had been â€œdisorganized, slow-footed and mismanaged.â€ â€œThis person will have the ability to bring all the federal agencies together, cut red tape on the public and private side, help turn the lights back on, get clean water flowing and help bring about recovery for millions of Americans who have gone too long in some of the worst conditions,â€ he said. The White House did not immediately respond to a request for comment. The Democrats contended that naming a lone individual to manage the governmentâ€™s relief efforts was critical, particularly given that the Federal Emergency Management Agency is already stretched thin from dealing with other crises, such as the aftermath of Hurricane Harvey in Texas and the wildfires in California. The severity of the Puerto Rico crisis, where a million people do not have clean water and millions are without power nearly a month after Hurricane Maria made landfall, demand a single person to focus exclusively on relief and recovery, the Democrats said. Forty-nine people have died in Puerto Rico officially, with dozens more missing. The hurricane did extensive damage to the islandâ€™s power grid, destroying homes, roads and other vital infrastructure. Now, the bankrupt territory is struggling to provide basic services like running water, and pay its bills. â€œItâ€™s tragically clear this Administration was caught flat footed when Maria hit Puerto Rico,â€ said VelÃ zquez. â€œAppointing a CEO of Response and Recovery will, at last, put one person with authority in charge to manage the response and ensure we are finally getting the people of Puerto Rico the aid they need.â€ On Thursday, Trump said the federal response has been a â€œ10â€ on a scale of one to 10 at a meeting with Puerto Rico Governor Ricardo Rossello.  The governor has asked the White House and Congress for at least $4.6 billion in block grants and other types of funding. Senator Marco Rubio called on Congress to modify an $18.7 billion aid package for areas damaged by a recent swath of hurricanes to ensure that Puerto Rico can quickly access the funds. "
stemmed_input = stemming(new_input_text)
input_transformed = vectorizer.transform([str(stemmed_input)])
predicted_sentiment = model.predict(input_transformed)
if predicted_sentiment[0] == 0:
    print("Fake news")
else:
    print("Real news")

Fake news


In [24]:
import pickle
with open('news_model.pkl', 'wb') as file:
    pickle.dump(model, file)
with open('news_vectorizer.pkl', 'wb') as file:
    pickle.dump(vectorizer, file)