<h1><b> IMPORTING LIBRARIES </b></h1>

In [106]:
import numpy as np
import pandas as pd
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from pprint import pprint

#MODELS
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.tree import DecisionTreeClassifier


In [107]:
# Load the training data from train.csv (path is relative to the working directory) and preview the first rows
data = pd.read_csv('train.csv')
data.head()

Unnamed: 0,id,title,author,text,label
0,0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,0
2,2,Why the Truth Might Get You Fired,Consortiumnews.com,"Why the Truth Might Get You Fired October 29, ...",1
3,3,15 Civilians Killed In Single US Airstrike Hav...,Jessica Purkiss,Videos 15 Civilians Killed In Single US Airstr...,1
4,4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print \nAn Iranian woman has been sentenced to...,1


In [108]:
# (rows, columns) of the loaded frame
data.shape

(20800, 5)

In [109]:
# Count the missing values in each column
data.isna().sum()

id           0
title      558
author    1957
text        39
label        0
dtype: int64

In [110]:
# The dataset contains null values (see the counts above), so we replace them with empty strings

# replacing the null values with empty string
data = data.fillna('')

# checking the number of missing values in the dataset
data.isnull().sum()

id        0
title     0
author    0
text      0
label     0
dtype: int64

In [111]:
# Column names currently present in the frame
data.columns

Index(['id', 'title', 'author', 'text', 'label'], dtype='object')

In [112]:
# Drop the columns that are not used for training and testing; only 'text' and 'label' remain
data = data.drop(columns=['id', 'title', 'author'])

In [113]:
# Preview the frame after the id/title/author columns were dropped
data.head()

Unnamed: 0,text,label
0,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,Ever get the feeling your life circles the rou...,0
2,"Why the Truth Might Get You Fired October 29, ...",1
3,Videos 15 Civilians Killed In Single US Airstr...,1
4,Print \nAn Iranian woman has been sentenced to...,1


<h2> <b> STEMMING PROCESS </b> </h2>

In [114]:
port_stem = PorterStemmer()

# Build the English stop-word lookup once. The original asked NLTK for the list
# again for every single token and then scanned that list linearly, which makes
# stemming the full dataset far slower than necessary.
english_stopwords = frozenset(stopwords.words('english'))

def stemming(topic):
    """Normalise a piece of text for vectorisation.

    Steps: replace every non-letter character with a space, lowercase,
    split on whitespace, drop English stop words, Porter-stem the remaining
    tokens and join them back with single spaces.

    Args:
        topic: the raw text as a string.

    Returns:
        The cleaned, stemmed text as a single space-separated string
        (empty string when no tokens survive).
    """
    letters_only = re.sub('[^a-zA-Z]', ' ', topic)
    tokens = letters_only.lower().split()
    return ' '.join(
        port_stem.stem(token) for token in tokens if token not in english_stopwords
    )

In [115]:
# Clean and stem every article. This overwrites the raw 'text' column in place,
# so re-running this cell stems text that has already been stemmed.
data['text'] = data['text'].apply(stemming)

In [116]:
# Features are the stemmed article text; the target is the label column
x, y = data['text'], data['label']

In [117]:
# Number of labels (one per article)
y.shape

(20800,)

<h2> <b> METHOD 1: DECISION TREE CLASSIFICATION </b> </h2>

In [118]:
# Hold out 20% of the rows for testing; stratify on the label and fix the seed so the split is repeatable
x1_train, x1_test, y1_train, y1_test = train_test_split(x, y, test_size = 0.2, stratify=y, random_state=2)

In [119]:
# Fit the TF-IDF vectorizer on the training text only, then reuse it to transform the test text.
# NOTE(review): x1_train/x1_test are rebound from raw text to TF-IDF matrices here, so re-running
# this cell without re-running the split cell first would feed matrices, not text, to the vectorizer.
vectorizer = TfidfVectorizer()
x1_train=vectorizer.fit_transform(x1_train)
x1_test=vectorizer.transform(x1_test)

In [120]:
# Fix the seed: scikit-learn's decision tree randomly permutes the features at each split,
# so without random_state the fitted tree (and the reported accuracy) can change between runs.
model1 = DecisionTreeClassifier(random_state=2)
model1.fit(x1_train, y1_train)

In [121]:
# accuracy score on the test data
x1_test_prediction = model1.predict(x1_test)
# scikit-learn metrics take (y_true, y_pred); the original call had the two swapped
test1_data_accuracy = accuracy_score(y1_test, x1_test_prediction)

print('The accuracy of the test data:', test1_data_accuracy)

# Format the accuracy as a percentage with three decimal places
formatted_accuracy = "{:.3f}".format(test1_data_accuracy * 100)

print('The accuracy of the test data in percentage form:', formatted_accuracy + '%')

The accuracy of the test data: 0.8737980769230769
The accuracy of the test data in percentage form: 87.380%


In [122]:
# Confusion matrix and per-class precision/recall/F1 for the decision tree on the test set.
# Both calls receive the true labels first and the predictions second.
conf_matrix = confusion_matrix(y1_test, x1_test_prediction)

print("Confusion Matrix:")
print(conf_matrix)

print(classification_report(y1_test, x1_test_prediction))

Confusion Matrix:
[[1793  284]
 [ 241 1842]]
              precision    recall  f1-score   support

           0       0.88      0.86      0.87      2077
           1       0.87      0.88      0.88      2083

    accuracy                           0.87      4160
   macro avg       0.87      0.87      0.87      4160
weighted avg       0.87      0.87      0.87      4160



<h2> <b> METHOD 2: LOGISTIC REGRESSION CLASSIFICATION </b> </h2>

In [123]:
# Same arguments and seed as the Method 1 split, so this model is evaluated on the same 80/20 partition
x2_train, x2_test, y2_train, y2_test = train_test_split(x, y, test_size = 0.2, stratify=y, random_state=2)

In [124]:
# Fit a fresh TF-IDF vectorizer on the training text only, then transform the test text with it.
# NOTE(review): this rebinds both `vectorizer` and x2_train/x2_test (text -> TF-IDF matrices).
vectorizer = TfidfVectorizer()
x2_train=vectorizer.fit_transform(x2_train)
x2_test=vectorizer.transform(x2_test)

In [125]:
# Fit a logistic regression with scikit-learn's default settings on the TF-IDF training features
model2 = LogisticRegression()
model2.fit(x2_train, y2_train)

In [126]:
# accuracy score on the test data
x2_test_prediction = model2.predict(x2_test)
# scikit-learn metrics take (y_true, y_pred); the original call had the two swapped
test2_data_accuracy = accuracy_score(y2_test, x2_test_prediction)

print('The accuracy of the test data:', test2_data_accuracy)

# Format the accuracy as a percentage with three decimal places
formatted_accuracy = "{:.3f}".format(test2_data_accuracy * 100)

print('The accuracy of the test data in percentage form:', formatted_accuracy + '%')

The accuracy of the test data: 0.9394230769230769
The accuracy of the test data in percentage form: 93.942%


In [127]:
# Confusion matrix and per-class precision/recall/F1 for logistic regression on the test set.
# Both calls receive the true labels first and the predictions second.
conf_matrix = confusion_matrix(y2_test, x2_test_prediction)

print("Confusion Matrix:")
print(conf_matrix)

print(classification_report(y2_test, x2_test_prediction))

Confusion Matrix:
[[1947  130]
 [ 122 1961]]
              precision    recall  f1-score   support

           0       0.94      0.94      0.94      2077
           1       0.94      0.94      0.94      2083

    accuracy                           0.94      4160
   macro avg       0.94      0.94      0.94      4160
weighted avg       0.94      0.94      0.94      4160



<h2> <b> METHOD 3: MULTINOMIAL NAIVE BAYES CLASSIFICATION</b> </h2>

In [128]:
# Same arguments and seed as the Method 1 split, so this model is evaluated on the same 80/20 partition
x3_train, x3_test, y3_train, y3_test = train_test_split(x, y, test_size = 0.2, stratify=y, random_state=2)

In [129]:
# Fit a fresh TF-IDF vectorizer on the training text only, then transform the test text with it.
# NOTE(review): this rebinds both `vectorizer` and x3_train/x3_test (text -> TF-IDF matrices).
vectorizer = TfidfVectorizer()
x3_train=vectorizer.fit_transform(x3_train)
x3_test=vectorizer.transform(x3_test)

In [130]:
# Fit a multinomial Naive Bayes classifier with default settings on the TF-IDF training features
model3 = MultinomialNB()
model3.fit(x3_train, y3_train)

In [131]:
# accuracy score on the test data
x3_test_prediction = model3.predict(x3_test)
# scikit-learn metrics take (y_true, y_pred); the original call had the two swapped
test3_data_accuracy = accuracy_score(y3_test, x3_test_prediction)

print('The accuracy of the test data:', test3_data_accuracy)

# Format the accuracy as a percentage with three decimal places
formatted_accuracy = "{:.3f}".format(test3_data_accuracy * 100)

print('The accuracy of the test data in percentage form:', formatted_accuracy + '%')

The accuracy of the test data: 0.8711538461538462
The accuracy of the test data in percentage form: 87.115%


In [132]:
# Confusion matrix and per-class precision/recall/F1 for multinomial Naive Bayes on the test set.
# Both calls receive the true labels first and the predictions second.
conf_matrix = confusion_matrix(y3_test, x3_test_prediction)

print("Confusion Matrix:")
print(conf_matrix)

print(classification_report(y3_test, x3_test_prediction))

Confusion Matrix:
[[2041   36]
 [ 500 1583]]
              precision    recall  f1-score   support

           0       0.80      0.98      0.88      2077
           1       0.98      0.76      0.86      2083

    accuracy                           0.87      4160
   macro avg       0.89      0.87      0.87      4160
weighted avg       0.89      0.87      0.87      4160



<h2> <b> METHOD 4: PASSIVE AGGRESSIVE CLASSIFICATION</b> </h2>

In [133]:
# Same arguments and seed as the Method 1 split, so this model is evaluated on the same 80/20 partition
x4_train, x4_test, y4_train, y4_test = train_test_split(x, y, test_size = 0.2, stratify=y, random_state=2)

In [134]:
# Fit a fresh TF-IDF vectorizer on the training text only, then transform the test text with it.
# This is the last rebinding of `vectorizer`, so it is the instance that gets pickled further down.
# NOTE(review): x4_train/x4_test are rebound from raw text to TF-IDF matrices here.
vectorizer = TfidfVectorizer()
x4_train=vectorizer.fit_transform(x4_train)
x4_test=vectorizer.transform(x4_test)

In [135]:
# Fix the seed: the classifier shuffles the training data by default, so without
# random_state the fitted model (and the reported accuracy) can change between runs.
model4 = PassiveAggressiveClassifier(random_state=2)
model4.fit(x4_train, y4_train)

In [136]:
# accuracy score on the test data
x4_test_prediction = model4.predict(x4_test)
# scikit-learn metrics take (y_true, y_pred); the original call had the two swapped
test4_data_accuracy = accuracy_score(y4_test, x4_test_prediction)

print('The accuracy of the test data:', test4_data_accuracy)

# Format the accuracy as a percentage with three decimal places
formatted_accuracy = "{:.3f}".format(test4_data_accuracy * 100)

print('The accuracy of the test data in percentage form:', formatted_accuracy + '%')

The accuracy of the test data: 0.9516826923076923
The accuracy of the test data in percentage form: 95.168%


In [137]:
# Confusion matrix and per-class precision/recall/F1 for the Passive Aggressive classifier on the test set.
# Both calls receive the true labels first and the predictions second.
conf_matrix = confusion_matrix(y4_test, x4_test_prediction)

print("Confusion Matrix:")
print(conf_matrix)

print(classification_report(y4_test, x4_test_prediction))

Confusion Matrix:
[[1977  100]
 [ 101 1982]]
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      2077
           1       0.95      0.95      0.95      2083

    accuracy                           0.95      4160
   macro avg       0.95      0.95      0.95      4160
weighted avg       0.95      0.95      0.95      4160



<h2> <b> FAKE NEWS DETECTION USING THE BEST METHOD (PASSIVE AGGRESSIVE CLASSIFIER)</b> </h2>

In [138]:
import pickle

# Persist the TF-IDF vectorizer fitted for Method 4 together with the Passive Aggressive model.
# Context managers make sure each file handle is flushed and closed; the original left them open.
with open('vector.pkl', 'wb') as vector_file:
    pickle.dump(vectorizer, vector_file)
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model4, model_file)

# Load them back to check that the saved artefacts can be used for prediction.
# NOTE: pickle.load can execute arbitrary code, so only load pickle files you created or trust.
with open('vector.pkl', 'rb') as vector_file:
    vector_form = pickle.load(vector_file)
with open('model.pkl', 'rb') as model_file:
    load_model = pickle.load(model_file)

In [139]:
def news_detection(news):
    """Predict the label of a single news text with the reloaded model.

    The text is cleaned with `stemming`, turned into TF-IDF features with the
    reloaded vectorizer (`vector_form`) and passed to the reloaded classifier
    (`load_model`).

    Args:
        news: the raw article text as a string.

    Returns:
        The result of `load_model.predict` for this one sample.
    """
    cleaned = stemming(news)
    features = vector_form.transform([cleaned])
    return load_model.predict(features)

In [140]:
val = news_detection(""" 
                     
The scandal engulfing Wells Fargo toppled its chairman and chief executive on Wednesday, as John G. Stumpf announced his departure from the company, effective immediately. The move was a swift and stunning fall for an executive whose bank made it through the 2008 financial crisis relatively unscathed, only to be undone by a   sales scandal that pervaded its community banking division and percolated under the surface for years. It was an extraordinary moment even in the banking industry, which has been bedeviled by criticism and regular scandals since the financial crisis. Despite the industry’s many troubles, relatively few banking chiefs have stepped down under outside pressure. But Wells Fargo’s transgressions were unusually blatant and straightforward, which contributed to the   public outcry. This time, there were no exotic financial instruments, complicated trades or complex mortgage trickery. The bank’s misdeeds were fundamentally simple: Under intense pressure to meet aggressive sales goals, employees created sham accounts using the names  —   and sometimes, the actual money  —   of the bank’s real customers. And in some cases the customers did not discover the activity until they started accumulating fees. The reaction to the fake accounts at Wells Fargo, and to Mr. Stumpf’s response to what has been uncovered as a widespread problem, has been withering and unrelenting. Wells Fargo has been under fire from irate customers, regulators, lawmakers and former employees since last month, when it disclosed that it would pay $185 million in fines and penalties over the unauthorized creation of as many as two million bank and credit card accounts over a period of at least five years by branch employees who were said to be under intense pressure to hit ambitious product sales goals. The sales practices may reach back far longer. Former bank employees told The New York Times of concerns they raised internally as far back as 11 years ago. Mr. 
Stumpf, 63, who was twice called in front of Congress to testify about the scandal in recent weeks, faced vocal demands to resign from an outraged public, but the timing of his departure was unexpected given that the Wells Fargo board had not completed its internal investigation into the phony accounts and that the bank reports   earnings on Friday. He potentially walks away with millions of dollars, including a $20 million pension, as of Wednesday, another $4. 3 million in deferred compensation, plus stock worth $109 million, according to data from Equilar. As the scandal erupted, Mr. Stumpf, who grew up on a dairy and poultry farm in Minnesota and had long emphasized his folksy roots, was unable to quell the uproar. Rather, he often seemed to inflame it. During his testimony in front of Congress, Mr. Stumpf said he took responsibility for the problems, but would not admit that there anything wrong with the bank’s broader culture. When pressed repeatedly by members of the Senate Banking Committee about why he had not given back any of his lavish compensation, Mr. Stumpf insisted that the decision was not up to him, but to the bank’s board. He was reminded that he was the board’s chairman. One senator on the banking committee noted that criticism of the Wells Fargo scandal was one of the few things that Democrats and Republicans had agreed on in a long time. Mr. Stumpf’s spent 34 years at a bank that had once been regarded as a darling on Wall Street because of its ability to churn out profits even as other banks struggled. Mr. Stumpf was named chief executive in 2007 and was one of the country’s   bankers, earning about $19 million last year. He was named the 2013 Banker of the Year by the trade publication American Banker. Wells Fargo was once the most valuable bank as measured by the price of its stock, which attracted the billionaire Warren E. Buffett as its largest shareholder. Mr. Buffett’s company, Berkshire Hathaway, has a stake of about 10 percent. Mr. 
Buffett did not respond to a request for comment on Wednesday. Wells Fargo has since ceded the most valuable bank distinction to JPMorgan Chase. The board received a letter from Mr. Stumpf early on Wednesday indicating that he intended to retire, according to a person briefed on the matter. This person said the board was still in the early stages of conducting its investigation into the fake accounts and Mr. Stumpf’s handling of the scandal and had not drawn any conclusions from its inquiry. The board was not scheduled to meet on Wednesday when it received Mr. Stumpf’s letter, which this person described as brief and lacking any indication as to why the longtime C. E. O. decided to leave now. Top bank executives were expecting that Mr. Stumpf would address analysts and investors  —   as he has always done  —   when Wells Fargo reports its   results on Friday. Wells Fargo said Mr. Stumpf would be replaced by Timothy J. Sloan, its president and chief operating officer. Just two days earlier, the San Francisco bank shuffled its top management to give more responsibility to Mr. Sloan. “I am grateful for the opportunity to have led Wells Fargo,” Mr. Stumpf said in a written statement. “While I have been deeply committed and focused on managing the company through this period, I have decided it is best for the company that I step aside. ” Mr. Sloan was informed early Wednesday that he was the bank’s new C. E. O. though he had known for several days that Mr. Stumpf was preparing to step down. In an interview, Mr. Sloan said Mr. Stumpf had told him of his decision to retire after concluding that he would most likely continue to be the focus of much of the criticism being leveled at the bank. “It was an incredibly selfless decision,” Mr. Sloan said. Federal regulators and the Los Angeles city attorney began looking into the issue in 2013. 
The deal Wells Fargo announced last month to settle cases brought by their offices was intended to resolve the matter, but it instead opened the floodgates, provoking a furious outpouring of questions, criticism and new information about what former employees say was a   culture of toxic sales pressure. The Department of Justice and Labor Department opened their own inquiries. Mr. Sloan said the bank had already named a new head of community banking, ended the retail banking sales goals that employees blamed for putting them under undue pressure and extended its review of its sales practices back to 2009. Originally, the bank’s review covered its activities only between 2011 and 2015. Mr. Sloan will not hold the dual of roles of chief executive and chairman as Mr. Stump had. Mr. Stumpf agreed last month to surrender stock grants valued at $41 million  —   another highly unusual move on Wall Street, where clawback provisions have been widely adopted but are almost never deployed. Mr. Stumpf said he would give up his $2. 8 million annual salary for the duration of the board’s investigation. Because his departure is a resignation, Mr. Stumpf will leave without any severance, according to Oscar Suris, a company spokesman. Senator Elizabeth Warren, the Massachusetts Democrat who blasted Mr. Stumpf when he appeared before the Senate last month, fired off a scathing statement on Wednesday. “As I said at the hearing last month, Mr. Stumpf should resign, return every nickel he made while this scam was going on, and face an investigation by the Justice Department and S. E. C. ,” she said. “So far, he’s one for three.
                                                   
                     """)

# news_detection returns the prediction for a single sample; read that one element explicitly
# instead of comparing the whole array to a list and relying on the truth value of the result.
if val[0] == 0:
    print('The news is reliable.')
else:
    print('The news is unreliable')

The news is reliable.
