## Step 1 - Project Problem Statement

## Step 2 - Data Gathering

### 2.1 Import Libraries

In [1]:
import pandas as pd
import numpy as np
import os
import glob
import re
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split,RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier,plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score,confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

In [2]:
df = pd.read_csv(r"D:\python\Project\NLP 4th Project\fake_or_real_news.csv")
df

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL
...,...,...,...,...
6330,4490,State Department says it can't find emails fro...,The State Department told the Republican Natio...,REAL
6331,8062,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,FAKE
6332,8622,Anti-Trump Protesters Are Tools of the Oligarc...,Anti-Trump Protesters Are Tools of the Oligar...,FAKE
6333,4021,"In Ethiopia, Obama seeks progress on peace, se...","ADDIS ABABA, Ethiopia —President Obama convene...",REAL


In [3]:
df.shape # shape of dataframe

(6335, 4)

In [4]:
df['label'].value_counts() # Values in Language feature

REAL    3171
FAKE    3164
Name: label, dtype: int64

In [5]:
df["Unnamed: 0"]

0        8476
1       10294
2        3608
3       10142
4         875
        ...  
6330     4490
6331     8062
6332     8622
6333     4021
6334     4330
Name: Unnamed: 0, Length: 6335, dtype: int64

In [6]:
# Unnamed: 0 is irrevent feature

df.drop("Unnamed: 0", axis =1, inplace =True)

## Step 3 - Data Cleaning

In [7]:
df.duplicated(keep = "first").value_counts() # check out the duplicate value in dataframe

False    6306
True       29
dtype: int64

In [8]:
# It shows True= 29 it means that there are 29 duplicate rows are present in the dataset.

In [9]:
# drop duplicate rows

df.drop_duplicates(keep="first", inplace=True, ignore_index=True)

In [10]:
df

Unnamed: 0,title,text,label
0,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL
...,...,...,...
6301,State Department says it can't find emails fro...,The State Department told the Republican Natio...,REAL
6302,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,FAKE
6303,Anti-Trump Protesters Are Tools of the Oligarc...,Anti-Trump Protesters Are Tools of the Oligar...,FAKE
6304,"In Ethiopia, Obama seeks progress on peace, se...","ADDIS ABABA, Ethiopia —President Obama convene...",REAL


## Step 4 - EDA (Exploratory Data Analysis)

In [11]:
df.shape # shape of dataframe

(6306, 3)

In [12]:
# df.describe()
# Check out the discription of dataset,
# it shows the count, unique,top and frequency of all features separately

df.describe()

Unnamed: 0,title,text,label
count,6306,6306,6306
unique,6256,6060,2
top,Michael Moore Owes Me $4.99,"Killing Obama administration rules, dismantlin...",REAL
freq,3,57,3154


In [13]:
# df.info()
# It gives the information of dataset
# It shows the non null count and datatypes of every feature

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6306 entries, 0 to 6305
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   title   6306 non-null   object
 1   text    6306 non-null   object
 2   label   6306 non-null   object
dtypes: object(3)
memory usage: 147.9+ KB


In [14]:
df.isna().sum() # check null value count of every feature

title    0
text     0
label    0
dtype: int64

In [15]:
# it clearly indicates that there is no null value in dataset.

## Label Encoding

In [16]:
df["label"].value_counts() # Values in Language feature

REAL    3154
FAKE    3152
Name: label, dtype: int64

In [17]:
# Real = 3154 and Fake = 3152 
# Both values are almost similar so no need of data balancing

In [18]:
encoder = LabelEncoder()
df['label'] = encoder.fit_transform(df['label'])
df

Unnamed: 0,title,text,label
0,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",0
1,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,0
2,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,1
3,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",0
4,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,1
...,...,...,...
6301,State Department says it can't find emails fro...,The State Department told the Republican Natio...,1
6302,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,0
6303,Anti-Trump Protesters Are Tools of the Oligarc...,Anti-Trump Protesters Are Tools of the Oligar...,0
6304,"In Ethiopia, Obama seeks progress on peace, se...","ADDIS ABABA, Ethiopia —President Obama convene...",1


In [19]:
class_list = encoder.classes_
class_list

array(['FAKE', 'REAL'], dtype=object)

In [20]:
import pickle
encoding = pickle.dump(encoder,open('encoder.pkl','wb'))

In [21]:
# split the dataset into dependent and independent feature

x=df.drop("label", axis=1)
y=df["label"]

In [22]:
x # independent feature

Unnamed: 0,title,text
0,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello..."
1,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...
2,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...
3,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T..."
4,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...
...,...,...
6301,State Department says it can't find emails fro...,The State Department told the Republican Natio...
6302,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...,The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...
6303,Anti-Trump Protesters Are Tools of the Oligarc...,Anti-Trump Protesters Are Tools of the Oligar...
6304,"In Ethiopia, Obama seeks progress on peace, se...","ADDIS ABABA, Ethiopia —President Obama convene..."


In [23]:
y # dependent feature

0       0
1       0
2       1
3       0
4       1
       ..
6301    1
6302    0
6303    0
6304    1
6305    1
Name: label, Length: 6306, dtype: int32

In [24]:
x.columns

Index(['title', 'text'], dtype='object')

In [25]:
# if x contain 2 columns then train_test_split fetch an error 
# ValueError: Found input variables with inconsistent numbers of samples: [2, 6306]
 
############
# To avoid this error we can delete one less important column 
###########

df.drop("title", axis =1, inplace =True)

In [26]:
df.shape

(6306, 2)

In [27]:
# again split the dataset into dependent and independent feature

x=df["text"]
y=df["label"]

In [28]:
x # independent feature

0       Daniel Greenfield, a Shillman Journalism Fello...
1       Google Pinterest Digg Linkedin Reddit Stumbleu...
2       U.S. Secretary of State John F. Kerry said Mon...
3       — Kaydee King (@KaydeeKing) November 9, 2016 T...
4       It's primary day in New York and front-runners...
                              ...                        
6301    The State Department told the Republican Natio...
6302    The ‘P’ in PBS Should Stand for ‘Plutocratic’ ...
6303     Anti-Trump Protesters Are Tools of the Oligar...
6304    ADDIS ABABA, Ethiopia —President Obama convene...
6305    Jeb Bush Is Suddenly Attacking Trump. Here's W...
Name: text, Length: 6306, dtype: object

In [29]:
y # dependent feature

0       0
1       0
2       1
3       0
4       1
       ..
6301    1
6302    0
6303    0
6304    1
6305    1
Name: label, Length: 6306, dtype: int32

In [30]:
############### 1. Word embedding for count vec
count_vec = CountVectorizer(analyzer="word")
count_vec_x = count_vec.fit_transform(x)
cv_x_train, cv_x_test, cv_y_train,cv_y_test = train_test_split(count_vec_x,y,random_state=30, test_size=0.25, stratify=y)
print(cv_x_train.shape,cv_x_test.shape,cv_y_train.shape,cv_y_test.shape)

############### 2. Word embedding for tfidf vec
tfidf_vec = TfidfVectorizer(analyzer='word', min_df=0.05)
tfidf_vec_x = tfidf_vec.fit_transform(x)
tfidf_x_train, tfidf_x_test, tfidf_y_train,tfidf_y_test = train_test_split(tfidf_vec_x,y,random_state=30, test_size=0.25, stratify=y)
print(tfidf_x_train.shape, tfidf_x_test.shape, tfidf_y_train.shape,tfidf_y_test.shape)

############### 3. Word embedding for tfidf ngram vec
tfidf_ngram_vec = TfidfVectorizer(analyzer='word', ngram_range=(2,3),   min_df=0.02)
tfidf_ngram_vec_x = tfidf_ngram_vec.fit_transform(x)
tfngram_x_train, tfngram_x_test, tfngram_y_train,tfngram_y_test = train_test_split(tfidf_ngram_vec_x,y,random_state=30, test_size=0.25, stratify=y)
print(tfngram_x_train.shape, tfngram_x_test.shape, tfngram_y_train.shape,tfngram_y_test.shape)

(4729, 67659) (1577, 67659) (4729,) (1577,)
(4729, 1314) (1577, 1314) (4729,) (1577,)
(4729, 2955) (1577, 2955) (4729,) (1577,)


## Step 5 - Model Training

In [31]:
def train_model(model_name, x_train,x_test,y_train,y_test):
    """This function is for model trainingn"""    
    model_name.fit(x_train,y_train)   ### Model Training
    
    
    ############### model evaluation 
    
    ########### Test Data Evaluation 
    print('#'*50)
    print(f"TESTING DATA EVALUATION")
    y_pred_test = model_name.predict(x_test)
    acc_score = accuracy_score(y_test,y_pred_test)
    cnf_matrix = confusion_matrix(y_test,y_pred_test)
    clf_report = classification_report(y_test,y_pred_test)
    
    print(f"Accuracy_Score = {acc_score}")
    print(f"Confusion Matrix = \n{cnf_matrix}")
    print(f"Classification Report = \n{clf_report}")
    
    print('#'*50)
    print(f"TRAINING DATA EVALUATION")
    print()
    print()
    ########### training Data Evaluation 
    y_pred_train = model_name.predict(x_train)
    acc_score = accuracy_score(y_train,y_pred_train)
    cnf_matrix = confusion_matrix(y_train,y_pred_train)
    clf_report = classification_report(y_train,y_pred_train)
    
    print(f"Accuracy_Score = {acc_score}")
    print(f"Confusion Matrix = \n{cnf_matrix}")
    print(f"Classification Report = \n{clf_report}")
    
    return "Success"

## Step 6 - Model Evaluation

### 1. Logestic Regression

In [32]:
lgr_model = LogisticRegression(max_iter= 500)

train_model(lgr_model,cv_x_train, cv_x_test, cv_y_train,cv_y_test)
train_model(lgr_model,tfidf_x_train, tfidf_x_test, tfidf_y_train,tfidf_y_test)
train_model(lgr_model,tfngram_x_train, tfngram_x_test, tfngram_y_train,tfngram_y_test)

##################################################
TESTING DATA EVALUATION
Accuracy_Score = 0.9194673430564363
Confusion Matrix = 
[[737  51]
 [ 76 713]]
Classification Report = 
              precision    recall  f1-score   support

           0       0.91      0.94      0.92       788
           1       0.93      0.90      0.92       789

    accuracy                           0.92      1577
   macro avg       0.92      0.92      0.92      1577
weighted avg       0.92      0.92      0.92      1577

##################################################
TRAINING DATA EVALUATION


Accuracy_Score = 0.9995770776062592
Confusion Matrix = 
[[2364    0]
 [   2 2363]]
Classification Report = 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2364
           1       1.00      1.00      1.00      2365

    accuracy                           1.00      4729
   macro avg       1.00      1.00      1.00      4729
weighted avg       1.00      1.00   

'Success'

## 2. K Nearest Neighbour (KNN)

In [33]:
knn_model = KNeighborsClassifier()

train_model(knn_model,cv_x_train, cv_x_test, cv_y_train,cv_y_test)
train_model(knn_model,tfidf_x_train, tfidf_x_test, tfidf_y_train,tfidf_y_test)
train_model(knn_model,tfngram_x_train, tfngram_x_test, tfngram_y_train,tfngram_y_test)

##################################################
TESTING DATA EVALUATION
Accuracy_Score = 0.8046924540266328
Confusion Matrix = 
[[636 152]
 [156 633]]
Classification Report = 
              precision    recall  f1-score   support

           0       0.80      0.81      0.81       788
           1       0.81      0.80      0.80       789

    accuracy                           0.80      1577
   macro avg       0.80      0.80      0.80      1577
weighted avg       0.80      0.80      0.80      1577

##################################################
TRAINING DATA EVALUATION


Accuracy_Score = 0.8657221399873123
Confusion Matrix = 
[[2052  312]
 [ 323 2042]]
Classification Report = 
              precision    recall  f1-score   support

           0       0.86      0.87      0.87      2364
           1       0.87      0.86      0.87      2365

    accuracy                           0.87      4729
   macro avg       0.87      0.87      0.87      4729
weighted avg       0.87      0.87   

'Success'

## 3. Decision Tree

In [34]:
dt_model = DecisionTreeClassifier(random_state=2)

train_model(dt_model,cv_x_train, cv_x_test, cv_y_train,cv_y_test)
train_model(dt_model,tfidf_x_train, tfidf_x_test, tfidf_y_train,tfidf_y_test)
train_model(dt_model,tfngram_x_train, tfngram_x_test, tfngram_y_train,tfngram_y_test)

##################################################
TESTING DATA EVALUATION
Accuracy_Score = 0.8173747622067217
Confusion Matrix = 
[[644 144]
 [144 645]]
Classification Report = 
              precision    recall  f1-score   support

           0       0.82      0.82      0.82       788
           1       0.82      0.82      0.82       789

    accuracy                           0.82      1577
   macro avg       0.82      0.82      0.82      1577
weighted avg       0.82      0.82      0.82      1577

##################################################
TRAINING DATA EVALUATION


Accuracy_Score = 1.0
Confusion Matrix = 
[[2364    0]
 [   0 2365]]
Classification Report = 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2364
           1       1.00      1.00      1.00      2365

    accuracy                           1.00      4729
   macro avg       1.00      1.00      1.00      4729
weighted avg       1.00      1.00      1.00      47

'Success'

## 4. Random Forest

In [35]:
rf_model = RandomForestClassifier()

train_model(rf_model,cv_x_train, cv_x_test, cv_y_train,cv_y_test)
train_model(rf_model,tfidf_x_train, tfidf_x_test, tfidf_y_train,tfidf_y_test)
train_model(rf_model,tfngram_x_train, tfngram_x_test, tfngram_y_train,tfngram_y_test)

##################################################
TESTING DATA EVALUATION
Accuracy_Score = 0.87000634115409
Confusion Matrix = 
[[692  96]
 [109 680]]
Classification Report = 
              precision    recall  f1-score   support

           0       0.86      0.88      0.87       788
           1       0.88      0.86      0.87       789

    accuracy                           0.87      1577
   macro avg       0.87      0.87      0.87      1577
weighted avg       0.87      0.87      0.87      1577

##################################################
TRAINING DATA EVALUATION


Accuracy_Score = 1.0
Confusion Matrix = 
[[2364    0]
 [   0 2365]]
Classification Report = 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2364
           1       1.00      1.00      1.00      2365

    accuracy                           1.00      4729
   macro avg       1.00      1.00      1.00      4729
weighted avg       1.00      1.00      1.00      4729

'Success'

## 5. Support Vector Machine

In [36]:
svc_model = SVC()

train_model(svc_model,cv_x_train, cv_x_test, cv_y_train,cv_y_test)
train_model(svc_model,tfidf_x_train, tfidf_x_test, tfidf_y_train,tfidf_y_test)
train_model(svc_model,tfngram_x_train, tfngram_x_test, tfngram_y_train,tfngram_y_test)

##################################################
TESTING DATA EVALUATION
Accuracy_Score = 0.8459099556119214
Confusion Matrix = 
[[739  49]
 [194 595]]
Classification Report = 
              precision    recall  f1-score   support

           0       0.79      0.94      0.86       788
           1       0.92      0.75      0.83       789

    accuracy                           0.85      1577
   macro avg       0.86      0.85      0.84      1577
weighted avg       0.86      0.85      0.84      1577

##################################################
TRAINING DATA EVALUATION


Accuracy_Score = 0.8809473461619792
Confusion Matrix = 
[[2273   91]
 [ 472 1893]]
Classification Report = 
              precision    recall  f1-score   support

           0       0.83      0.96      0.89      2364
           1       0.95      0.80      0.87      2365

    accuracy                           0.88      4729
   macro avg       0.89      0.88      0.88      4729
weighted avg       0.89      0.88   

'Success'

## 6. Naive Bayes Classifier

In [37]:
nb_model = MultinomialNB()

train_model(nb_model,cv_x_train, cv_x_test, cv_y_train,cv_y_test)
train_model(nb_model,tfidf_x_train, tfidf_x_test, tfidf_y_train,tfidf_y_test)
train_model(nb_model,tfngram_x_train, tfngram_x_test, tfngram_y_train,tfngram_y_test)

##################################################
TESTING DATA EVALUATION
Accuracy_Score = 0.8953709575142677
Confusion Matrix = 
[[681 107]
 [ 58 731]]
Classification Report = 
              precision    recall  f1-score   support

           0       0.92      0.86      0.89       788
           1       0.87      0.93      0.90       789

    accuracy                           0.90      1577
   macro avg       0.90      0.90      0.90      1577
weighted avg       0.90      0.90      0.90      1577

##################################################
TRAINING DATA EVALUATION


Accuracy_Score = 0.9306407274265173
Confusion Matrix = 
[[2120  244]
 [  84 2281]]
Classification Report = 
              precision    recall  f1-score   support

           0       0.96      0.90      0.93      2364
           1       0.90      0.96      0.93      2365

    accuracy                           0.93      4729
   macro avg       0.93      0.93      0.93      4729
weighted avg       0.93      0.93   

'Success'

## Final Model Selection

In [38]:
lgr_model = LogisticRegression(max_iter= 500)

train_model(lgr_model,tfidf_x_train, tfidf_x_test, tfidf_y_train,tfidf_y_test)

##################################################
TESTING DATA EVALUATION
Accuracy_Score = 0.9042485732403297
Confusion Matrix = 
[[714  74]
 [ 77 712]]
Classification Report = 
              precision    recall  f1-score   support

           0       0.90      0.91      0.90       788
           1       0.91      0.90      0.90       789

    accuracy                           0.90      1577
   macro avg       0.90      0.90      0.90      1577
weighted avg       0.90      0.90      0.90      1577

##################################################
TRAINING DATA EVALUATION


Accuracy_Score = 0.9308521886233876
Confusion Matrix = 
[[2206  158]
 [ 169 2196]]
Classification Report = 
              precision    recall  f1-score   support

           0       0.93      0.93      0.93      2364
           1       0.93      0.93      0.93      2365

    accuracy                           0.93      4729
   macro avg       0.93      0.93      0.93      4729
weighted avg       0.93      0.93   

'Success'

In [39]:
import pickle
model = pickle.dump(lgr_model,open('model.pkl','wb'))

In [40]:
tfidf = pickle.dump(tfidf_vec,open('tfidf_vec.pkl','wb'))

## User Define Function

In [41]:
def prediction(article):
    text = ["".join(article)]
    user_count_vec = tfidf_vec.transform(text)
    result = lgr_model.predict(user_count_vec)
    class_list = encoder.classes_
    return (f"News = {class_list[result[0]]}")

In [42]:
df.head()

Unnamed: 0,text,label
0,"Daniel Greenfield, a Shillman Journalism Fello...",0
1,Google Pinterest Digg Linkedin Reddit Stumbleu...,0
2,U.S. Secretary of State John F. Kerry said Mon...,1
3,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",0
4,It's primary day in New York and front-runners...,1


In [43]:
article="""U.S. Secretary of State John F. Kerry said Monday that he will stop in Paris later this week, amid criticism that no top American officials attended Sundayâ€™s unity march against terrorism.

Kerry said he expects to arrive in Paris Thursday evening, as he heads home after a week abroad. He said he will fly to France at the conclusion of a series of meetings scheduled for Thursday in Sofia, Bulgaria. He plans to meet the next day with Foreign Minister Laurent Fabius and President Francois Hollande, then return to Washington.

The visit by Kerry, who has family and childhood ties to the country and speaks fluent French, could address some of the criticism that the United States snubbed France in its darkest hour in many years.

The French press on Monday was filled with questions about why neither President Obama nor Kerry attended Sundayâ€™s march, as about 40 leaders of other nations did. Obama was said to have stayed away because his own security needs can be taxing on a country, and Kerry had prior commitments.

Among roughly 40 leaders who did attend was Israeli Prime Minister Benjamin Netanyahu, no stranger to intense security, who marched beside Hollande through the city streets. The highest ranking U.S. officials attending the march were Jane Hartley, the ambassador to France, and Victoria Nuland, the assistant secretary of state for European affairs. Attorney General Eric H. Holder Jr. was in Paris for meetings with law enforcement officials but did not participate in the march.

Kerry spent Sunday at a business summit hosted by Indiaâ€™s prime minister, Narendra Modi. The United States is eager for India to relax stringent laws that function as barriers to foreign investment and hopes Modiâ€™s government will act to open the huge Indian market for more American businesses.

In a news conference, Kerry brushed aside criticism that the United States had not sent a more senior official to Paris as â€œquibbling a little bit.â€ He noted that many staffers of the American Embassy in Paris attended the march, including the ambassador. He said he had wanted to be present at the march himself but could not because of his prior commitments in India.

â€œBut that is why I am going there on the way home, to make it crystal clear how passionately we feel about the events that have taken place there,â€ he said.

â€œAnd I donâ€™t think the people of France have any doubts about Americaâ€™s understanding of what happened, of our personal sense of lo"""
prediction(article)

'News = REAL'

In [44]:
article = """President Joe Biden will announce new actions on climate change that he can take on his own just days after an influential Democratic senator quashed hopes for a sweeping legislative package of new environmental programs this year.

Biden is to unveil the latest efforts during a visit on Wednesday to a former coal-fired power plant in Somerset, Massachusetts, that is shifting to offshore wind manufacturing. It’s the embodiment of the transition to clean energy that Biden is seeking but has struggled to realize in the first 18 months of his presidency.

Wednesday’s executive actions include new initiatives to bolster the domestic offshore wind industry as well as efforts to help communities cope with soaring temperatures through programs administered by the Federal Emergency Management Agency and the Department of Health and Human Services, according to a White House official."""
prediction(article)

'News = REAL'

In [45]:
article = """Former Indian cricketer Syed Kirmani has backed Virat Kohli for the upcoming T20 World Cup, saying that once the star batter gets into form, he will be unstoppable.

Kohli is currently going through a lean patch in international cricket, with his last hundred coming way back in 2019. With the World Cup in Australia on the horizon, many have questioned the star batter's spot in the squad.

However, Kirmani backed the former India captain and said that he should be part of the team that will be travelling down under. Talking to Dainik Jagran, the World Cup winner said that Kohli has loads of experience and once he returns to form, the star batter will be unstoppable.

Kirmani also said that Kohli could be a game-changer during the tournament and a player with his abilities deserves to be in the squad.

“Virat Kohli has loads of experience. He should be in the T20 World Cup squad. Once Kohli returns to form, he will be unstoppable. He could be a game-changer. A player with Kohli’s experience and abilities deserves to be in the World Cup squad," said Kirmani.

Kirmani also added that if someone else was in Kohli's place, he would have lost his spot in the team, since the competition for places is tough at the moment. He ended by saying that an established player like Kohli should be given the benefit of the doubt.

ADVERTISEMENT
“There is a tough competition in the Indian team. Look, Had it been someone else in place of Virat, he would’ve been dropped from the team by now. But I feel that an established player should be given the benefit of the doubt," said Kirmani."""
prediction(article)

'News = REAL'

In [46]:
article = """Click Here To Learn More About Alexandra's Personalized Essences Psychic Protection Click Here for More Information on Psychic Protection! Implant Removal Series Click here to listen to the IRP and SA/DNA Process Read The Testimonials Click Here To Read What Others Are Experiencing! Copyright Â© 2012 by Galactic Connection. All Rights Reserved. 
Excerpts may be used, provided that full and clear credit is given to Alexandra Meadors and www.galacticconnection.com with appropriate and specific direction to the original content. Unauthorized use and/or duplication of any material on this website without express and written permission from its author and owner is strictly prohibited. Thank you. 
Privacy Policy 
By subscribing to GalacticConnection.com you acknowledge that your name and e-mail address will be added to our database. As with all other personal information, only working affiliates of GalacticConnection.com have access to this data. We do not give GalacticConnection.com addresses to outside companies, nor will we ever rent or sell your email address. Any e-mail you send to GalacticConnection.com is completely confidential. Therefore, we will not add your name to our e-mail list without your permission. Continue reading... Galactic Connection 2016 | Design & Development by AA at Superluminal Systems Sign Up forOur Newsletter 
Join our newsletter to receive exclusive updates, interviews, discounts, and more. Join Us!"""
prediction(article)

'News = FAKE'

In [47]:
columns_dict = {"col_name": ["text"]}
columns_dict

{'col_name': ['text']}

In [48]:
import json

In [49]:
with open('columns_name.json','w') as json_file:
    json.dump(columns_dict,json_file)