In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/fake-news-detection-78k/fake_real_news_78k.csv


**Importing the Required Libraries**

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import re
import string

**Storing the Data into a DataFrame**

In [3]:
df = pd.read_csv("/kaggle/input/fake-news-detection-78k/fake_real_news_78k.csv")

In [4]:
df.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,0,LAW ENFORCEMENT ON HIGH ALERT Following Threat...,No comment is expected from Barack Obama Membe...,TRUE
1,1,,Did they post their votes for Hillary already?,TRUE
2,2,UNBELIEVABLE! OBAMA’S ATTORNEY GENERAL SAYS MO...,"Now, most of the demonstrators gathered last ...",TRUE
3,3,"Bobby Jindal, raised Hindu, uses story of Chri...",A dozen politically active pastors came here f...,FAKE
4,4,SATAN 2: Russia unvelis an image of its terrif...,"The RS-28 Sarmat missile, dubbed Satan 2, will...",TRUE


**Separating the Combined Data (Grouping By their Labels)**

In [5]:
df_False, df_True = [x for _, x in df.groupby(df['label'] == "TRUE")]

In [6]:
df_True.shape

(40277, 4)

In [7]:
df_False.shape

(38192, 4)

**Assigning the DataFrames their Classes**

In [8]:
df_False["class"] = 0
df_True["class"] = 1

**Merging them as the labelling is done**

In [9]:
df_merge = pd.concat([df_False, df_True], axis =0 )
df_merge.head(10)
#df_merge.tail(10)

Unnamed: 0.1,Unnamed: 0,title,text,label,class
3,3,"Bobby Jindal, raised Hindu, uses story of Chri...",A dozen politically active pastors came here f...,FAKE,0
11,11,"May Brexit offer would hurt, cost EU citizens ...",BRUSSELS (Reuters) - British Prime Minister Th...,FAKE,0
12,12,Schumer calls on Trump to appoint official to ...,"WASHINGTON (Reuters) - Charles Schumer, the to...",FAKE,0
14,14,No Change Expected for ESPN Political Agenda D...,As more and more sports fans turn off ESPN to ...,FAKE,0
15,15,Billionaire Odebrecht in Brazil scandal releas...,RIO DE JANEIRO/SAO PAULO (Reuters) - Billionai...,FAKE,0
17,17,U.N. seeks humanitarian pause in Sanaa where s...,GENEVA (Reuters) - The United Nations called o...,FAKE,0
19,19,Second judge says Clinton email setup may have...,NEW YORK (Reuters) - A second federal judge ha...,FAKE,0
26,26,Supreme Court Won’t Hear Appeal From Texas on ...,WASHINGTON — The Supreme Court rejected on ...,FAKE,0
27,27,Islamic State driven out of last stronghold in...,BAGHDAD (Reuters) - Iraqi forces announced on ...,FAKE,0
28,28,Senators Propose Giving States Option to Keep ...,WASHINGTON — Several Republican senators on...,FAKE,0


In [10]:
df_merge.columns

Index(['Unnamed: 0', 'title', 'text', 'label', 'class'], dtype='object')

**Dropping the Un-necessary Columns**

In [11]:
df_merge_drop = df_merge.drop(["Unnamed: 0","title","label"], axis=1)

**Finding the number of "NULL" values in the DataSet**

In [12]:
df_merge_drop.isnull().sum()

text     39
class     0
dtype: int64

In [13]:
df_merge_drop = df_merge_drop.sample(frac = 1)
df_merge_drop.head(10)

Unnamed: 0,text,class
54325,WASHINGTON (Reuters) - U.S. Senate Armed Servi...,0
57657,ROME (AP) — Italian rescue ships have pluck...,0
60508,WASHINGTON (Reuters) - U.S. Vice President Mik...,0
57887,ISLAMABAD (Reuters) - Gunmen on motorcycles op...,0
67292,Tune in to the Alternate Current Radio Network...,1
73959,Despite all the talk that Pope Francis’ addres...,1
11364,PARIS (Reuters) - French President Emmanuel Ma...,0
75634,When Thomas Bouwsma’s wife Patty was pregnant ...,1
24751,"On Tuesday, China slammed the United States’ d...",0
2995,Does anyone even care that this American man w...,1


**Arranging the DataSet according to index**

In [14]:
df_merge_drop.reset_index(inplace = True)
df_merge_drop.drop(["index"], axis = 1, inplace = True)

In [15]:
df_merge_drop.columns
df_merge_drop.head()

Unnamed: 0,text,class
0,WASHINGTON (Reuters) - U.S. Senate Armed Servi...,0
1,ROME (AP) — Italian rescue ships have pluck...,0
2,WASHINGTON (Reuters) - U.S. Vice President Mik...,0
3,ISLAMABAD (Reuters) - Gunmen on motorcycles op...,0
4,Tune in to the Alternate Current Radio Network...,1


**Defining Text Pre-Processing using "Regular Expression" (RegEx)**

In [16]:
def wordopt(text):
    text = text.lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub("\\W"," ",text) 
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)    
    return text

**Removing the Numeric Data from the Dataset, so that the can be fed to the Model**

In [17]:
from string import digits

for i in range(0,len(df_merge_drop)):
    remove_digits = str.maketrans('', '', digits)
    ini_string=str(df_merge_drop["text"][i])
    df_merge_drop["text"][i] = ini_string.translate(remove_digits)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


**Applying Text Pre-Processing using "Regular Expression" (RegEx)**

In [18]:
df_merge_drop["text"] = df_merge_drop["text"].apply(wordopt)

**Assigning 'x' and 'y' the columns from the DataSet**

In [19]:
x = df_merge_drop["text"]
y = df_merge_drop["class"]

**Performing Train-Test Split**

In [20]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

**Importing TFIDF Vectorizer to Encode the Text Data**

In [21]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

**Prediction using 'LOGISTIC REGRESSION'**

In [None]:
from sklearn.linear_model import LogisticRegression

LR = LogisticRegression()
LR.fit(xv_train,y_train)

In [23]:
pred_lr=LR.predict(xv_test)

In [24]:
LR.score(xv_test, y_test)

0.8775614231827913

In [25]:
print(classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.90      0.84      0.87      9490
           1       0.86      0.91      0.88     10128

    accuracy                           0.88     19618
   macro avg       0.88      0.88      0.88     19618
weighted avg       0.88      0.88      0.88     19618



**Prediction using 'DECISION TREE'**

In [26]:
from sklearn.tree import DecisionTreeClassifier

DT = DecisionTreeClassifier()
DT.fit(xv_train, y_train)

DecisionTreeClassifier()

In [27]:
pred_dt = DT.predict(xv_test)

In [28]:
DT.score(xv_test, y_test)

0.7772453868895912

In [29]:
print(classification_report(y_test, pred_dt))

              precision    recall  f1-score   support

           0       0.75      0.80      0.78      9490
           1       0.80      0.76      0.78     10128

    accuracy                           0.78     19618
   macro avg       0.78      0.78      0.78     19618
weighted avg       0.78      0.78      0.78     19618



**Prediction using 'GRADIENT BOOSTING'**

In [30]:
from sklearn.ensemble import GradientBoostingClassifier

GBC = GradientBoostingClassifier(random_state=0)
GBC.fit(xv_train, y_train)

GradientBoostingClassifier(random_state=0)

In [31]:
pred_gbc = GBC.predict(xv_test)

In [32]:
GBC.score(xv_test, y_test)

0.875777347334081

In [33]:
print(classification_report(y_test, pred_gbc))

              precision    recall  f1-score   support

           0       0.92      0.82      0.86      9490
           1       0.84      0.93      0.89     10128

    accuracy                           0.88     19618
   macro avg       0.88      0.87      0.87     19618
weighted avg       0.88      0.88      0.88     19618



**Prediction using 'RANDOM FOREST'**

In [None]:
from sklearn.ensemble import RandomForestClassifier

RFC = RandomForestClassifier(random_state=0)
RFC.fit(xv_train, y_train)

In [None]:
pred_rfc = RFC.predict(xv_test)

In [None]:
RFC.score(xv_test, y_test)

In [None]:
print(classification_report(y_test, pred_rfc))

**Performing the Manual Testing of Models using the 'news' input by the User**

In [None]:
def output_lable(n):
    if n == 1:
        return "Fake News"
    elif n == 0:
        return "Not A Fake News"
    
def manual_testing(news):
    testing_news = {"text":[news]}
    new_def_test = pd.DataFrame(testing_news)
    new_def_test["text"] = new_def_test["text"].apply(wordopt) 
    new_x_test = new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test)
    pred_LR = LR.predict(new_xv_test)
    pred_RFC = RFC.predict(new_xv_test)
    pred_GBC = GBC.predict(new_xv_test)
    pred_DT = DT.predict(new_xv_test)

    print("\n\nLR Prediction: {}".format(output_lable(pred_LR[0])))
    print("\n\nRFC Prediction: {}".format(output_lable(pred_RFC[0])))
    print("\n\nGBC Prediction: {}".format(output_lable(pred_GBC[0])))
    print("\n\nDT Prediction: {}".format(output_lable(pred_DT[0])))

In [None]:
#news = str(input())
news = " Indian Politics is a complex and diverse system that encompasses a range of political ideologies, parties, and interests, with the potential to significantly impact the nation's social, economic, and geopolitical landscape. The Indian political system is based on a federal parliamentary democratic model, with power being distributed between the central government and the various state governments. Political parties play a crucial role in shaping the nation's policies and governance, with the Indian National Congress and the Bharatiya Janata Party being two of the most prominent political parties in the country. However, Indian politics also faces various challenges such as corruption, caste-based politics, communal tensions, and regionalism, which often pose significant obstacles to effective governance and political stability."
manual_testing(news)