### IMPORTING LIBRARIES

In [44]:
import pandas as pd
import numpy as np

In [45]:
true_news = pd.read_csv(r"C:\Users\NAVJOT\Downloads\True.csv")

In [46]:
fake_news = pd.read_csv(r"C:\Users\NAVJOT\Downloads\Fake.csv")

In [47]:
true_news['label']=1

In [48]:
fake_news['label']=0

In [49]:
news = pd.concat([fake_news, true_news], axis=0)

In [50]:
news.head()

Unnamed: 0,title,text,subject,date,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


In [51]:
news.tail()

Unnamed: 0,title,text,subject,date,label
21412,'Fully committed' NATO backs new U.S. approach...,BRUSSELS (Reuters) - NATO allies on Tuesday we...,worldnews,"August 22, 2017",1
21413,LexisNexis withdrew two products from Chinese ...,"LONDON (Reuters) - LexisNexis, a provider of l...",worldnews,"August 22, 2017",1
21414,Minsk cultural hub becomes haven from authorities,MINSK (Reuters) - In the shadow of disused Sov...,worldnews,"August 22, 2017",1
21415,Vatican upbeat on possibility of Pope Francis ...,MOSCOW (Reuters) - Vatican Secretary of State ...,worldnews,"August 22, 2017",1
21416,Indonesia to buy $1.14 billion worth of Russia...,JAKARTA (Reuters) - Indonesia will buy 11 Sukh...,worldnews,"August 22, 2017",1


In [52]:
news.isnull().sum()

title      0
text       0
subject    0
date       0
label      0
dtype: int64

In [53]:
news = news.drop(['title', 'subject', 'date'], axis=1)

#### RESUFFLING THE DATA TO PREVENT BIAS IN THE MODEL

In [54]:
news = news.sample(frac=1)

In [55]:
news.head()

Unnamed: 0,text,label
7039,"PALM BEACH, Fla./WASHINGTON (Reuters) - U.S. P...",1
14150,"On March 20, 2016 Barkley had this to say abou...",0
7568,(This version of the November 3 story officia...,1
14015,"MAR DEL PLATA, Argentina/BUENOS AIRES (Reuters...",1
9048,You probably can recall the huge diplomatic me...,0


In [56]:
news.reset_index(inplace=True)

In [57]:
news.head()

Unnamed: 0,index,text,label
0,7039,"PALM BEACH, Fla./WASHINGTON (Reuters) - U.S. P...",1
1,14150,"On March 20, 2016 Barkley had this to say abou...",0
2,7568,(This version of the November 3 story officia...,1
3,14015,"MAR DEL PLATA, Argentina/BUENOS AIRES (Reuters...",1
4,9048,You probably can recall the huge diplomatic me...,0


In [58]:
news.drop(['index'], axis=1, inplace = True)

In [59]:
news.head()

Unnamed: 0,text,label
0,"PALM BEACH, Fla./WASHINGTON (Reuters) - U.S. P...",1
1,"On March 20, 2016 Barkley had this to say abou...",0
2,(This version of the November 3 story officia...,1
3,"MAR DEL PLATA, Argentina/BUENOS AIRES (Reuters...",1
4,You probably can recall the huge diplomatic me...,0


#### MAKING THE DATA MORE READABLE FOR  THE ML MODEL

In [60]:
import re

In [61]:
def wordconv(text):
    #converting to lower case 
    text = text.lower()
    #removing urls
    text = re.sub(r'https?://\S+|www\.\S+','',text)
    #removing html tags
    text = re.sub(r'<,*?>', '',text)
    # removing punctuation
    text = re.sub(r'[^\w\s]', '', text)
    # removing digits
    text = re.sub(r'\d', '', text)
    #removing newline characters
    text = re.sub(r'\n', ' ', text)
    
    return text


In [62]:
news['text'] = news['text'].apply(wordconv)

In [63]:
news['text']

0        palm beach flawashington reuters  us president...
1        on march   barkley had this to say about obama...
2         this version of the november  story officiall...
3        mar del plata argentinabuenos aires reuters  t...
4        you probably can recall the huge diplomatic me...
                               ...                        
44893    an arlington woman was arrested this week and ...
44894    london reuters  britain s opposition labour le...
44895    astana reuters  turkey and iran have agreed to...
44896    washington reuters  us house of representative...
44897    brussels reuters  the united states cannot uni...
Name: text, Length: 44898, dtype: object

In [64]:
x = news['text']
y = news['label']

#### DIVIDING DATA INTO TRAINING DATA AND TEST DATA

In [65]:
from sklearn.model_selection import train_test_split

In [66]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

#### CONVERTING DATA INTO NUMERICAL DATA

In [109]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [110]:
vectorization = TfidfVectorizer()

In [111]:
xv_train = vectorization.fit_transform(x_train)

# Transform the test data
xv_test = vectorization.transform(x_test)

In [112]:
xv_train

<31428x175005 sparse matrix of type '<class 'numpy.float64'>'
	with 6443569 stored elements in Compressed Sparse Row format>

In [113]:
xv_test

<13470x175005 sparse matrix of type '<class 'numpy.float64'>'
	with 2737547 stored elements in Compressed Sparse Row format>

#### CREATING A ML MODEL USING LOGISTIC REGRESSION

In [114]:
lr = LogisticRegression()

In [115]:
lr.fit(xv_train, y_train)

In [116]:
pred_lr = lr.predict(xv_test)

In [117]:
lr.score(xv_test, y_test)

0.9876020786933927

In [118]:
print(classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      7079
           1       0.98      0.99      0.99      6391

    accuracy                           0.99     13470
   macro avg       0.99      0.99      0.99     13470
weighted avg       0.99      0.99      0.99     13470



In [137]:
def output_label(n):
    if n==0:
        return "It is a Fake News"
    elif n==1:
        return "It is a Genuine News"

In [141]:
def manual_testing(news):
    testing_news = {"text": [news]} # Corrected syntax for defining dictionary
    new_def_test = pd. DataFrame (testing_news)
    new_def_test["text"] = new_def_test["text"].apply(wordconv)
    new_x_test = new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test) 
    pred_lr = lr.predict(new_xv_test)   
    return f"\n\nLR Prediction: {output_label(pred_lr[0])}"


In [144]:
news_article = str(input())

A court here on Wednesday summoned Chief Minister Arvind Kejriwal on July 12 in a money laundering case linked to the now-scrapped 2021-22 Delhi excise policy.  Special Judge (Prevention of Corruption Act) Kaveri Baweja of the Rouse Avenue court issued a production warrant for Mr. Kejriwal while taking cognisance of the eighth chargesheet filed by the Enforcement Directorate (ED) in the case.  The ED had filed the chargesheet on May 17 naming the CM and Delhi’s ruling Aam Aadmi Party (AAP) as accused. The case involves allegations that certain liquor retailers and manufacturers were favoured in exchange for money, which was used by the party for campaigning in the 2022 Goa Assembly poll.  Mr. Kejriwal is currently lodged in Tihar Jail. He was arrested by the ED on March 21.  The Supreme Court granted a 21-day interim bail to the Delhi CM on May 10 to campaign in the Lok Sabha election while hearing his plea against his arrest.  However, the AAP national convener had to return to Tihar 

In [145]:
manual_testing(news_article)

'\n\nLR Prediction: It is a Genuine News'