# STEP-1: IMPORTING REQUIRED LIBRARIES.

In [78]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import re
import string

# STEP-2: LOADING THE DATASET.

In [25]:
dt_fk = pd.read_csv(r"C:\Machine Learning Python\Fake.csv")
dt_tr = pd.read_csv(r"C:\Machine Learning Python\True.csv")
print("Fake-News Dataset: \n",dt_fk.head())
print("True-News Dataset: \n",dt_tr.head())

Fake-News Dataset: 
                                                title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text subject  \
0  Donald Trump just couldn t wish all Americans ...    News   
1  House Intelligence Committee Chairman Devin Nu...    News   
2  On Friday, it was revealed that former Milwauk...    News   
3  On Christmas day, Donald Trump announced that ...    News   
4  Pope Francis used his annual Christmas Day mes...    News   

                date  
0  December 31, 2017  
1  December 31, 2017  
2  December 30, 2017  
3  December 29, 2017  
4  December 25, 2017  
True-News Dataset: 
                                                title  \
0  As U.S. budget fight looms, Republicans

# STEP-3: ALLOCATING CLASS TO THE TARGET ATTRIBUTE.

In [26]:
dt_fk["class"]=0
dt_tr["class"]=1

# STEP-4: COLLECTING DATA FOR MANUAL TESTING.

In [83]:
dt_fk_testing = dt_fk.tail(10)
for i in range(23480,23470,-1):
    dt_fk_testing.drop([i],axis=0,inplace=True)

dt_tr_testing = dt_tr.tail(10)
for i in range(21416,21406,-1):
    dt_tr_testing.drop([i],axis=0,inplace=True)

dt_fk_testing["class"]=0
dt_tr_testing["class"]=1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dt_fk_testing.drop([i],axis=0,inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dt_fk_testing.drop([i],axis=0,inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dt_fk_testing.drop([i],axis=0,inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dt_fk_testing.drop([i],axis=0,inplac

# STEP-5: MERGING TRUE AND FALSE DATASETS.

In [28]:
dt = pd.concat([dt_fk,dt_tr],axis=0)
print(dt.head())

                                               title  \
0   Donald Trump Sends Out Embarrassing New Year’...   
1   Drunk Bragging Trump Staffer Started Russian ...   
2   Sheriff David Clarke Becomes An Internet Joke...   
3   Trump Is So Obsessed He Even Has Obama’s Name...   
4   Pope Francis Just Called Out Donald Trump Dur...   

                                                text subject  \
0  Donald Trump just couldn t wish all Americans ...    News   
1  House Intelligence Committee Chairman Devin Nu...    News   
2  On Friday, it was revealed that former Milwauk...    News   
3  On Christmas day, Donald Trump announced that ...    News   
4  Pope Francis used his annual Christmas Day mes...    News   

                date  class  
0  December 31, 2017      0  
1  December 31, 2017      0  
2  December 30, 2017      0  
3  December 29, 2017      0  
4  December 25, 2017      0  


# STEP-6: DROPPING UNNECESSARY COLUMNS.

In [29]:
dt = dt.drop(["title","subject","date"],axis=1)
print(dt.head())
print(dt.head())
print(len(dt))

                                                text  class
0  Donald Trump just couldn t wish all Americans ...      0
1  House Intelligence Committee Chairman Devin Nu...      0
2  On Friday, it was revealed that former Milwauk...      0
3  On Christmas day, Donald Trump announced that ...      0
4  Pope Francis used his annual Christmas Day mes...      0
                                                text  class
0  Donald Trump just couldn t wish all Americans ...      0
1  House Intelligence Committee Chairman Devin Nu...      0
2  On Friday, it was revealed that former Milwauk...      0
3  On Christmas day, Donald Trump announced that ...      0
4  Pope Francis used his annual Christmas Day mes...      0
44898



# STEP-7: DEFINING A FUNCTION TO FILTER TEXT.

In [30]:
lt=[]
def wordfilter(text):
    text = text.lower()
    text = re.sub("\[.*?\]",' ',text)
    text = re.sub("\\W",' ',text)
    text = re.sub("https?://\S+|www\.\S+",' ',text)
    text = re.sub(r"<.*?>+",' ',text)
    text = re.sub("[%s]" %re.escape(string.punctuation),' ',text)
    text = re.sub("\n",' ',text)
    text = re.sub("\w*\d\w",' ',text)
    return text

start_time = time.time()
dt["text"] = dt["text"].apply(wordfilter)
end_time = time.time()
processing_time = end_time - start_time
print(f"Total processing time for all rows: {processing_time} seconds")
print(dt.tail())

Total processing time for all rows: 19.963550329208374 seconds
                                                    text  class
21412  brussels  reuters    nato allies on tuesday we...      1
21413  london  reuters    lexisnexis  a provider of l...      1
21414  minsk  reuters    in the shadow of disused sov...      1
21415  moscow  reuters    vatican secretary of state ...      1
21416  jakarta  reuters    indonesia will buy   sukho...      1


# STEP-8: DIVIDING FEATURES INTO DEPENDENT AND INDEPENDENT.

In [31]:
x = dt["text"]
y = dt["class"]

# STEP-9: SPLITTING DATA INTO TRAINING AND TESTING.

In [32]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25)

# STEP-10: VECTORIZATION.

In [33]:
vect = TfidfVectorizer()
xvct_train = vect.fit_transform(x_train)
xvct_test = vect.transform(x_test)

# STEP-11: CREATING LOGISTIC REGRESSION MODEL

In [34]:
lr = LogisticRegression()
lr.fit(xvct_train,y_train)

#prediction
y_pred = lr.predict(xvct_test)

#accuracy score
accr = accuracy_score(y_test,y_pred)
print("Accuracy Score: ",accr*100)

#classification report
print(classification_report(y_test,y_pred))

Accuracy Score:  98.64587973273942
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      5886
           1       0.98      0.99      0.99      5339

    accuracy                           0.99     11225
   macro avg       0.99      0.99      0.99     11225
weighted avg       0.99      0.99      0.99     11225



# STEP-12: CREATING DECISION TREE CLASSIFIER MODEL

In [35]:
dct = DecisionTreeClassifier()
dct.fit(xvct_train,y_train)

#prediction
y_pred = dct.predict(xvct_test)

#accuracy score
accr = accuracy_score(y_test,y_pred)
print("Accuracy Score: ",accr*100)

#classification report
print(classification_report(y_test,y_pred))

Accuracy Score:  99.59020044543429
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5886
           1       1.00      0.99      1.00      5339

    accuracy                           1.00     11225
   macro avg       1.00      1.00      1.00     11225
weighted avg       1.00      1.00      1.00     11225



In [36]:
def display_output(flag):
    if flag==0:
        return "FAKE NEWS"
    else:
        return "TRUE NEWS"
    
def testing(news):
    testing_news = {"text":[news]}
    new_def_test = pd.DataFrame(testing_news)
    new_def_test["text"] =  new_def_test["text"].apply(wordfilter)
    new_x_test = new_def_test["text"]
    new_xv_test = vect.transform(new_x_test)
    pred_lr = lr.predict(new_xv_test)
    pred_dct = dct.predict(new_xv_test)
    one = display_output(pred_lr)
    two = display_output(pred_dct)
    print("RESULT: ",one)
    print("RESULT: ",two)
    if one=="TRUE NEWS" and two=="TRUE NEWS":
        return 1
    else:
        return 0

#news = input()
#main_flag = testing(news)
#print(main_flag)

# CREATING GUI FOR FAKE NEWS DETECTION
# TO DETECT, SUMMARIZE AND TRANSLATE(IFF NEWS IS TRUE).

In [90]:
import tkinter as tk
from tkinter import ttk
from tkinter import *
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from googletrans import Translator, LANGUAGES

def text_summarize(text):
    stopwords = list(STOP_WORDS)
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(text)

    tokens = [token.text for token in doc]
    #print(tokens)


    word_frequencies = {}
    for word in doc:
        if word.text.lower() not in stopwords:
            if word.text.lower() not in punctuation:
                if word.text not in word_frequencies.keys():
                    word_frequencies[word.text]=1
                else:
                    word_frequencies[word.text]+=1
    #print(word_frequencies)
    max_frequency = max(word_frequencies.values())
    #print(max_frequency)

    for word in word_frequencies.keys():
        word_frequencies[word] = word_frequencies[word]/max_frequency
    #print(word_frequencies)

    sentence_tokens = [sent for sent in doc.sents]
    #print(sentence_tokens)

    sentence_scores = {}
    for sent in sentence_tokens:
        for word in sent:
            if word.text.lower() in word_frequencies.keys():
                if sent not in sentence_scores.keys():
                    sentence_scores[sent] = word_frequencies[word.text.lower()]
                else:
                    sentence_scores[sent] += word_frequencies[word.text.lower()]
    #print(sentence_scores)

    from heapq import nlargest
    select_length = int(len(sentence_tokens))
    #print(select_length)

    summary = nlargest(select_length,sentence_scores,key=sentence_scores.get)
    #print(summary)

    final_summary = [word.text for word in summary]
    summary = ' '.join(final_summary)
    return summary
    #print(summary)

def detect_button_click():
    global ot
    global s_text
    global g_flag
    ot = Output_text.get("1.0", "end-1c")
    main_flag = testing(ot)
    print("Main Flag is: ",main_flag)
    try:
        if main_flag==0:
            true_button.config(bg="white")
            fake_button.config(bg="red")
            g_flag=0
        elif main_flag==1:
            true_button.config(bg="green")
            fake_button.config(bg="white")
            s_text = text_summarize(ot)
            print("Summarized Successfully with length: ",len(s_text))
            g_flag=1
        else:
            true_button.config(bg="white")
            fake_button.config(bg="white")
    except ValueError:
        true_button.config(bg="white")
        fake_button.config(bg="white")
        
def getTranslated():
    translator = Translator()
    translated = translator.translate(text=s_text, dest=dest_lang.get())
    #print("TRANSLATED TEXT IS: ",translated.text)
    Output_Translated_text.delete(1.0,END)
    if g_flag==0:
        Output_Translated_text.delete(1.0, END)  # Clear any existing text
        Output_Translated_text.insert(END, "SORRY, TRANSLATION OF FAKE NEWS IS NOT ALLOWED.")
        Output_Translated_text.tag_add("red", 1.0, END)
        Output_Translated_text.tag_configure("red", foreground="red")
    else:
        Output_Translated_text.insert(END,translated.text)

    
punctuation = punctuation + '\n'
language = list(LANGUAGES.values())



root = tk.Tk()
screen_width = root.winfo_screenwidth()
screen_height = root.winfo_screenheight()
root.geometry(f"{screen_width}x{screen_height}")
Label(root,text="F6's FAKE NEWS DETECTOR",font="arial 20 bold",bg="#9acd32").pack()
label_text = "Enter the NEWS: "
label_width = len(label_text) * 10  # Adjust as needed for the desired label width
x_coordinate = (screen_width - label_width) // 2
root.iconbitmap(r"C:\Users\HP\OneDrive\Documents\MyPic_Green Cropped.ico")
root['bg']="#ff4f4b"
root.title("F6's FAKE NEWS DETECTOR")
root.resizable(100,100)

Label(root, text="Enter the NEWS: ",font="arial 13 bold", bg="yellow").place(x=x_coordinate,y=70)

Output_text = Text(root,font="arial 10", height=11,wrap=WORD,padx=5, pady=5,width=100,bg="white smoke")
Output_text.place(x=x_coordinate-280,y=110)


Label(root, text="SUMMARIZED AND TRANSLATED TEXT: ",font="arial 13 bold", bg="yellow").place(x=x_coordinate-84,y=570)

Output_Translated_text = Text(root,font="arial 10", height=11,wrap=WORD,padx=5, pady=5,width=100,bg="white smoke")
Output_Translated_text.place(x=x_coordinate-280,y=610)


detect_button = tk.Button(root, text="DETECT", bg="yellow",command=lambda: detect_button_click(),width=20,height=4,font=("Arial", 10, "bold"))
detect_button.place(x=x_coordinate+5, y=330)
txt = Output_text.get("1.0","end-1c")

#Creating Style object to apply colors to Combobox
style= ttk.Style()
style.theme_use('clam')
style.configure("TCombobox", fieldbackground= "yellow", background= "orange")

#Creating combobox
dest_lang = ttk.Combobox(root,values=language,width=60)
dest_lang.place(x=x_coordinate+450,y=610)
dest_lang.set("Choose Language")

tran = tk.Button(root, text="SUMMARIZE AND TRANSLATE",bg="black",command=getTranslated,width=26,fg="white", font=("Arial", 12, "bold"))
tran.place(x=x_coordinate -60, y=520)

true_button = tk.Button(root, text="TRUE", width=8, height=2,command=getTranslated, bg="#ffd580", fg="white", font=("Arial", 12, "bold"))
true_button.place(x=x_coordinate - 60, y=440)

fake_button = tk.Button(root, text="FAKE", width=8, height=2, bg="#ffd580", fg="white", font=("Arial", 12, "bold"))
fake_button.place(x=x_coordinate + 135, y=440)


root.mainloop()

RESULT:  TRUE NEWS
RESULT:  TRUE NEWS
Main Flag is:  1
Summarized Successfully with length:  744


  return self.func(*args)
  return self.func(*args)
  return self.func(*args)


RESULT:  FAKE NEWS
RESULT:  FAKE NEWS
Main Flag is:  0


  return self.func(*args)


RESULT:  TRUE NEWS
RESULT:  TRUE NEWS
Main Flag is:  1
Summarized Successfully with length:  4133


  return self.func(*args)
