# Fake news Detection

### Importing required library
Here I am going to importing some of the required library, if extra library is required to install It will be install later on.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import re
import string

### Inserting fake and real dataset

In [2]:
df_fake = pd.read_csv("Fake.csv")
df_true = pd.read_csv("True.csv")

In [3]:
df_fake.head(5)

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [4]:
df_true.head(5)

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


Inserting a column called "class" for fake and real news dataset to categories fake and true news. 

In [5]:
df_fake["class"] = 0
df_true["class"] = 1

Removing last 10 rows from both the dataset, for manual testing  

In [6]:
df_fake.shape, df_true.shape

((23481, 5), (21417, 5))

Merging the manual testing dataframe in single dataset and save it in a csv file

Merging the main fake and true dataframe

In [7]:
df = pd.concat([df_fake, df_true], axis =0 )
df.head(10)

Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0
5,Racist Alabama Cops Brutalize Black Boy While...,The number of cases of cops brutalizing and ki...,News,"December 25, 2017",0
6,"Fresh Off The Golf Course, Trump Lashes Out A...",Donald Trump spent a good portion of his day a...,News,"December 23, 2017",0
7,Trump Said Some INSANELY Racist Stuff Inside ...,In the wake of yet another court decision that...,News,"December 23, 2017",0
8,Former CIA Director Slams Trump Over UN Bully...,Many people have raised the alarm regarding th...,News,"December 22, 2017",0
9,WATCH: Brand-New Pro-Trump Ad Features So Muc...,Just when you might have thought we d get a br...,News,"December 21, 2017",0


In [8]:
df.columns

Index(['title', 'text', 'subject', 'date', 'class'], dtype='object')

In [9]:
df.isnull().sum()

title      0
text       0
subject    0
date       0
class      0
dtype: int64

#### Randomly shuffling the dataframe 

In [10]:
df.reset_index(inplace = True)
df.drop(["index"], axis = 1, inplace = True)

In [11]:
df.columns

Index(['title', 'text', 'subject', 'date', 'class'], dtype='object')

In [12]:
df.head()

Unnamed: 0,title,text,subject,date,class
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",0
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",0
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",0
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",0
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",0


#### Creating a function to convert the text in lowercase, remove the extra space, special chr., ulr and links.

In [13]:
def wordopt(text):
    text = text.lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub("\\W"," ",text) 
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)    
    return text

In [14]:
df["text"] = df["text"].apply(wordopt)

#### Defining dependent and independent variable as x and y

In [15]:
x = df["text"]
y = df["class"]

#### Splitting the dataset into training set and testing set. 

In [16]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.05)

#### Convert text to vectors

In [17]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [18]:
vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

### XGBoost Classifier ###

In [19]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Conv1D, GlobalMaxPooling1D, Dropout

In [20]:
xv_train = xv_train.toarray().reshape(xv_train.shape[0], xv_train.shape[1], 1)
xv_test = xv_test.toarray().reshape(xv_test.shape[0], xv_test.shape[1], 1)

In [38]:
model = Sequential()
model.add(Dense(2500,activation='relu',input_dim=101))
model.add(Dense(1500,activation='relu'))
model.add(Dense(1000,activation='relu'))
model.add(Dense(250,activation='relu'))
model.add(Dense(50,activation='relu'))
model.add(Dense(1,activation='sigmoid'))



In [39]:
from tensorflow.keras.optimizers import Adam

optimizer = Adam(learning_rate=0.001)  # Adjust learning rate
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])


In [1]:
# Compile the model

from tensorflow.keras.preprocessing.sequence import pad_sequences

# Assuming x_train, x_test, y_train, and y_test are already split from the dataset

# Vectorize the text data
vectorization = TfidfVectorizer(max_features=101)  # Limiting the number of features
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

# Pad sequences to a fixed length
max_sequence_length = 101  # Adjust as needed
xv_train = pad_sequences(xv_train.toarray(), maxlen=max_sequence_length)
xv_test = pad_sequences(xv_test.toarray(), maxlen=max_sequence_length)

# Reshape the data for CNN input
xv_train = np.expand_dims(xv_train, axis=-1)
xv_test = np.expand_dims(xv_test, axis=-1)



NameError: name 'TfidfVectorizer' is not defined

In [43]:
# Train the model
model.fit(xv_train, y_train, epochs=75, batch_size=64, validation_data=(xv_test, y_test))



Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75

In [40]:
# Evaluate the model
loss, accuracy = model.evaluate(xv_test, y_test)
print("Test Accuracy:", accuracy)

NameError: name 'XGB_pred' is not defined

### VOICE REGOGNITION ###

In [41]:
import speech_recognition as sr
from pydub import AudioSegment
import moviepy.editor as mp
import os

In [42]:
clip = mp.VideoFileClip("mTHI.mp4")
clip.audio.write_audiofile("mTHI_mp4.mp3")

MoviePy - Writing audio in mTHI_mp4.mp3


                                                                      

MoviePy - Done.




In [50]:
UserVoiceRecognizer = sr.Recognizer()

try:
    mp3_file_path = r"mTHI_mp4.mp3"

    audio = AudioSegment.from_mp3(mp3_file_path)
    wav_file_path = 'output.wav'
    audio.export(wav_file_path, format='wav')

    with sr.AudioFile(wav_file_path) as source:
        UserVoiceRecognizer.adjust_for_ambient_noise(source, duration=0.5)
        UserVoiceInput = UserVoiceRecognizer.listen(source)

        UserVoiceInput_converted_to_Text = UserVoiceRecognizer.recognize_google(UserVoiceInput)
        UserInput = UserVoiceInput_converted_to_Text.lower()
        
        print(UserVoiceInput_converted_to_Text)

        output_text_file = 'output.txt'
        with open(output_text_file, 'w') as text_file:
            text_file.write(UserVoiceInput_converted_to_Text)

except KeyboardInterrupt:
    print('A KeyboardInterrupt encountered; Terminating the Program !!!')
except sr.UnknownValueError:
    print("No speech detected or could not be recognized.")
except Exception as e:
    print(f"An error occurred: {str(e)}")

An error occurred: [WinError 2] The system cannot find the file specified


# Model Testing With Manual Entry

### News

In [46]:
def output_lable(n):
    if n == 0:
        return "Fake News"
    elif n == 1:
        return "Not A Fake News"
    
def manual_testing(news):
    testing_news = {"text":[news]}
    new_def_test = pd.DataFrame(testing_news)
    new_def_test["text"] = new_def_test["text"].apply(wordopt) 
    new_x_test = new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test)
    pred_XGB = XGB.predict(new_xv_test)

    return print("\nXGB Prediction : {}".format(output_lable(pred_XGB[0])))

In [47]:
print(UserInput)
manual_testing(UserInput)

NameError: name 'UserInput' is not defined