# **Fake News Classifier Model**

## (1). Importing Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import re
import string

## (2). Load the datasets

In [2]:
# Folder holding the two source CSV files; change this one line to relocate the data.
DATA_DIR = r'A:\MTECH(Data Science)\DataSet\P\News _dataset'

# One frame per class: fabricated articles and genuine articles.
fake_df = pd.read_csv(rf'{DATA_DIR}\Fake.csv')
true_df = pd.read_csv(rf'{DATA_DIR}\True.csv')

In [4]:
# Preview the first rows of the fake-news frame to confirm the CSV loaded
# with the expected columns (title, text, subject, date).
fake_df.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [5]:
# Preview the first rows of the real-news frame; it should expose the same
# columns as the fake-news frame so the two can be concatenated later.
true_df.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


## (3). Add a label column to each dataset: 1 for fake news, 0 for real news

In [6]:
# Attach the target column in place: 1 marks fake articles, 0 marks real ones.
fake_df['label'], true_df['label'] = 1, 0

In [7]:
# Check that the new 'label' column is present on the fake-news frame (expected value: 1).
fake_df.head()

Unnamed: 0,title,text,subject,date,label
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017",1
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017",1
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017",1
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017",1
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017",1


In [8]:
# Check that the new 'label' column is present on the real-news frame (expected value: 0).
true_df.head()

Unnamed: 0,title,text,subject,date,label
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017",0
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017",0
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017",0
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017",0
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017",0


## (4). Combine the datasets

In [9]:
# Stack the fake rows on top of the real rows and renumber the index from 0
# so that row labels are unique in the combined frame.
df = pd.concat(objs=(fake_df, true_df), axis=0, ignore_index=True)

In [17]:
# Show five randomly chosen rows of the combined frame. No random_state is
# passed, so a different set of rows is drawn on every run.
# NOTE(review): the saved output for this cell carries execution count 17 and
# shows lowercased, punctuation-free text, so it appears to have been produced
# after the preprocessing cell ran rather than at this point in the notebook.
df.sample(5)

Unnamed: 0,title,text,subject,date,label
43520,U.S. calls on China to use 'powerful tool' of ...,london reuters us secretary of state rex till...,worldnews,"September 14, 2017",0
36443,Senior Palestinian figure Dahlan urges exit fr...,gaza reuters influential exiled palestinian p...,worldnews,"December 6, 2017",0
12974,"DUMB AS A ROCK…Gary Johnson On NYC, MN Terror ...",saturday s bomb blast in new york city injured...,politics,"Sep 18, 2016",1
29713,Puerto Rico oversight board favors more time f...,new york reuters puerto ricos federal oversig...,politicsNews,"January 18, 2017",0
1463,Ann Coulter Is Super Pissed That Trump Is A D...,donald trump s number one fan still believes i...,News,"May 15, 2017",1


## (5). Function to preprocess the text data

In [10]:
# Matches every character that is neither a lowercase ASCII letter nor
# whitespace. Written as a raw string so that ``\s`` reaches the regex engine
# instead of being treated as an (invalid) Python string escape.
_NON_LETTER_RE = re.compile(r'[^a-z\s]')


def preprocess_text(text):
    """Normalise raw article text before it is vectorized.

    The text is lowercased and every character other than ``a``-``z`` and
    whitespace is removed (punctuation, digits, non-ASCII characters).
    Whitespace itself is left untouched. Words joined only by punctuation are
    merged, e.g. ``"U.S."`` becomes ``"us"``.

    Parameters
    ----------
    text : str or None
        Raw text. ``None`` and float ``NaN`` (how pandas represents an empty
        CSV cell) are treated as empty text; any other non-string value is
        converted with ``str()`` first.

    Returns
    -------
    str
        The cleaned text; ``''`` for missing input.
    """
    # Missing values would otherwise raise AttributeError on the string methods.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)

    # A single regex pass is enough: every character in string.punctuation is
    # outside a-z, so a separate punctuation-stripping step is redundant.
    return _NON_LETTER_RE.sub('', text.lower())

## (6). Preprocess the text data

In [11]:
# Clean every article body element-wise; the raw 'text' column is overwritten
# with its normalised form.
df['text'] = df['text'].map(preprocess_text)

## (7). Split the data for training & testing

In [12]:
# Hold out 20% of the articles for evaluation; the fixed seed keeps the split
# identical from run to run.
texts, labels = df['text'], df['label']
X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)

## (8). Vectorize the text data

In [13]:
# TF-IDF features: drop scikit-learn's built-in English stop words and ignore
# any term that appears in more than 70% of the documents (max_df=0.7).
# NOTE(review): the real-news previews all open with a "(Reuters)" dateline,
# which survives preprocessing as the token "reuters". The classifier may be
# keying on that source marker rather than on content — worth confirming by
# stripping the dateline and re-evaluating.
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
# Learn the vocabulary and IDF weights from the training split only, then
# reuse the fitted vectorizer unchanged on the test split.
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

## (9). Train the model

In [14]:
# Logistic regression with scikit-learn's default hyperparameters, fitted on
# the training TF-IDF matrix and the 0/1 labels.
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)

## (10). Evaluate the model

In [15]:
# Score the held-out split, then print the overall accuracy followed by the
# confusion matrix and the per-class precision/recall/F1 report.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep='\n',
)

Accuracy: 0.9879732739420936
[[4196   51]
 [  57 4676]]
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      4247
           1       0.99      0.99      0.99      4733

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



## (11). Function to predict new text/input

In [14]:
def predict_news(new_text):
    """Classify a single piece of news text.

    The text is cleaned with ``preprocess_text``, converted to a one-row
    TF-IDF matrix by the notebook-level ``tfidf_vectorizer`` fitted earlier,
    and scored by the notebook-level ``model`` trained earlier.

    Parameters
    ----------
    new_text : str
        Raw text of the article or headline to classify.

    Returns
    -------
    The predicted label for that text (this notebook labels fake news as 1
    and real news as 0).
    """
    cleaned = preprocess_text(new_text)

    # transform() expects an iterable of documents, hence the one-item list.
    features = tfidf_vectorizer.transform([cleaned])

    # predict() returns one label per row; only the single row's label is needed.
    return model.predict(features)[0]


## (12). Prediction for new Input/Text

In [18]:
# Example new input
new_text = "Breaking news: Scientists have discovered a new planet in our solar system."

# Classify it and report the verdict (label 1 means fake, anything else means real)
prediction = predict_news(new_text)
print("The news is fake." if prediction == 1 else "The news is real.")


The news is fake.


In [17]:
# Example new input
new_text = "The President of the United States has signed a new bill into law aimed at improving the country's infrastructure."

# Classify it and report the verdict (label 1 means fake, anything else means real)
prediction = predict_news(new_text)
print("The news is fake." if prediction == 1 else "The news is real.")


The news is real.


# **********************************************************************

## **Name - Aatish Kumar Baitha**
  - M.Tech (Data Science), 2nd-year student
- My LinkedIn Profile
  - https://www.linkedin.com/in/aatish-kumar-baitha-ba9523191
- My Blog
  - https://computersciencedatascience.blogspot.com/
- My GitHub Profile
  - https://github.com/Aatishkb

# **Thank you!**