# Project: Fake News Detection

In [2]:
import os
from pathlib import Path

import pandas as pd

# Folder that holds fake.csv and true.csv. It defaults to the original local
# path; set the FAKE_NEWS_DATA_DIR environment variable to run the notebook
# on another machine without editing this cell.
DATA_DIR = Path(os.environ.get(
    'FAKE_NEWS_DATA_DIR',
    r'F:\python\tensorflow project\fake news',
))

# Load the datasets
fake_df = pd.read_csv(DATA_DIR / 'fake.csv')
true_df = pd.read_csv(DATA_DIR / 'true.csv')

# Show first 5 rows of each to understand structure
print("Fake News Sample:")
display(fake_df.head())

print("True News Sample:")
display(true_df.head())


Fake News Sample:


Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


True News Sample:


Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [3]:
# Add a label column to both datasets
fake_df['label'] = 0  # 0 for fake
true_df['label'] = 1  # 1 for real

# Combine the datasets
data = pd.concat([fake_df, true_df], ignore_index=True)

# Shuffle the rows with a fixed seed so that Restart & Run All reproduces the
# same ordering (an unseeded shuffle changes the preview and the later
# train/test split on every run)
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

# Preview the combined data
data.head()


Unnamed: 0,title,text,subject,date,label
0,"Behind the scenes, Ryan touts his agenda in Cl...",CLEVELAND (Reuters) - U.S. House Speaker Paul ...,politicsNews,"July 20, 2016",1
1,Man Dies In Peace After Learning Trump Had Be...,"A dying man had just one wish, and that was fo...",News,"April 18, 2017",0
2,Trump Threatens To Ruin Protesters’ Lives In ...,"By now, everyone has heard of the violence and...",News,"March 12, 2016",0
3,Anti-Assad nations say no to Syria reconstruct...,"NEW YORK (Reuters) - The United States, Britai...",politicsNews,"September 18, 2017",1
4,COMEDIAN SHOCKS LEFTIST COLLEGE STUDENTS With ...,This video of conservative comedian Steven Cro...,left-news,"Apr 26, 2016",0


In [4]:
import re

# Combine title and text into one column.
# Missing values are replaced with '' first: NaN + str yields NaN, and the
# cleaning step that follows calls .lower() on every value, which would fail
# on a float NaN.
data['content'] = data['title'].fillna('') + " " + data['text'].fillna('')

# Define a simple text cleaning function
def clean_text(text):
    """Normalize one article string for vectorization.

    The text is lower-cased, then digits and punctuation are deleted (nothing
    is inserted in their place), and finally every run of whitespace is
    collapsed to a single space and the ends are trimmed.

    Args:
        text: Raw article text (a str).

    Returns:
        The cleaned string.
    """
    # Applied in order; whitespace is collapsed last so the gaps left by the
    # two deletions are merged as well.
    substitutions = (
        (r'\d+', ''),        # remove numbers
        (r'[^\w\s]', ''),    # remove punctuation
        (r'\s+', ' '),       # squeeze whitespace runs
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# Run every article through clean_text and store the result back in 'content'
cleaned_content = data['content'].apply(clean_text)
data['content'] = cleaned_content

# Show the first rows of the cleaned text next to their labels
data.head()[['content', 'label']]


Unnamed: 0,content,label
0,behind the scenes ryan touts his agenda in cle...,1
1,man dies in peace after learning trump had bee...,0
2,trump threatens to ruin protesters lives in fi...,0
3,antiassad nations say no to syria reconstructi...,1
4,comedian shocks leftist college students with ...,0


In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Build the TF-IDF feature extractor: English stop words are dropped and
# terms that occur in more than 70% of the documents are ignored
vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)

# Target variable (0 = fake, 1 = real)
y = data['label']

# Learn the vocabulary from the cleaned articles and encode them
# NOTE(review): the vectorizer is fitted on the full corpus before the
# train/test split further down, so the IDF weights also see the test articles.
X = vectorizer.fit_transform(data['content'])


In [7]:
# First line: feature-matrix shape, i.e. (rows, features)
# Remaining lines: how many fake vs true articles there are
print(X.shape, y.value_counts(), sep='\n')


(44898, 215676)
label
0    23481
1    21417
Name: count, dtype: int64


In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [9]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression classifier created with no arguments (library defaults)
model = LogisticRegression()

# Train on the training split; as the last expression of the cell this also
# displays the fitted estimator
model.fit(X=X_train, y=y_train)


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [11]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Score the held-out test split
y_pred = model.predict(X_test)

# Compute all three summaries first, then report them together
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

# Overall fraction of correct predictions
print("Accuracy:", test_accuracy)

# Counts of true label (rows) versus predicted label (columns)
print("Confusion Matrix:\n", test_confusion)

# Per-class precision, recall and F1-score
print("Classification Report:\n", test_report)


Accuracy: 0.9885300668151448
Confusion Matrix:
 [[4601   54]
 [  49 4276]]
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      4655
           1       0.99      0.99      0.99      4325

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



In [16]:
# Ask the user for an article to classify (blocks until text is entered)
prompt = "Enter the news text to check if it is Fake or Real:\n"
user_input = input(prompt)

# Preprocess input same as training
def preprocess_text(text):
    """Clean raw user text with the same steps applied to the training corpus.

    The vectorizer was fitted on text whose digits and punctuation were
    deleted outright (see clean_text). Inference must do exactly the same,
    otherwise tokens differ from the learned vocabulary: replacing punctuation
    with a space would turn "anti-assad" into two tokens, while training saw
    the single token "antiassad". Keep this function in sync with clean_text.

    Args:
        text: Raw news text entered by the user (a str).

    Returns:
        Lower-cased text without digits or punctuation, with whitespace runs
        collapsed to single spaces and the ends trimmed.
    """
    text = text.lower()
    text = re.sub(r'\d+', '', text)       # drop digits, as done for training
    text = re.sub(r'[^\w\s]', '', text)   # delete punctuation without inserting a space
    text = re.sub(r'\s+', ' ', text)      # remove extra spaces
    return text.strip()

# Clean the entered text before it is vectorized
processed_input = preprocess_text(user_input)

# Encode the single document with the already-fitted TfidfVectorizer 'vectorizer'
input_vector = vectorizer.transform([processed_input])

# Classify it with the trained model 'model'
prediction = model.predict(input_vector)

# Label 0 means fake, anything else is reported as real
message = "Prediction: FAKE news" if prediction[0] == 0 else "Prediction: REAL news"
print(message)


Prediction: REAL news


# Done