In [2]:
from pathlib import Path

import pandas as pd

# Single place to change where the raw CSV files live; both reads below derive
# their paths from it instead of repeating the absolute directory.
DATA_DIR = Path('C:/fake_newsproject')

# Load the two source files: one holding fake articles, one holding true articles.
data_fake = pd.read_csv(DATA_DIR / 'Fake.csv')
data_true = pd.read_csv(DATA_DIR / 'True.csv')


In [3]:
# Print the (rows, columns) shape of each raw frame, then preview the first
# rows of the fake-news frame as the cell's displayed result.
for caption, frame in (("Fake News Shape:", data_fake), ("True News Shape:", data_true)):
    print(caption, frame.shape)
data_fake.head()


Fake News Shape: (23481, 4)
True News Shape: (21417, 4)


Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [4]:
# Add the classification target column to each source frame: 0 = fake, 1 = true.
for frame, label_value in ((data_fake, 0), (data_true, 1)):
    frame['label'] = label_value

In [5]:
# Stack the fake and true frames row-wise into a single dataframe.
frames = [data_fake, data_true]
data = pd.concat(frames, axis=0)

In [6]:
# Shuffle the data to mix fake and true rows.
# A fixed random_state makes the row order identical on every run, so the
# seeded train/test split further down always receives the same rows.
# reset_index(drop=True) discards the old per-file row numbers.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

In [7]:
# Preview the first five rows of the combined, shuffled frame.
data.head(n=5)

Unnamed: 0,title,text,subject,date,label
0,South Carolina Republican's town hall starts r...,"NORTH CHARLESTON, S.C. (Reuters) - U.S. Senato...",politicsNews,"February 25, 2017",1
1,Even Trump Voters Think Obama’s Economy Is Do...,Democrats and a lot of Republicans are scratch...,News,"July 19, 2016",0
2,How Obama And Trump Responded To Fidel Castro...,President Obama couldn t be more different fro...,News,"November 26, 2016",0
3,HILLARY APPEARS Wearing “Anti-Seizure” Sunglas...,Remember when experts came out after Hillary c...,politics,"May 30, 2017",0
4,CRIES OF RACISM AFTER NYC MUSEUM KICKS OUT ROW...,So am I getting this right? Everyone needs to ...,left-news,"May 11, 2015",0


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Use title
# Only the headline text is used as the feature; `label` is the target.
X = data['title']
y = data['label']

# Step 2: Split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Vectorizer
# The vocabulary and IDF weights are learned from the training titles only;
# the test titles are transformed with that fitted vocabulary.
# max_df=0.7 ignores terms that appear in more than 70% of the training titles.
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Step 4: Train model
# random_state pins the forest's internal randomness so the metrics printed
# below can be reproduced from the same training data.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_vec, y_train)

# Step 5: Evaluate
# Score the held-out titles and print overall accuracy plus per-class metrics.
y_pred = model.predict(X_test_vec)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nReport:\n", classification_report(y_test, y_pred))


Accuracy: 0.9439866369710468

Report:
               precision    recall  f1-score   support

           0       0.96      0.93      0.95      4681
           1       0.93      0.96      0.94      4299

    accuracy                           0.94      8980
   macro avg       0.94      0.94      0.94      8980
weighted avg       0.94      0.94      0.94      8980



In [9]:
# Show the held-out accuracy again, computed from the existing y_test / y_pred.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)


Accuracy: 0.9439866369710468


In [14]:
def predict_news(news_text=None):
    """Classify one piece of news text as fake or real and print the verdict.

    Uses the notebook-level ``vectorizer`` and ``model`` objects, so both must
    already be fitted when this function is called.

    Parameters
    ----------
    news_text : str, optional
        Text to classify. When omitted, the user is prompted for it, which is
        the original interactive behaviour.

    Returns
    -------
    None
        The verdict is printed, not returned.
    """
    if news_text is None:
        news_text = input("Enter news: ")

    # Blank input would be vectorised to an all-zero row and still receive a
    # confident-looking verdict, so refuse it explicitly instead.
    news_text = news_text.strip()
    if not news_text:
        print("No text entered; nothing to classify.")
        return

    news_vec = vectorizer.transform([news_text])
    prediction = model.predict(news_vec)

    # Label 0 is treated as fake; any other predicted label is reported as real.
    if prediction[0] == 0:
        print("🟥 FAKE News Detected")
    else:
        print("🟩 REAL News Detected")

predict_news()


Enter news:  "India Successfully Launches Chandrayaan-3 to the Moon"


🟩 REAL News Detected


In [11]:
# Re-fit the vectorizer and the classifier on the training split.
# NOTE(review): this repeats the fit performed in the training cell and rebinds
# `vectorizer` and `model`; anything run afterwards (prediction, saving) uses
# these objects rather than the ones that were evaluated.
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train_vec = vectorizer.fit_transform(X_train)
# Seed the forest so this re-fit is reproducible from the same training data
# instead of producing a different model on every run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_vec, y_train)


In [12]:
import pickle

# Save trained model and vectorizer
# Context managers close each file handle as soon as its dump completes, so
# the data is flushed to disk and no handle is left open.
with open("model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)
with open("vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)


In [13]:
import joblib
# Write the model with joblib at compression level 3.
# NOTE(review): this targets the same "model.pkl" path that the pickle cell
# writes, so it replaces that file; presumably the consumer must then load it
# with joblib.load rather than pickle.load - confirm against the loading code.
joblib.dump(value=model, filename="model.pkl", compress=3)


['model.pkl']