In [15]:
import pandas as pd
import nltk
import string
import joblib
import re
import warnings
# NOTE(review): this silences every warning for the whole kernel session,
# including any raised later by pandas or scikit-learn. Consider narrowing it
# to a specific category/module once the noisy warning is identified.
warnings.filterwarnings('ignore')

from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

# Load Data
# Read both source files in one pass: `fake` holds the fabricated articles,
# `real` holds the genuine ones. Paths are relative to the notebook's
# working directory.
fake, real = (pd.read_csv(csv_name) for csv_name in ("Fake.csv", "True.csv"))

# Preview a single row to confirm the column layout.
fake.head(n=1)

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"


In [7]:
# Preview a single row of the genuine-news frame to confirm its columns.
real.head(n=1)

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"


In [9]:
# Count missing values per column in the fake-news frame.
fake.isna().sum()

title      0
text       0
subject    0
date       0
dtype: int64

In [12]:
# Count missing values per column in the real-news frame.
real.isna().sum()

title      0
text       0
subject    0
date       0
dtype: int64

In [17]:
# Fetch the NLTK stopword corpus (a no-op when it is already installed) and
# keep the English list as a set so membership checks are constant-time.
nltk.download('stopwords')
stop_words = {word for word in stopwords.words('english')}

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\alfai\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [19]:
fake['label'] = 0

In [20]:
real['label'] = 1

In [23]:
# Keep only the article body and its label from each source and stack them
# into one frame.
df = pd.concat([fake[['text', 'label']], real[['text', 'label']]], axis=0)

# Shuffle the rows so the two classes are interleaved. The seed is fixed so
# that a fresh "Restart & Run All" yields the same row order, and therefore
# the same train/test membership and the same saved model, on every run.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [25]:
# Clean Text
def clean_text(text, stopword_set=None):
    """Normalise one document before TF-IDF vectorisation.

    The text is lower-cased, every character other than ``a-z`` and
    whitespace is deleted, the remainder is split on whitespace, and tokens
    present in the stopword set are dropped.

    Parameters
    ----------
    text : str or None
        Raw document. ``None`` and float NaN (a missing cell) are treated as
        an empty document; any other non-string value is converted with
        ``str()`` first instead of raising ``AttributeError``.
    stopword_set : collection of str, optional
        Tokens to remove. When omitted, the notebook-level ``stop_words``
        set is used, which matches the original behaviour.

    Returns
    -------
    str
        The surviving tokens joined by single spaces (``''`` if none remain).
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)
    if stopword_set is None:
        stopword_set = stop_words

    text = text.lower()
    # Characters are deleted, not replaced by a space, so "u.s." becomes "us"
    # and "well-known" becomes "wellknown". The pattern is kept unchanged so
    # any code that mirrors this cleaning at prediction time stays in sync.
    text = re.sub(r'[^a-z\s]', '', text)
    return ' '.join(token for token in text.split() if token not in stopword_set)

# Run the cleaner over every article body, overwriting the raw text column.
df['text'] = df['text'].map(clean_text)

In [27]:
# Vectorize
# Target vector: the 0/1 label column.
y = df['label']

# TF-IDF features; max_df=0.7 drops terms that occur in more than 70% of the
# documents.
# NOTE(review): the vectorizer is fitted on the full corpus before the
# train/test split below, so vocabulary and IDF weights are learned from rows
# that later land in the test set. Consider fitting on the training rows only.
vectorizer = TfidfVectorizer(max_df=0.7)
X = vectorizer.fit_transform(df['text'])

In [29]:
# Split & Train
# Stratified 80/20 split with a fixed seed, so both classes keep their
# proportions in each part and the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# class_weight='balanced' reweights samples inversely to class frequency.
model = LogisticRegression(class_weight='balanced')
model.fit(X_train, y_train)

# Score the held-out split so the model is evaluated before it is saved;
# previously X_test / y_test were created but never used.
# NOTE(review): X was vectorised before splitting, so this figure may be
# slightly optimistic.
print(f"Held-out accuracy: {model.score(X_test, y_test):.4f}")

In [30]:
# Save Model & Vectorizer
# Persist the fitted classifier and the fitted TF-IDF vectorizer next to the
# notebook; both are needed together to score new text.
for artifact, target_file in ((model, "model.pkl"), (vectorizer, "tfidf.pkl")):
    joblib.dump(artifact, target_file)

print("✅ Model and vectorizer saved successfully.")

✅ Model and vectorizer saved successfully.
