# In [4]:
#  Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import joblib

# Train a fake-vs-real news text classifier and persist it to 'model.pkl'.
#
# Reads 'Fake.csv' and 'True.csv' from the working directory (both must have a
# 'text' column), labels the rows 'FAKE' / 'REAL', fits a TF-IDF +
# logistic-regression pipeline on 80% of the rows, reports accuracy on the
# remaining 20%, and saves the fitted pipeline with joblib.

# One seed shared by the shuffle and the split so a rerun reproduces the
# exact same train/test partition.
RANDOM_STATE = 42

# Load and label the datasets
df_fake = pd.read_csv('Fake.csv')
df_true = pd.read_csv('True.csv')

df_fake['label'] = 'FAKE'
df_true['label'] = 'REAL'

# Combine and shuffle.
# The shuffle must be seeded too: with an unseeded shuffle the row order
# changes on every run, so the fixed-seed split below would still pick
# different rows each time.
df = (
    pd.concat([df_fake, df_true])
    .sample(frac=1, random_state=RANDOM_STATE)
    .reset_index(drop=True)
)

# Prepare features and labels.
# Empty CSV cells are read as NaN, which TfidfVectorizer rejects; treat them
# as empty documents instead of failing during fit.
X = df['text'].fillna('')
y = df['label']

# Split into train/test, stratified so both splits keep the FAKE/REAL ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE,
    stratify=y,
)

# Create pipeline (vectorizer + model)
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english', max_df=0.7)),
    ('model', LogisticRegression()),
])

# Train the pipeline
pipeline.fit(X_train, y_train)

# Evaluate on the held-out split before saving; Pipeline.score delegates to
# the classifier's score, i.e. mean accuracy.
accuracy = pipeline.score(X_test, y_test)
print(f"Test accuracy: {accuracy:.4f}")

# Save the full pipeline (vectorizer + model) so inference can load a single
# artifact and pass it raw text.
joblib.dump(pipeline, 'model.pkl')

print("Model training complete and saved as 'model.pkl'")


# Recorded output of the cell above:
# Model training complete and saved as 'model.pkl'
