# Fake News Detection - Jupyter Notebook

In [ ]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import joblib

In [ ]:
# Load the two source CSVs and tag every row with its class label:
# 0 = fake, 1 = real.
fake = pd.read_csv('../dataset/fake.csv')
real = pd.read_csv('../dataset/real.csv')
fake['label'] = 0
real['label'] = 1

# Stack both classes into one frame and drop rows that have no article text:
# TfidfVectorizer raises on NaN documents, so they cannot be used downstream.
data = pd.concat([fake, real], ignore_index=True).dropna(subset=['text'])

# Shuffle with a fixed seed. An unseeded shuffle changes the row order on
# every run, which makes the later seeded train/test split (and therefore the
# reported accuracy and the saved model) non-reproducible.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

In [ ]:
# Hold out 20% of the rows for evaluation; the seed fixes the split for a
# given row order of `data`.
x_train, x_test, y_train, y_test = train_test_split(
    data['text'],
    data['label'],
    test_size=0.2,
    random_state=42,
)

# Fit the TF-IDF vocabulary on the training text only, then apply that same
# fitted vectorizer to the held-out text.
vectorizer = TfidfVectorizer(max_df=0.7, stop_words='english')
x_train_vec = vectorizer.fit_transform(x_train)
x_test_vec = vectorizer.transform(x_test)

# Train the classifier (fit returns the estimator itself) and report its
# accuracy on the held-out rows.
model = LogisticRegression().fit(x_train_vec, y_train)
y_pred = model.predict(x_test_vec)
print(f'Model Accuracy: {accuracy_score(y_test, y_pred)*100:.2f}%')

In [ ]:
# Persist both fitted objects: the classifier was trained on the vectorizer's
# output, so new text must go through this same vectorizer before prediction.
joblib.dump(value=model, filename='../app/model.pkl')
joblib.dump(value=vectorizer, filename='../app/vectorizer.pkl')