In [None]:
# This notebook loads the news datasets, converts the text into numerical TF-IDF features, trains a logistic regression model, and evaluates its accuracy.
import pandas as pd
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [None]:
# Load both CSV files and tag every row with its class label:
# 0 = fake news, 1 = real news.
fake = pd.read_csv("Fake.csv").assign(label=0)
true = pd.read_csv("True.csv").assign(label=1)


In [None]:
# Combine both datasets into one frame and shuffle the rows.
df = pd.concat([fake, true], axis=0)

# A fixed random_state makes the shuffle reproducible; without it the row order
# (and therefore the later train/test split and reported accuracy) changes on
# every run.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

df.head()


In [None]:
# Create the final dataset format: one combined "text" column plus the label.
# Missing titles/bodies are replaced with "" first, because string concatenation
# with NaN yields NaN and TfidfVectorizer cannot process a NaN document.
df["text"] = df["title"].fillna("") + " " + df["text"].fillna("")

# Keep only the columns used for modelling. Note: "title" is dropped here, so
# this cell cannot be re-run without re-running the cells above it.
df = df[["text", "label"]]

df.head()


In [None]:
# Separate the model input (article text) from the target (0/1 label)
X, y = df["text"], df["label"]


In [None]:
# Turn the text into a TF-IDF matrix.
# English stop words are removed, and terms that occur in more than 70% of the
# documents are ignored (max_df=0.7).
vectorizer = TfidfVectorizer(stop_words="english", max_df=0.7)

# Learn the vocabulary from X and vectorize it in one pass
X_vec = vectorizer.fit_transform(X)


In [None]:
# Train/test split.
# The raw text is split first and TF-IDF is then fitted on the training portion
# only, so the vocabulary, the max_df filtering and the IDF weights are not
# learned from held-out articles (fitting on the full corpus leaks test data
# into the features and inflates the measured accuracy).
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Re-fitting replaces whatever `vectorizer` learned earlier from the full corpus;
# the test texts are only transformed with the training vocabulary.
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)


In [None]:
# Train the machine learning model (logistic regression with default settings)
model = LogisticRegression().fit(X_train, y_train)
model


In [None]:
# Score the trained model on the held-out test split
pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=pred)

print("Model Accuracy:", accuracy)


In [None]:
# Predict on custom news text
def predict_news(news: str) -> str:
    """Classify a single piece of news text as real or fake.

    Uses the notebook-level ``vectorizer`` to turn the text into features and
    the notebook-level ``model`` to predict its label.

    Args:
        news: The raw news text to classify, as one string.

    Returns:
        "REAL NEWS" followed by a green-circle emoji when the predicted label
        is 1, otherwise "FAKE NEWS" followed by a red-circle emoji.
    """
    # transform() expects an iterable of documents, hence the one-element list.
    vec = vectorizer.transform([news])
    result = model.predict(vec)[0]
    # The emoji are written as Unicode escapes so the labels cannot be turned
    # into mojibake when the file is saved or read with the wrong encoding.
    return "REAL NEWS \U0001F7E2" if result == 1 else "FAKE NEWS \U0001F534"

# Try the classifier on an example headline; the result is shown as cell output
sample_headline = "Government announces new education policy"
predict_news(sample_headline)


In [None]:
# Persist the trained model and the fitted vectorizer to disk.
# The context managers guarantee each file is flushed and closed as soon as it
# is written, so the pickles are complete before anything else reads them.
with open("model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

with open("vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)


In [None]:
# Download the saved artifacts to the local machine when running in Google Colab.
# The google.colab package only exists inside the Colab runtime, so the import
# is guarded to keep the notebook runnable end-to-end elsewhere.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    for artifact in ("model.pkl", "vectorizer.pkl"):
        files.download(artifact)
else:
    print("google.colab is not available; skipping download of model.pkl and vectorizer.pkl.")
