In [3]:
#This Python scripts uses libraries to load data, process text into numerical features, train a logistic regression model, and evaluate its performance.
import pandas as pd
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [4]:
#upload dataset
fake = pd.read_csv("Fake.csv")
true = pd.read_csv("True.csv")

fake["label"] = 0   # Fake news
true["label"] = 1   # Real news


In [5]:
#Combine Both Datasets
df = pd.concat([fake, true], axis=0)
df = df.sample(frac=1).reset_index(drop=True)  # shuffle

df.head()


Unnamed: 0,title,text,subject,date,label
0,Trump revealed intelligence secrets to Russian...,WASHINGTON (Reuters) - President Donald Trump ...,politicsNews,"May 15, 2017",1
1,Trump Makes Total Ass Of Himself While Trying...,Donald Trump decided that he would exploit a w...,News,"April 22, 2017",0
2,"Hereâ€™s How Much Trump Is Paying Omarosa, Kell...","The average American makes about $50,000 a yea...",News,"June 30, 2017",0
3,Michigan Gov. Poison Gets Humiliated By Mark ...,Governor Rick Snyder should probably stay off ...,News,"January 19, 2016",0
4,COVER-UP: Both Obama and Clinton Lied About Tr...,21st Century Wire says The mainstream media h...,Middle-east,"October 21, 2016",0


In [6]:
# Create Final Dataset Format
df["text"] = df["title"] + " " + df["text"]
df = df[["text", "label"]]

df.head()


Unnamed: 0,text,label
0,Trump revealed intelligence secrets to Russian...,1
1,Trump Makes Total Ass Of Himself While Trying...,0
2,"Hereâ€™s How Much Trump Is Paying Omarosa, Kell...",0
3,Michigan Gov. Poison Gets Humiliated By Mark ...,0
4,COVER-UP: Both Obama and Clinton Lied About Tr...,0


In [7]:
#Split features and labels
X = df["text"]
y = df["label"]


In [8]:
#converting text to numbers
vectorizer = TfidfVectorizer(
    stop_words="english",
    max_df=0.7
)

X_vec = vectorizer.fit_transform(X)


In [9]:
#train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_vec, y, test_size=0.2, random_state=42
)


In [10]:
#trainng the machine learning model
model = LogisticRegression()
model.fit(X_train, y_train)


In [11]:
#evaluate the model accuracy
pred = model.predict(X_test)
accuracy = accuracy_score(y_test, pred)

print("Model Accuracy:", accuracy)


Model Accuracy: 0.9846325167037862


In [12]:
#train with custom news
def predict_news(news):
    vec = vectorizer.transform([news])
    result = model.predict(vec)[0]
    return "REAL NEWS ðŸŸ¢" if result == 1 else "FAKE NEWS ðŸ”´"

predict_news("Government announces new education policy")


'FAKE NEWS ðŸ”´'

In [13]:
pickle.dump(model, open("model.pkl", "wb"))
pickle.dump(vectorizer, open("vectorizer.pkl", "wb"))


In [16]:
from google.colab import files

files.download("model.pkl")
files.download("vectorizer.pkl")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>