<a href="https://colab.research.google.com/github/Tlotlo01/Fake-and-real-news-detection-system/blob/main/fake_real_news_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Version 1**

In [None]:
# Step 1: Import Required Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Step 2: Load the Dataset
# The CSV is read straight from GitHub, so this step needs network access.
DATA_URL = 'https://raw.githubusercontent.com/GeorgeMcIntire/fake_real_news_dataset/refs/heads/main/fake_and_real_news_dataset.csv'
df = pd.read_csv(DATA_URL)

# Step 3: Explore the Data
preview = df.head()
print("First 5 rows:\n", preview)
print("\nColumn Names:", df.columns)

# Step 4: Define Features and Labels
# Only the article body is used as input ('title' could be combined in as well);
# the 'label' column holds 'FAKE' or 'REAL'.
X, y = df['text'], df['label']

# Step 5: Split the Dataset
# Hold out 20% for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Step 6: Text Vectorization (TF-IDF)
# The vocabulary is fitted on the training split only; the test split is
# transformed with that fitted vocabulary.
vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Step 7: Train the Classifier
# fit() returns the estimator itself, so construction and training can be chained.
model = LogisticRegression().fit(X_train_vec, y_train)

# Step 8: Evaluate the Model
y_pred = model.predict(X_test_vec)
print("\n📊 Classification Report:\n")
report = classification_report(y_test, y_pred)
print(report)

# Step 9: Try a Custom Prediction
def predict_news(news_text):
    """Classify a piece of news text with the fitted vectorizer and model.

    Args:
        news_text: Raw article text to classify.

    Returns:
        A sentence containing the upper-cased predicted label, or a prompt
        asking for input when ``news_text`` is ``None`` or only whitespace.
    """
    # Blank text yields no TF-IDF terms, so a prediction for it would not
    # depend on the input at all; answer with a prompt instead.
    if news_text is None or not str(news_text).strip():
        return "Please enter some news text to classify."

    # transform() expects an iterable of documents, hence the one-item list.
    news_vec = vectorizer.transform([news_text])
    prediction = model.predict(news_vec)[0]
    return f"🧠 Prediction: This news is likely **{prediction.upper()}**."

# Example: run the classifier on a short hand-written sentence.
sample_text = "Girls about to get wasted tonight."
sample_result = predict_news(sample_text)
print("\n", sample_result)


First 5 rows:
           idd                                              title  \
0  Fq+C96tcx+  ‘A target on Roe v. Wade ’: Oklahoma bill maki...   
1  bHUqK!pgmv  Study: women had to drive 4 times farther afte...   
2  4Y4Ubf%aTi        Trump, Clinton clash in dueling DC speeches   
3  _CoY89SJ@K  Grand jury in Texas indicts activists behind P...   
4  +rJHoRQVLe  As Reproductive Rights Hang In The Balance, De...   

                                                text label  
0  UPDATE: Gov. Fallin vetoed the bill on Friday....  REAL  
1  Ever since Texas laws closed about half of the...  REAL  
2  Donald Trump and Hillary Clinton, now at the s...  REAL  
3  A Houston grand jury investigating criminal al...  REAL  
4  WASHINGTON -- Forty-three years after the Supr...  REAL  

Column Names: Index(['idd', 'title', 'text', 'label'], dtype='object')

📊 Classification Report:

              precision    recall  f1-score   support

        FAKE       0.87      0.95      0.91       450
  

# **Version 2**

In [None]:
# Install necessary libraries
!pip install gradio joblib --quiet


In [None]:
# Step 1: Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
import joblib
import gradio as gr


## **Load the data**

In [None]:
# Read the labelled articles straight from GitHub (requires network access).
DATASET_URL = 'https://raw.githubusercontent.com/GeorgeMcIntire/fake_real_news_dataset/refs/heads/main/fake_and_real_news_dataset.csv'
df = pd.read_csv(DATASET_URL)

# Features are the article bodies; targets come from the 'label' column.
X, y = df['text'], df['label']


## **Split the data**

In [None]:
# Hold out 20% of the articles for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


## **Text Vectorization**

In [None]:
# Fit the TF-IDF vocabulary on the training articles only, dropping English
# stop words and any term that occurs in more than 70% of the documents.
vectorizer = TfidfVectorizer(
    max_df=0.7,
    stop_words='english',
)
X_train_vec = vectorizer.fit_transform(X_train)

# The test articles reuse the fitted vocabulary; nothing is refitted here.
X_test_vec = vectorizer.transform(X_test)


In [None]:
# Train a 100-tree random forest on the TF-IDF features; the seed keeps runs
# repeatable. fit() returns the estimator, so it can be chained.
model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
).fit(X_train_vec, y_train)

# Persist both artifacts for later use: the classifier and the fitted
# vectorizer that turns raw text into the features it was trained on.
joblib.dump(model, 'model.pkl')
joblib.dump(vectorizer, 'vectorizer.pkl')


['vectorizer.pkl']

## **Evaluate the model**

In [None]:
# Predict on the held-out split and print the per-class metrics report.
y_pred = model.predict(X_test_vec)
report = classification_report(y_test, y_pred)
print(report)


              precision    recall  f1-score   support

        FAKE       0.89      0.91      0.90       450
        REAL       0.91      0.89      0.90       469

    accuracy                           0.90       919
   macro avg       0.90      0.90      0.90       919
weighted avg       0.90      0.90      0.90       919



In [None]:

def predict_news(text):
    """Classify news text for the Gradio interface.

    Args:
        text: Raw article text entered by the user.

    Returns:
        A sentence containing the upper-cased predicted label, or a prompt
        asking for input when ``text`` is ``None`` or only whitespace.
    """
    # An empty textbox yields no TF-IDF terms, so a prediction for it would not
    # depend on the input at all; answer with a prompt instead of a label.
    if text is None or not str(text).strip():
        return "Please enter some news text to classify."

    # transform() expects an iterable of documents, hence the one-item list.
    vect = vectorizer.transform([text])
    prediction = model.predict(vect)[0]
    return f"This news is likely **{prediction.upper()}**."

# Wire the classifier into a minimal Gradio UI: one text box in, one text box out.
article_input = gr.Textbox(label="Enter news article")
prediction_output = gr.Textbox(label="Prediction")

interface = gr.Interface(
    fn=predict_news,
    inputs=article_input,
    outputs=prediction_output,
    title="📰 Fake News Detection",
    description="Enter a news article to check if it's FAKE or REAL.",
)

# share=True requests a public share link for the app.
interface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://77432788d18e7c49ab.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


