# Data prep

In [1]:
from pathlib import Path

import pandas as pd

# Directory holding Fake.csv and True.csv. Kept in one constant so the notebook
# can be pointed at a different location without editing every read.
DATA_DIR = Path("/kaggle/input/fake-and-real-news-dataset")

# Load each file and tag its rows with the class label. `.assign` returns a new
# frame instead of mutating one in place, so re-running the cell is idempotent.
df_fake = pd.read_csv(DATA_DIR / "Fake.csv").assign(label="FAKE")
df_true = pd.read_csv(DATA_DIR / "True.csv").assign(label="TRUE")

# Stack both classes, then shuffle with a fixed seed so rows are not grouped by
# label and the order is reproducible.
df = (
    pd.concat([df_fake, df_true], ignore_index=True)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Model input is "<title>: <text>". A missing title or text would turn the whole
# concatenation into NaN, which TfidfVectorizer refuses to process, so missing
# fields are replaced by empty strings first. Rows with both fields present are
# unaffected.
df["content"] = df["title"].fillna("") + ": " + df["text"].fillna("")

# NOTE(review): the reports further down show ~0.99-1.00 F1, which is unusually
# high for a text classifier. Possible causes worth checking: source-specific
# boilerplate in `text` that gives the label away, or duplicate articles ending
# up in both train and test. TODO confirm before trusting the scores.
X = df["content"]
y = df["label"]

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# train/test partition identical across re-runs.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Random Forest

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report

# TF-IDF features feeding a random forest. Both steps live in one Pipeline, so
# the vectorizer is fitted only on the data passed to `fit`.
pipe_rf = Pipeline([
    # Drop English stop words and terms present in more than 70% of documents.
    ('tfidf', TfidfVectorizer(stop_words='english', max_df=0.7)),
    # n_jobs=-1 spreads tree building and prediction over all available cores.
    # Per-tree seeds are derived from random_state, so predictions should not
    # depend on the number of workers.
    ('clf', RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)),
])

# Train the model
pipe_rf.fit(X_train, y_train)

# Predict on the held-out split and report per-class precision/recall/F1.
y_pred_rf = pipe_rf.predict(X_test)
# The title and the report are printed separately: passing both to a single
# print() inserts a separator space after the "\n", which shifts the report's
# header row one column to the right of the values below it.
print("Random Forest Results:")
print(classification_report(y_test, y_pred_rf))

Random Forest Results:
               precision    recall  f1-score   support

        FAKE       0.99      0.99      0.99      4710
        TRUE       0.99      0.99      0.99      4270

    accuracy                           0.99      8980
   macro avg       0.99      0.99      0.99      8980
weighted avg       0.99      0.99      0.99      8980



# Gradient Boosting

In [5]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report

# TF-IDF features feeding a gradient-boosted tree ensemble. Both steps live in
# one Pipeline, so the vectorizer is fitted only on the data passed to `fit`.
pipe_gb = Pipeline([
    # Drop English stop words and terms present in more than 70% of documents.
    ('tfidf', TfidfVectorizer(stop_words='english', max_df=0.7)),
    # 100 boosting stages; the fixed seed makes the fit reproducible.
    ('clf', GradientBoostingClassifier(n_estimators=100, random_state=42)),
])

# Train the model
pipe_gb.fit(X_train, y_train)

# Predict on the held-out split and report per-class precision/recall/F1.
y_pred_gb = pipe_gb.predict(X_test)
# The title and the report are printed separately: passing both to a single
# print() inserts a separator space after the "\n", which shifts the report's
# header row one column to the right of the values below it.
print("Gradient Boosting Results:")
print(classification_report(y_test, y_pred_gb))

Gradient Boosting Results:
               precision    recall  f1-score   support

        FAKE       1.00      0.99      1.00      4710
        TRUE       0.99      1.00      1.00      4270

    accuracy                           1.00      8980
   macro avg       1.00      1.00      1.00      8980
weighted avg       1.00      1.00      1.00      8980

