In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import FunctionTransformer, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, accuracy_score

# Load the FakeNewsNet CSV into a DataFrame.
df = pd.read_csv('/content/drive/MyDrive/FakeNewsNet.csv/FakeNewsNet.csv')

# Keep only the columns used for modelling, then discard every row in
# which at least one of them is missing.
relevant_columns = ['tweet_num', 'source_domain', 'title', 'real']
df = df[relevant_columns].dropna()

# Define feature extraction functions
def get_title(X):
    """Return the 'title' entry of X.

    X is indexed with the plain string key 'title', so for a pandas
    DataFrame the result is a one-dimensional Series.
    """
    title_column = X['title']
    return title_column

def get_domain(X):
    """Return the 'source_domain' column of X as a single-column selection.

    X is indexed with a one-element list, so for a pandas DataFrame the
    result stays two-dimensional (a DataFrame with one column).
    """
    domain_columns = ['source_domain']
    return X[domain_columns]

def get_tweet_num(X):
    """Return the 'tweet_num' column of X as a single-column selection.

    X is indexed with a one-element list, so for a pandas DataFrame the
    result stays two-dimensional (a DataFrame with one column).
    """
    count_columns = ['tweet_num']
    return X[count_columns]

# Preprocessing and feature engineering.
# Each entry is (name, transformer, columns). The form of the column spec
# matters: a scalar string hands the transformer a 1-D column, while a list
# hands it a 2-D selection.
preprocessor = ColumnTransformer([
    # TfidfVectorizer consumes a 1-D sequence of documents, hence the scalar
    # 'title'; the vocabulary is capped at 1000 terms.
    ('title_tfidf', TfidfVectorizer(max_features=1000), 'title'),
    # OneHotEncoder requires 2-D input, hence the list. Domains not seen
    # during fit are encoded as all zeros instead of raising at predict time.
    ('domain_ohe', OneHotEncoder(handle_unknown='ignore'), ['source_domain']),
    # The tweet count is forwarded unchanged. 'passthrough' is a plain string
    # rather than a lambda-based transformer, so the fitted pipeline remains
    # picklable and can report output feature names.
    ('tweet_num', 'passthrough', ['tweet_num']),
])

# Create pipeline: feature extraction followed by a logistic-regression
# classifier (max_iter raised to 2000 iterations).
pipeline = Pipeline([
    ('preprocessing', preprocessor),
    ('classifier', LogisticRegression(max_iter=2000)),
])

# Features are the three input columns; the target is the 'real' flag cast
# to int.
feature_columns = ['tweet_num', 'source_domain', 'title']
X = df[feature_columns]
y = df['real'].astype(int)  # Assuming 1 for real, 0 for fake

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit on the training part, then predict the held-out part.
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# Report overall accuracy followed by the per-class metrics table.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.8668561434193266
Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.55      0.67      1105
           1       0.87      0.97      0.92      3469

    accuracy                           0.87      4574
   macro avg       0.86      0.76      0.79      4574
weighted avg       0.86      0.87      0.86      4574

