In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder

# Load the dataset
data = pd.read_csv("cleaned_dataset.csv")

In [2]:
# Handle missing data
data = data[["text", "label"]].dropna()  # Drop rows with missing values

# Encode labels
le = LabelEncoder()
data["label"] = le.fit_transform(data["label"])

# Split data into features and target
x = data["text"]
y = data["label"]

# Train-test split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [3]:
# pipeline
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(max_features=5000, stop_words='english')),
    ('model', LogisticRegression())
])

# Hyperparameter tuning using GridSearchCV
param_grid = {'model__C': [0.1, 1, 10]}  # Regularization parameter
grid_search = GridSearchCV(pipeline, param_grid, scoring='accuracy', cv=5)
grid_search.fit(x_train, y_train)

best_model = grid_search.best_estimator_

# Evaluate
y_pred = best_model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

print(classification_report(y_test, y_pred, target_names=le.classes_))

Accuracy: 0.77
              precision    recall  f1-score   support

        Fake       0.80      0.87      0.83       264
        Real       0.72      0.60      0.65       144

    accuracy                           0.77       408
   macro avg       0.76      0.73      0.74       408
weighted avg       0.77      0.77      0.77       408



In [4]:
# Prediction function
def fakenewsprediction(text):
    input_data = [text]
    prediction = best_model.predict(input_data)
    return "Real News" if prediction[0] == 0 else "Fake News"

article_input = "Scientists have discovered a new species of frog in the Amazon rainforest, which could help in understanding climate change."
prediction = fakenewsprediction(article_input)
print(f"Prediction: {prediction}")

Prediction: Real News
