<a href="https://colab.research.google.com/github/Banafshehkh/Fake_News_Detection/blob/main/Fake_News_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Author: Banafsheh Khazali
#Date: April 27, 2023

In [83]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [84]:
import numpy as pd
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.feature_extraction.text import CountVectorizer #Convert a collection of text documents to a matrix of token counts.
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

## **Load the Data**

In [123]:
train_df = pd.read_csv('/content/drive/MyDrive/Data/train_preprocessed.csv')
val_df = pd.read_csv('/content/drive/MyDrive/Data/valid_preprocessed.csv')
test_df = pd.read_csv('/content/drive/MyDrive/Data/test_preprocessed.csv')

In [124]:
print(val_df["Label"].unique())

['FALSE' 'TRUE' 'Label']


In [125]:
print(test_df["Label"].unique())

[ True False]


In [126]:
print(train_df["Label"].unique())

[False  True]


In [127]:
val_df = val_df.drop(val_df[val_df['Label'] == 'Label'].index)
print(val_df["Label"].unique())

['FALSE' 'TRUE']


In [128]:
val_df['Label'] = val_df['Label'].str.lower().map({'true': True, 'false': False})
print(val_df['Label'].unique())

[False  True]


In [130]:
train_df.Label =  train_df['Label'].replace({True: 1, False: 0})
val_df.Label = val_df['Label'].replace({True: 1, False: 0})
test_df.Label = test_df['Label'].replace({True: 1, False: 0})

In [131]:
vectorizer = CountVectorizer(stop_words='english')

X_train = vectorizer.fit_transform(train_df['Statement'])
y_train = train_df['Label']
y_train = y_train.astype(int)

X_val = vectorizer.transform(val_df['Statement'])
y_val = val_df['Label']
y_val = y_val.astype(int)

X_test = vectorizer.transform(test_df['Statement'])
y_test = test_df['Label']
y_test = y_test.astype(int)


## **Train a Machine Learning Model**

naive bayes

In [132]:
clf = MultinomialNB()
clf.fit(X_train, y_train)

## **Evaluate the Model**

In [135]:
y_pred_val = clf.predict(X_val)
val_accuracy = accuracy_score(y_val, y_pred_val)
print(f"Validation accuracy: {val_accuracy:.2f}")

Validation accuracy: 0.61


## **Test the Model**

In [136]:
y_pred_test = clf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred_test)
print(f"Test accuracy: {test_accuracy:.2f}")

Test accuracy: 0.62


In [137]:
from sklearn.model_selection import GridSearchCV

# Define the hyperparameter search space
param_grid = {'alpha': [0.1, 0.5, 1.0, 2.0, 5.0, 10.0],
              'fit_prior': [True, False]}

# Create a grid search object
grid_search = GridSearchCV(estimator=MultinomialNB(),
                           param_grid=param_grid,
                           cv=5,
                           scoring='accuracy')

# Fit the grid search object to the training data
grid_search.fit(X_train, y_train)

# Print the best hyperparameters and validation accuracy
print(f"Best hyperparameters: {grid_search.best_params_}")
print(f"Validation accuracy: {grid_search.best_score_:.2f}")

# Evaluate the model on the test dataset using the best hyperparameters
best_clf = grid_search.best_estimator_
y_pred_test = best_clf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred_test)
print(f"Test accuracy: {test_accuracy:.2f}")


Best hyperparameters: {'alpha': 5.0, 'fit_prior': False}
Validation accuracy: 0.61
Test accuracy: 0.62
