In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Load Dataset
# Two CSV files, one per class; the code below reads the 'text' column of each.
fake_news = pd.read_csv("/content/Fake.csv")
real_news = pd.read_csv("/content/True.csv")

# Add a target column: 1 for real news, 0 for fake news
fake_news['label'] = 0
real_news['label'] = 1

# Combine the datasets
news_data = pd.concat([fake_news, real_news], axis=0).reset_index(drop=True)

# Shuffle the dataset (fixed seed keeps the row order reproducible)
news_data = news_data.sample(frac=1, random_state=42).reset_index(drop=True)

# Extract features and labels
X = news_data['text']
y = news_data['label']

# Split the raw text into training and testing sets BEFORE vectorizing.
# Fitting TF-IDF on the full corpus would let the vocabulary and IDF weights
# be learned from the test documents, which leaks test information into
# training and makes the reported scores optimistic.
text_train, text_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert text data to numerical data using TF-IDF.
# The vectorizer is fitted on the training split only; the test split is
# merely transformed with the vocabulary/IDF learned from training data.
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train = tfidf_vectorizer.fit_transform(text_train)
X_test = tfidf_vectorizer.transform(text_test)

# Naive Bayes Classifier
nb_model = MultinomialNB()
nb_model.fit(X_train, y_train)
nb_predictions = nb_model.predict(X_test)

# Evaluate the Naive Bayes model on the held-out split
print("Naive Bayes Results:")
print(f"Accuracy: {accuracy_score(y_test, nb_predictions):.2f}")
print(classification_report(y_test, nb_predictions))




Naive Bayes Results:
Accuracy: 0.93
              precision    recall  f1-score   support

           0       0.93      0.93      0.93      4710
           1       0.92      0.92      0.92      4270

    accuracy                           0.93      8980
   macro avg       0.93      0.93      0.93      8980
weighted avg       0.93      0.93      0.93      8980



In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, accuracy_score

# Load Dataset
# Two CSV files, one per class; the code below reads the 'text' column of each.
fake_news = pd.read_csv("/content/Fake.csv")
real_news = pd.read_csv("/content/True.csv")

# Add a target column: 1 for real news, 0 for fake news
fake_news['label'] = 0
real_news['label'] = 1

# Combine the datasets
news_data = pd.concat([fake_news, real_news], axis=0).reset_index(drop=True)

# Take a smaller subset for quicker processing
# (random sample of 10000 rows; fixed seed keeps the subset reproducible)
news_data = news_data.sample(n=10000, random_state=42)

# Extract features and labels
X = news_data['text']
y = news_data['label']

# Split the raw text into training and testing sets BEFORE vectorizing.
# Fitting TF-IDF on the full subset would let the vocabulary and IDF weights
# be learned from the test documents, which leaks test information into
# training and makes the reported scores optimistic.
text_train, text_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert text data to numerical data using TF-IDF.
# The vectorizer is fitted on the training split only; the test split is
# merely transformed with the vocabulary/IDF learned from training data.
tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=2000)
X_train = tfidf_vectorizer.fit_transform(text_train)
X_test = tfidf_vectorizer.transform(text_test)

# Support Vector Machine Classifier (Optimized with LinearSVC)
svm_model = LinearSVC(random_state=42)
svm_model.fit(X_train, y_train)
svm_predictions = svm_model.predict(X_test)

# Evaluate the SVM model on the held-out split
print("Support Vector Machine Results:")
print(f"Accuracy: {accuracy_score(y_test, svm_predictions):.2f}")
print(classification_report(y_test, svm_predictions))


Support Vector Machine Results:
Accuracy: 0.98
              precision    recall  f1-score   support

           0       0.99      0.98      0.98      1081
           1       0.98      0.98      0.98       919

    accuracy                           0.98      2000
   macro avg       0.98      0.98      0.98      2000
weighted avg       0.98      0.98      0.98      2000

