In [16]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier, SGDClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from skmultiflow.trees import HoeffdingTree


In [17]:
fake_news = "D:/Desktop/Fake_News_Dataset/ISOT_fake.csv"
true_news = "D:/Desktop/Fake_News_Dataset/ISOT_true.csv"

In [18]:
true_df = pd.read_csv(fake_news)
fake_df = pd.read_csv(true_news)

In [19]:
true_df["label"] = 1  # 1 for true news
fake_df["label"] = 0  # 0 for fake news

In [20]:
# Combine datasets
combined_df = pd.concat([true_df, fake_df], ignore_index=True)

# Shuffle data
combined_df = combined_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(combined_df['text'], combined_df['label'], test_size=0.2, random_state=42)

# Initialize TF-IDF vectorizer
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)

In [21]:
# Transform text data into TF-IDF features
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Initialize models
pac = PassiveAggressiveClassifier(random_state=42)
sgd = SGDClassifier(loss='log_loss', random_state=42)
gb = GradientBoostingClassifier()

In [22]:
pac.fit(X_train_tfidf, y_train)
sgd.fit(X_train_tfidf, y_train)

# Get predictions
pac_pred = pac.predict(X_test_tfidf)
sgd_pred = sgd.predict(X_test_tfidf)

In [23]:
pac_accuracy = accuracy_score(y_test, pac_pred)
sgd_accuracy = accuracy_score(y_test, sgd_pred)

In [24]:
print("Passive Aggressive Classifier accuracy:", pac_accuracy)
print("SGD Classifier accuracy:", sgd_accuracy)

Passive Aggressive Classifier accuracy: 0.9928730512249443
SGD Classifier accuracy: 0.9775055679287306


In [25]:
# Concatenate predictions as features
X_test_features = pd.DataFrame({'PAC': pac_pred, 'SGD': sgd_pred})

In [26]:
# Fit Gradient Boosting Classifier
gb.fit(X_test_features, y_test)

# Get final predictions
final_pred = gb.predict(X_test_features)

# Calculate accuracy
accuracy = accuracy_score(y_test, final_pred)
print("Final accuracy:", accuracy)

Final accuracy: 0.9928730512249443
