In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# --- Configuration -------------------------------------------------------
# Forward slashes are used on purpose: the previous 'data\True.csv' form
# relied on the invalid escape sequences '\T' and '\F' (a SyntaxWarning on
# recent Python versions) and only resolved to a directory separator on Windows.
TRUE_NEWS_PATH = 'data/True.csv'
FAKE_NEWS_PATH = 'data/Fake.csv'
N_ROWS_PER_CLASS = 1000   # only the first N rows of each file are used
TEST_SIZE = 0.25          # fraction of rows held out for evaluation
SEED = 42                 # shared seed for the split and every estimator

# --- Load and label ------------------------------------------------------
# .assign returns a new frame, so the label column is added without mutating
# the object returned by .head().
true_news = pd.read_csv(TRUE_NEWS_PATH).head(N_ROWS_PER_CLASS).assign(label='true')
fake_news = pd.read_csv(FAKE_NEWS_PATH).head(N_ROWS_PER_CLASS).assign(label='fake')

data = pd.concat([true_news, fake_news], ignore_index=True)

# Features are the raw article text; the target is the 'true'/'fake' label.
X = data['text']
y = data['label']

# --- Train / test split --------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# --- TF-IDF features -----------------------------------------------------
# The vectorizer is fitted on the training texts only and then applied to the
# test texts, so no test-set vocabulary or document statistics leak into it.
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# --- Models --------------------------------------------------------------
lr_classifier = LogisticRegression(random_state=SEED)
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=SEED)
# probability=True is required so the SVM exposes predict_proba, which the
# soft-voting ensemble below averages across estimators.
svm_classifier = SVC(kernel='linear', probability=True, random_state=SEED)

voting_classifier = VotingClassifier(
    estimators=[
        ('lr', lr_classifier),
        ('rf', rf_classifier),
        ('svm', svm_classifier)
    ],
    voting='soft'
)

voting_classifier.fit(X_train_tfidf, y_train)

# --- Evaluation ----------------------------------------------------------
# NOTE(review): only accuracy on a single random split is reported; consider
# confirming the result with cross-validation or per-class metrics.
y_pred = voting_classifier.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy of the Voting Classifier: {:.2f}%".format(accuracy * 100))

  true_news = pd.read_csv('data\True.csv').head(1000)
  fake_news = pd.read_csv('data\Fake.csv').head(1000)


Accuracy of the Voting Classifier: 99.20%
