In [1]:
import pandas as pd
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Benchmark: compare two bag-of-words feature extractors crossed with two
# classifiers on the 20 Newsgroups corpus, reporting test-set accuracy.

# Load the predefined train and test subsets of the 20 Newsgroups dataset.
newsgroups_train = fetch_20newsgroups(subset='train')
newsgroups_test = fetch_20newsgroups(subset='test')

# Feature extractors under comparison: raw term counts vs. TF-IDF weights.
count_vectorizer = CountVectorizer()
tfidf_vectorizer = TfidfVectorizer()

# Classifiers under comparison, keyed by the label shown in the results table.
# random_state is pinned on the decision tree so that repeated runs of this
# benchmark report the same accuracy; without it the fitted tree (and hence
# the score) can vary from run to run.
algorithms = {
    'Multinomial Naive Bayes': MultinomialNB(),
    'Decision Trees': DecisionTreeClassifier(random_state=0),
}

# One dict per (feature extractor, algorithm) pair; becomes a DataFrame row.
results = []

for feature_extractor in [count_vectorizer, tfidf_vectorizer]:
    # Fit the vocabulary on the training documents only, then apply that same
    # vocabulary to the test documents so no test information leaks into the
    # features.
    X_train = feature_extractor.fit_transform(newsgroups_train.data)
    X_test = feature_extractor.transform(newsgroups_test.data)

    for name, algo in algorithms.items():
        # The same estimator instance is refit for each feature extractor.
        algo.fit(X_train, newsgroups_train.target)
        y_pred = algo.predict(X_test)
        accuracy = accuracy_score(newsgroups_test.target, y_pred)
        results.append({
            'Feature Extractor': type(feature_extractor).__name__,
            'Algorithm': name,
            'Accuracy': accuracy,
        })

# Display results in a table
benchmark_table = pd.DataFrame(results)
print(benchmark_table)

  Feature Extractor                Algorithm  Accuracy
0   CountVectorizer  Multinomial Naive Bayes  0.772836
1   CountVectorizer           Decision Trees  0.567313
2   TfidfVectorizer  Multinomial Naive Bayes  0.773898
3   TfidfVectorizer           Decision Trees  0.555762
