In [None]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

# Load the full 20 Newsgroups corpus with headers, footers and quoted
# replies removed from every post.
newsgroups = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))
y = newsgroups.target

# Split the RAW documents before any feature extraction. Fitting the
# vectorizer on the whole corpus first would let the test documents influence
# the selected vocabulary and the IDF weights (data leakage), which makes the
# reported test scores optimistic.
docs_train, docs_test, y_train, y_test = train_test_split(
    newsgroups.data, y, test_size=0.2, random_state=42
)

# Learn the vocabulary (top 1000 terms, English stop words dropped) and the
# IDF weights from the training documents only, then apply that fitted
# transformation unchanged to the held-out documents.
vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
X_train = vectorizer.fit_transform(docs_train)
X_test = vectorizer.transform(docs_test)

# Full feature matrix in the original document order, kept so that any code
# referring to `X` keeps working. It is produced by the train-fitted
# vectorizer, so it carries no information learned from the test split.
X = vectorizer.transform(newsgroups.data)


In [None]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import f1_score

# Fit a multinomial Naive Bayes model on the training features
# (fit() returns the estimator itself, so construction and fitting are chained).
nb_classifier = MultinomialNB().fit(X_train, y_train)

# Label the held-out documents and score them with the weighted F1 metric
nb_predictions = nb_classifier.predict(X_test)
nb_f1_score = f1_score(
    y_true=y_test,
    y_pred=nb_predictions,
    average='weighted',
)

print(f"F1-score for Naive Bayes Classifier: {nb_f1_score:.4f}")


F1-score for Naive Bayes Classifier: 0.5591


In [None]:
from sklearn.neighbors import NearestCentroid

# Rocchio classification is done here with scikit-learn's NearestCentroid,
# fitted on the training features.
rocchio_classifier = NearestCentroid()
rocchio_classifier = rocchio_classifier.fit(X_train, y_train)

# Predict labels for the test split, then compute the weighted F1 score
rocchio_predictions = rocchio_classifier.predict(X_test)
rocchio_f1_score = f1_score(
    y_true=y_test,
    y_pred=rocchio_predictions,
    average='weighted',
)

print(f"F1-score for Rocchio Classifier: {rocchio_f1_score:.4f}")


F1-score for Rocchio Classifier: 0.5302


In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Build a 5-nearest-neighbour model from the training features
knn_classifier = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Classify the test split and evaluate with the weighted F1 score
knn_predictions = knn_classifier.predict(X_test)
knn_f1_score = f1_score(
    y_true=y_test,
    y_pred=knn_predictions,
    average='weighted',
)

print(f"F1-score for K-Nearest Neighbor Classifier: {knn_f1_score:.4f}")


F1-score for K-Nearest Neighbor Classifier: 0.2317


In [None]:
# Side-by-side summary of the three weighted F1 scores computed in the
# earlier cells. All lines are emitted by a single print call, one per row.
comparison_lines = [
    f"\nComparison of Classifiers:",
    f"Naive Bayes Classifier F1-score: {nb_f1_score:.4f}",
    f"Rocchio Classifier F1-score: {rocchio_f1_score:.4f}",
    f"K-Nearest Neighbor Classifier F1-score: {knn_f1_score:.4f}",
]
print(*comparison_lines, sep="\n")



Comparison of Classifiers:
Naive Bayes Classifier F1-score: 0.5591
Rocchio Classifier F1-score: 0.5302
K-Nearest Neighbor Classifier F1-score: 0.2317
