In [None]:
import numpy as np
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# End-to-end text-classification script: load four 20-newsgroups categories,
# turn the posts into TF-IDF features, train a Multinomial Naive Bayes model,
# and report accuracy on a held-out validation split and on the official test
# subset.

# Load the 20 newsgroups dataset
categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']
# The same parts of each post are stripped from both subsets so that train and
# test documents are preprocessed identically.
removed_parts = ('headers', 'footers', 'quotes')
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories, remove=removed_parts)
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories, remove=removed_parts)

y_test = newsgroups_test.target

# Split the raw training texts into train and validation sets *before*
# extracting features. Fitting the vectorizer first and splitting afterwards
# would let the validation documents influence the vocabulary and IDF weights
# (data leakage), making the validation score an unreliable estimate.
train_texts, val_texts, y_train, y_val = train_test_split(
    newsgroups_train.data,
    newsgroups_train.target,
    test_size=0.2,
    random_state=42,
)

# Convert text data to TF-IDF features; the vectorizer is fit on the training
# portion only and then applied unchanged to the validation and test texts.
vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
X_train = vectorizer.fit_transform(train_texts)
X_val = vectorizer.transform(val_texts)
X_test = vectorizer.transform(newsgroups_test.data)

# Train a Multinomial Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X_train, y_train)

# Predict on the validation set
y_pred = classifier.predict(X_val)

# Evaluate the classifier
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)
print(classification_report(y_val, y_pred, target_names=newsgroups_train.target_names))

# Predict on the test set
y_test_pred = classifier.predict(X_test)

# Evaluate the classifier on the test set
test_accuracy = accuracy_score(y_test, y_test_pred)
print("Test Accuracy:", test_accuracy)
print(classification_report(y_test, y_test_pred, target_names=newsgroups_train.target_names))

Validation Accuracy: 0.7986725663716814
                        precision    recall  f1-score   support

           alt.atheism       0.69      0.63      0.66        86
         comp.graphics       0.76      0.94      0.84       107
               sci.med       0.92      0.74      0.82       132
soc.religion.christian       0.81      0.85      0.83       127

              accuracy                           0.80       452
             macro avg       0.79      0.79      0.79       452
          weighted avg       0.81      0.80      0.80       452

Test Accuracy: 0.7549933422103862
                        precision    recall  f1-score   support

           alt.atheism       0.72      0.51      0.60       319
         comp.graphics       0.74      0.94      0.83       389
               sci.med       0.89      0.71      0.79       396
soc.religion.christian       0.70      0.81      0.75       398

              accuracy                           0.75      1502
             macro avg   