In [None]:
import pandas as pd
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Load the 20 Newsgroups corpus. subset='all' requests every document
# instead of only the predefined 'train' or 'test' portion.
newsgroups = fetch_20newsgroups(subset='all')

In [None]:
# Features are the raw document texts; labels are the per-document class targets.
X, y = newsgroups.data, newsgroups.target

In [None]:
# Split the raw documents BEFORE vectorizing so the TF-IDF vocabulary and IDF
# weights are learned from the training portion only. Fitting the vectorizer
# on the full corpus would leak test-set statistics into the features and
# make the reported accuracy optimistic.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Drop English stop words and cap the vocabulary at 5000 terms.
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train = vectorizer.fit_transform(X_train_text)  # learn vocabulary + IDF on train only
X_test = vectorizer.transform(X_test_text)  # encode test docs with the fitted vocabulary/IDF

In [None]:
# Train a multinomial Naive Bayes classifier on the training features.
# fit() returns the estimator itself, so construction and training are chained.
nb_classifier = MultinomialNB().fit(X_train, y_train)
nb_classifier  # last expression: show the fitted estimator as the cell output

In [None]:
# Predict a class label for every held-out sample in X_test.
y_pred = nb_classifier.predict(X_test)

In [None]:
# Fraction of held-out samples whose predicted label matches the true label,
# reported as a percentage with two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(
    f'Accuracy of the Multinomial Naive Bayes model: {accuracy * 100:.2f}%'
)

In [None]:
# Per-class metrics, labelled with the human-readable newsgroup names.
# A single print with sep='\n' emits the heading and the report on
# consecutive lines.
print(
    '\nClassification Report:',
    classification_report(y_test, y_pred, target_names=newsgroups.target_names),
    sep='\n',
)

In [None]:
# One unseen headline used to sanity-check the trained classifier.
# Adjacent string literals are concatenated by the parser, so the sentence
# stays a single string with no embedded line break.
sample_news = [
    "NASA's Perseverance rover on Mars has successfully collected its first "
    "sample of Martian rock."
]


In [None]:
# Use transform (not fit_transform) so the sample is encoded with the
# vocabulary and IDF weights the vectorizer has already learned.
sample_tfidf = vectorizer.transform(sample_news)

In [None]:
# predict() yields one label index per input document; sample_tfidf was built
# from a one-element list, so element 0 is the prediction for that headline.
predicted_category = nb_classifier.predict(sample_tfidf)

# Map the numeric label back to its newsgroup name before printing.
predicted_name = newsgroups.target_names[predicted_category[0]]
print(f'\nPredicted Category for the sample news: {predicted_name}')