In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Pull the complete 20 Newsgroups corpus (the 'train' and 'test' subsets combined).
# NOTE(review): headers, footers and quoted replies are kept here; the scikit-learn
# docs warn that these can inflate accuracy -- consider
# remove=('headers', 'footers', 'quotes') for a more realistic estimate.
newsgroups = fetch_20newsgroups(subset='all')
# X holds the raw article texts; y holds the label of each article
# (used later as an index into newsgroups.target_names).
X, y = newsgroups.data, newsgroups.target

In [None]:
# Hold out 20% of the documents for evaluation.
# The fixed random_state makes the shuffle (and therefore the split) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# TF-IDF features: English stop words are dropped and the vocabulary is
# capped at 5000 terms.
vectorizer = TfidfVectorizer(
    max_features=5000,
    stop_words='english',
)
# Learn the vocabulary and IDF weights from the training split only ...
X_train_tfidf = vectorizer.fit_transform(X_train)
# ... then project the held-out split onto that same vocabulary (no refit).
X_test_tfidf = vectorizer.transform(X_test)

In [None]:
# Multinomial Naive Bayes with its default settings, trained on the
# TF-IDF matrix of the training split.
model = MultinomialNB()
model.fit(X=X_train_tfidf, y=y_train)

In [None]:
# Predicted labels for the held-out documents.
y_pred = model.predict(X=X_test_tfidf)

In [None]:
# Fraction of held-out documents whose predicted label matches the true label,
# reported as a percentage with two decimals.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Per-class metrics table, labelled with the newsgroup names.
# A single print with sep="\n" emits the heading and the report on separate lines.
print(
    "\nClassification Report:",
    classification_report(y_test, y_pred, target_names=newsgroups.target_names),
    sep="\n",
)

In [None]:
# Confusion matrix of true vs. predicted labels for the held-out split,
# printed as plain text under its heading.
print(
    "\nConfusion Matrix:",
    confusion_matrix(y_true=y_test, y_pred=y_pred),
    sep="\n",
)

In [None]:
# Classify one ad-hoc snippet with the already-fitted vectorizer and model.
new_text = ["This is an example of a news article about technology and innovation."]
# transform (not fit_transform): reuse the vocabulary learned from the training split.
new_text_tfidf = vectorizer.transform(new_text)
prediction = model.predict(new_text_tfidf)
# prediction holds one label per input text; map the first (only) one to its newsgroup name.
predicted_category = newsgroups.target_names[prediction[0]]
# The message must sit on one physical line: a "..." literal cannot contain a raw
# line break, so the previously wrapped f-string raised a SyntaxError.
print(f"\nPredicted Category for the new text: {predicted_category}")