In [1]:
# Step 1: Import Required Libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Step 2: Load Data
# Toy corpus of six headlines, each written next to its topic label so the
# pairing between a sentence and its class is visible at a glance.
labelled_headlines = [
    ('The team won the championship', 'Sports'),
    ('The government announced new policies', 'Politics'),
    ('The match ended in a draw', 'Sports'),
    ('Elections are coming soon', 'Politics'),
    ('The player scored a goal', 'Sports'),
    ('New laws were passed by the parliament', 'Politics'),
]

# Column-oriented view of the same records: one list per DataFrame column.
data = {
    'text': [headline for headline, _ in labelled_headlines],
    'label': [topic for _, topic in labelled_headlines],
}

# Two string columns, 'text' and 'label', one row per headline.
df = pd.DataFrame(data)

# Step 3: Split the Data
# The raw documents are split *before* any vectorizing. Fitting TF-IDF on the
# whole corpus first would let the held-out documents influence the vocabulary
# and IDF weights used for training (train/test leakage), which inflates the
# evaluation. test_size and random_state are unchanged from the original split.
y = df['label']
X_train_text, X_test_text, y_train, y_test = train_test_split(
    df['text'], y, test_size=0.2, random_state=42
)

# Step 4: Preprocessing
# Learn the TF-IDF vocabulary and IDF weights from the training documents only,
# then reuse that fitted vectorizer (transform, not fit_transform) on the test
# documents. Test-only words are simply absent from the resulting vectors.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Full-corpus matrix expressed in the training vocabulary, kept under the
# original name `X`. It is not used for fitting or evaluation below.
X = vectorizer.transform(df['text'])

# Step 5: Train the Naive Bayes Classifier
model = MultinomialNB()
model.fit(X_train, y_train)

# Step 6: Make Predictions
# The bare expression on the last line makes the notebook display the
# predicted labels for the held-out documents.
y_pred = model.predict(X_test)
y_pred

array(['Sports', 'Politics'], dtype='<U8')

In [2]:
# Step 7: Evaluate the Model
# Fraction of held-out documents whose predicted label equals the true label.
accuracy = accuracy_score(y_test, y_pred)

# Pin the class order explicitly. Without `labels`, confusion_matrix derives
# the classes from whatever appears in y_test / y_pred, so a test split that
# happens to contain a single class would silently yield a 1x1 matrix.
# Rows are true labels and columns are predicted labels, both in the order of
# `class_labels` (alphabetical over every label in the dataset).
class_labels = sorted(df['label'].unique())
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

# Output the results
print(f'Accuracy: {accuracy:.2f}')
print('Confusion Matrix:')
print(conf_matrix)


Accuracy: 1.00
Confusion Matrix:
[[1 0]
 [0 1]]
