<a href="https://colab.research.google.com/github/Sumaiya379/AI-and-ML/blob/main/Naive_report.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import string
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix, accuracy_score

# Load dataset & preprocessing
df = pd.read_csv("news_category.csv")
# The label column is looked up under the name 'Label ' (with a trailing
# space) and renamed to 'Label'; presumably the CSV header carries that
# stray space -- verify against the data file.
df.rename(columns={'Label ': 'Label'}, inplace=True)
# Encode the two categories as integers: Sports -> 0, Politics -> 1.
label_codes = df['Label'].map({'Sports': 0, 'Politics': 1})
# Series.map yields NaN for any value missing from the mapping. Fail here
# with a clear message instead of letting NaN labels reach model fitting.
unmapped_labels = df.loc[label_codes.isna(), 'Label'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"Unexpected values in 'Label' column: {list(unmapped_labels)}"
    )
df['Label'] = label_codes
print("\nSample before preprocessing:\n", df.head())
def preprocess_text(Message):
    """Return *Message* lowercased with all ASCII punctuation removed.

    Every character listed in ``string.punctuation`` is deleted rather than
    replaced by a space, so hyphenated words are fused together
    (``"two-tier"`` becomes ``"twotier"``).
    """
    # A translation table that maps each punctuation character to "delete".
    strip_punctuation = str.maketrans("", "", string.punctuation)
    return Message.lower().translate(strip_punctuation)

# Normalise every article (lowercase, punctuation stripped).
df['Message'] = df['Message'].apply(preprocess_text)

print("\nSample after preprocessing:\n", df['Message'].head())

# Train/test split: hold out 25% of the articles; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df['Message'],
    df['Label'],
    test_size=0.25,
    random_state=1,
)

print(f"\nOriginal dataset contains {df.shape[0]} news articles")
print(f"Training set contains {X_train.shape[0]} articles")
print(f"Testing set contains {X_test.shape[0]} articles")

# Bag of words: the vocabulary is learned from the training articles only,
# and the test articles are encoded with that same vocabulary.
count_vector = CountVectorizer()
train_matrix = count_vector.fit_transform(X_train)
test_matrix = count_vector.transform(X_test)

# Train the Naive Bayes classifier on the word counts (fit returns the
# fitted estimator itself).
naive_bayes = MultinomialNB().fit(train_matrix, y_train)

# Predict on the held-out articles, then report accuracy and the confusion
# matrix.
predictions = naive_bayes.predict(test_matrix)
accuracy = accuracy_score(y_test, predictions)
cm = confusion_matrix(y_test, predictions)

print(f"\nAccuracy on Test: {accuracy:.2f}")
print("\nConfusion Matrix:")
print(cm)


Sample before preprocessing:
    Label                                            Message
0      0  Asia Cup: Bangladesh to play all group matches...
1      1  Chaos, gangs, gunfire: Gaza aid fails to reach...
2      0     Proposed two-tier system a test for Bangladesh
3      0  Zayyan Ahmed, a USA-based footballer, has join...
4      1  A total of 29 registered political parties, in...

Sample after preprocessing:
 0    asia cup bangladesh to play all group matches ...
1    chaos gangs gunfire gaza aid fails to reach mo...
2        proposed twotier system a test for bangladesh
3    zayyan ahmed a usabased footballer has joined ...
4    a total of 29 registered political parties inc...
Name: Message, dtype: object

Original dataset contains 100 news articles
Training set contains 75 articles
Testing set contains 25 articles

Accuracy on Test: 0.80

Confusion Matrix:
[[11  3]
 [ 2  9]]
