In [None]:
'''
    Task 2: Spam Detection using MultinomialNB
    ● Load a text dataset (e.g., SMS Spam Collection or any public text
    dataset).
    ● Preprocess using CountVectorizer or TfidfVectorizer.
    ● Train a MultinomialNB classifier.
    ● Evaluate:
    ○ Accuracy
    ○ Precision
    ○ Recall
    ○ Confusion Matrix
'''

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Load the SMS spam CSV (read as ISO-8859-1), keep only the 'v1' and 'v2'
# columns, and give them the descriptive names used by the rest of the notebook.
df = (
    pd.read_csv("spam.csv", encoding='ISO-8859-1')
    .loc[:, ['v1', 'v2']]
    .rename(columns={'v1': 'label', 'v2': 'message'})
)

# Preview the first rows to confirm the two-column layout.
df.head()


Unnamed: 0,label,message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [24]:

# Encode the string labels as integers. LabelEncoder assigns codes in sorted
# class order, so with the 'ham'/'spam' labels shown above this gives
# ham -> 0 and spam -> 1; spam is then the positive class (default pos_label=1)
# for the precision and recall scores below.
le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])

# Convert each message into a TF-IDF weighted term vector, ignoring English stop words.
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(df['message'])
y = df['label']

# NOTE(review): the vectorizer is fitted on the full corpus before the split,
# so its vocabulary/IDF statistics also see the test messages. Consider
# splitting first and fitting on the training messages only — confirm whether
# that matters for this exercise.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the Multinomial Naive Bayes classifier on the training split.
model = MultinomialNB()
model.fit(X_train, y_train)

# Predict on the held-out split. This assignment was missing, so the metric
# calls below raised NameError on a fresh kernel.
y_pred = model.predict(X_test)

# Evaluation metrics; `accuracy` was also previously printed without being computed.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("Confusion Matrix:\n", cm)


Accuracy: 0.9668161434977578
Precision: 1.0
Recall: 0.7533333333333333
Confusion Matrix:
 [[965   0]
 [ 37 113]]


In [26]:
'''
    Task 3: GaussianNB with Iris or Wine Dataset
    ● Train a GaussianNB classifier on a numeric dataset.
    ● Split data into train/test sets.
    ● Evaluate model performance.
    ● Compare with Logistic Regression or Decision Tree briefly
'''

from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Load the Iris dataset as a feature matrix and a target vector.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train GaussianNB
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred_gnb = gnb.predict(X_test)

# Train Logistic Regression (max_iter raised to 200 iterations)
logreg = LogisticRegression(max_iter=200)
logreg.fit(X_train, y_train)
y_pred_logreg = logreg.predict(X_test)

# Train Decision Tree. random_state is fixed so the fitted tree, and therefore
# the reported scores, are the same on every Restart & Run All; without it the
# tree can differ between runs even though the data split is seeded.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# Collect the predictions once so every model is reported the same way.
predictions = [
    ("GaussianNB", y_pred_gnb),
    ("Logistic Regression", y_pred_logreg),
    ("Decision Tree", y_pred_dt),
]

# Evaluate all: headline accuracy for each model first ...
for model_name, y_hat in predictions:
    print(f"{model_name} Accuracy:", accuracy_score(y_test, y_hat))

# ... then the per-class classification reports.
for model_name, y_hat in predictions:
    print(f"\n--- {model_name} Report ---\n", classification_report(y_test, y_hat))


GaussianNB Accuracy: 0.9777777777777777
Logistic Regression Accuracy: 1.0
Decision Tree Accuracy: 1.0

--- GaussianNB Report ---
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.92      0.96        13
           2       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98      0.98      0.98        45


--- Logistic Regression Report ---
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45


--- Decision Tree Report ---
               precision    recall  f1-sc