In [8]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Toy SMS corpus. Each message is stored next to its label so the two columns
# cannot drift out of sync in length (pd.DataFrame raises ValueError when the
# dict values have different lengths).
samples=[
    ('ham','Hey are we still meeting?'),
    ('spam','Win $10000 now! Click here!'),
    ('ham',"Don't forget the meeting at 3 PM."),
    ('spam','Congratulations! You have won a free cruise.'),
    ('ham','Can you send me the report?'),
    ('spam','Exclusive deal just for you!'),
    ('ham','Lunch at noon?'),
    ('spam','You have been selected for a prize'),
    ('ham','Let us catch up tomorrow.'),
    ('spam','Buy one get one free offer inside!'),
]
data={
    'label':[pair[0] for pair in samples],
    'message':[pair[1] for pair in samples],
}
df=pd.DataFrame(data)

# Numeric target used by the classifier: ham -> 0, spam -> 1.
df['label_num']=df.label.map({'ham':0,'spam':1})

# A label outside the mapping would silently become NaN; fail loudly instead.
assert df['label_num'].notna().all(), "unmapped label in df['label']"

In [None]:
# Hold out 20% of the messages for evaluation. Stratifying on the numeric label
# keeps the ham/spam proportions the same in the train and test parts, which
# matters here because the test part is only a couple of rows.
X_train,X_test,y_train,y_test=train_test_split(
    df['message'],
    df['label_num'],
    test_size=0.2,
    random_state=42,
    stratify=df['label_num'],
)

In [None]:
# Text classification pipeline, applied in order:
#   vect  - token counts with English stop words removed
#   tfidf - TF-IDF re-weighting of those counts
#   clf   - multinomial Naive Bayes on the weighted features
text_clf=Pipeline(steps=[
    ('vect',CountVectorizer(stop_words='english')),
    ('tfidf',TfidfTransformer()),
    ('clf',MultinomialNB()),
])

In [None]:
# Fit every pipeline step on the training messages and their numeric labels.
# Left as the cell's last expression so the fitted pipeline is displayed.
text_clf.fit(X_train, y=y_train)

In [None]:
# Predicted numeric labels (0 = ham, 1 = spam) for the held-out messages.
y_pred = text_clf.predict(X_test)

In [None]:
# Evaluate the predictions on the held-out split.
# The class ids are pinned explicitly (0 = ham, 1 = spam, as mapped into
# df['label_num']) so the report and the confusion matrix always contain both
# classes. Without `labels`, confusion_matrix infers the classes from the data
# and can shrink to 1x1 on a tiny test set, which would no longer match the
# two tick labels passed to the heatmap.
class_ids=[0,1]
class_names=['Ham','Spam']

print("Accuracy:",accuracy_score(y_test,y_pred))
print(
    "\nClassification Report:\n",
    classification_report(
        y_test,
        y_pred,
        labels=class_ids,
        target_names=class_names,
        zero_division=0,  # report 0 instead of warning when a class has no predictions
    ),
)
print("\nConfusion Matrix:")
conf_mat=confusion_matrix(y_test,y_pred,labels=class_ids)

# Rows are the actual classes, columns the predicted ones.
fig,ax=plt.subplots()
sns.heatmap(
    conf_mat,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
# 5-fold cross-validation of the full pipeline over all messages; `scores`
# holds one score per fold.
scores=cross_val_score(
    estimator=text_clf,
    X=df['message'],
    y=df['label_num'],
    cv=5,
)
print("Cross-validation scores:",scores)
print("Average CV Accuracy:",scores.mean())