In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [14]:
# Load the labelled messages from spam.csv into a DataFrame, decoding the file
# as latin-1 (presumably because it is not valid UTF-8 -- TODO confirm).
# NOTE(review): '/content/spam.csv' is an absolute path (looks like a Colab
# upload location -- confirm); the cell fails anywhere the file is not at
# exactly that path.
data = pd.read_csv('/content/spam.csv', encoding='latin-1')

In [15]:
# Preview the first rows of the raw frame to see which columns the file provides.
data.head()

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [16]:
# Keep only the label column (v1) and the message text column (v2).
# Take an explicit copy: this frame is modified in place by later cells
# (columns renamed, labels re-encoded), and writing into a column selection
# that pandas still tracks as derived from the raw frame is what triggers
# SettingWithCopyWarning. The copy makes the frame fully independent.
data = data[['v1', 'v2']].copy()

In [17]:
# Confirm that only the v1 and v2 columns remain.
data.head()

Unnamed: 0,v1,v2
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [18]:
# Give the two remaining columns descriptive names (assigned by position:
# first column -> label, second column -> text).
data.columns = ['label', 'text']

In [19]:
# Encode the string labels as integers: ham -> 0, spam -> 1.
# Series.map turns every value that is not a key of the dict into NaN, so
# running this cell a second time (when the column already holds 0/1) would
# silently wipe every label. Only map while the column is not yet encoded, and
# fail loudly if anything other than the two expected labels is present.
label_codes = {'ham': 0, 'spam': 1}
if not data['label'].isin(list(label_codes.values())).all():
    data['label'] = data['label'].map(label_codes)
assert data['label'].notna().all(), "label column contains values other than 'ham'/'spam'"

In [21]:
# Check the encoding: label should now hold 0 for ham and 1 for spam.
data.head()

Unnamed: 0,label,text
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."


In [22]:
# Hold out 20% of the messages as a test set; random_state pins the shuffle so
# the split is the same on every run.
# NOTE(review): the split is not stratified. The classes are imbalanced (the
# report further down shows 965 ham vs 150 spam in the test set), so passing
# stratify=data['label'] may be worth considering.
x_train, x_test, y_train, y_test = train_test_split(
    data['text'],
    data['label'],
    test_size=0.2,
    random_state=42,
)

In [24]:
# Bag-of-words featurizer with default settings: each message becomes a
# vector of token counts.
vectorizer = CountVectorizer()

In [25]:
# Learn the vocabulary from the training messages only and convert them to a
# count matrix in the same step.
x_train_counts = vectorizer.fit_transform(x_train)

In [26]:
# Convert the held-out messages with the vocabulary already learned from the
# training set; the vectorizer is deliberately not refit on test data.
x_test_counts = vectorizer.transform(x_test)

In [27]:
# Multinomial naive Bayes classifier with default hyperparameters.
model = MultinomialNB()

In [28]:
# Train the classifier on the training count matrix and its 0/1 labels.
model.fit(x_train_counts, y_train)

In [29]:
# Predict a label (0 = ham, 1 = spam) for every held-out message.
y_pred = model.predict(x_test_counts)

In [30]:

# Share of held-out messages whose predicted label equals the true label,
# printed as a percentage with two decimals.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 98.39%


In [31]:
# Confusion matrix of true vs. predicted labels on the test set.
# Rows are the true classes and columns the predicted classes, in label order
# 0 (ham), 1 (spam); each row therefore sums to that class's support.
conf_matrix = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(conf_matrix)



Confusion Matrix:
[[963   2]
 [ 16 134]]


In [32]:
# Text report with per-class precision, recall, F1-score and support, plus
# overall accuracy and the macro / weighted averages.
class_report = classification_report(y_test, y_pred)
print('Classification Report:')
print(class_report)

Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       965
           1       0.99      0.89      0.94       150

    accuracy                           0.98      1115
   macro avg       0.98      0.95      0.96      1115
weighted avg       0.98      0.98      0.98      1115



In [34]:
def predict_spam(text):
    """Classify a single message as spam or not spam.

    The text is vectorized with the notebook-level ``vectorizer`` and scored
    by the notebook-level ``model``; both must already be fitted.

    Args:
        text: The message to classify, as a single string.

    Returns:
        'spam' if the model predicts label 1, otherwise 'Not Spam'.
    """
    # transform() expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([text])
    predicted_label = model.predict(features)[0]
    return 'spam' if predicted_label == 1 else 'Not Spam'

# Smoke-test the helper on an obviously promotional message.
test_email = "Congratulations! You've won a $1000 gift card. Click here to claim your prize."
print(predict_spam(test_email))

spam


In [36]:
# Classify text typed in by the user. input() waits for keyboard entry, so an
# unattended "Run All" stops at this cell until something is entered.
email = input('Enter email text here:')
# Bare last expression: the notebook displays the returned label.
predict_spam(email)

Enter email text here:Congratulations! You've won a $1000 gift card. Click here to claim your prize


'spam'