In [114]:
import pandas as pd
# Load the tab-separated SMS corpus. Column names are supplied explicitly, so
# the first line of the file is read as data rather than as a header.
# NOTE(review): pandas' default quote handling is in effect here; if raw
# messages contain unbalanced double quotes, neighbouring rows can be merged.
# Compare df.shape with the file's line count to confirm.
df = pd.read_csv(
    'SMSSpamCollection',
    sep='\t',
    names=["label", "message"],
)

In [115]:
# Preview the first rows to check that the label and message columns parsed correctly.
df.head()

Unnamed: 0,label,message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [116]:
# Dataset dimensions as (rows, columns). Only the final expression of a cell is
# rendered, so this cell holds a single expression.
df.shape

(5572, 2)

In [117]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [118]:
# Hold out 20% of the messages for evaluation; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    df['message'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

In [119]:
# Build the vocabulary from the training messages only, then encode both splits
# with that same fitted vectorizer so the test set never influences the vocabulary.
vectorizer = CountVectorizer()
x_train_counts = vectorizer.fit_transform(x_train)
x_test_counts = vectorizer.transform(x_test)

In [120]:
# Initialise and train the model: a multinomial Naive Bayes classifier fitted on
# the training token counts. No constructor arguments are passed, so the
# estimator's default hyperparameters are used.

model = MultinomialNB()
model.fit(x_train_counts,y_train)



In [121]:
# Predict labels for the held-out messages.
y_pred = model.predict(x_test_counts)

# Compute each metric once, then report them in a fixed order.
conf_mat = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("confusion matrix:\n", conf_mat)
print("Accuracy", accuracy)

print("Classification Report:\n", report)

confusion matrix:
 [[966   0]
 [  9 140]]
Accuracy 0.9919282511210762
Classification Report:
               precision    recall  f1-score   support

         ham       0.99      1.00      1.00       966
        spam       1.00      0.94      0.97       149

    accuracy                           0.99      1115
   macro avg       1.00      0.97      0.98      1115
weighted avg       0.99      0.99      0.99      1115



In [122]:
# Single unseen message, wrapped in a list because it is passed to
# vectorizer.transform() in the next cell.
new_message=["application for leave"]


In [123]:
# Encode the message with the already-fitted vectorizer (transform only, no
# refit) and classify it.
new_message_counts = vectorizer.transform(new_message)
prediction = model.predict(new_message_counts)

# predict() returns one label per input row; there is exactly one row here.
print("Prediction:", prediction[0])

Prediction: ham


In [124]:
# A small batch of hand-written messages to spot-check the classifier.
# NOTE(review): "vaction" looks like a typo for "vacation"; it is left untouched
# because the string is model input and changing it could change the prediction.
new_messages = [
    "Claim your vaction now by clicking this link!",
    "Hey, are we still on for the meeting tomorrow?",
    "Exclusive offer just for you. Buy now!",
]

# Encode the whole batch at once and predict one label per message.
new_counts = vectorizer.transform(new_messages)
predictions = model.predict(new_counts)

# Pair every message with its predicted label for display.
for msg, label in zip(new_messages, predictions):
    print(f"message: '{msg}' -> prediction: {label}")

message: 'Claim your vaction now by clicking this link!' -> prediction: spam
message: 'Hey, are we still on for the meeting tomorrow?' -> prediction: ham
message: 'Exclusive offer just for you. Buy now!' -> prediction: ham


In [125]:
import joblib
# Persist the fitted vectorizer so new text can be encoded without retraining.
# NOTE(review): the file name "scaled.joblib" suggests a scaler, but the object
# written here is `vectorizer`; the name is kept because a later cell loads
# this exact path.
joblib.dump(vectorizer,"scaled.joblib")

['scaled.joblib']

In [126]:
import joblib
# Persist the trained classifier next to the vectorizer artifact.
joblib.dump(model,"model.joblib")

['model.joblib']

In [127]:
# Reload the classifier from disk to verify the saved artifact is usable.
# NOTE(review): joblib files are pickle-based; only load files from a trusted source.
model_file=joblib.load("model.joblib")

In [128]:
# Reload the object stored under "scaled.joblib". Despite the variable and file
# names, an earlier cell wrote `vectorizer` to this path, not a scaler.
scaled_file=joblib.load("scaled.joblib")

In [129]:
# Round-trip check: classify a sample message using only the reloaded artifacts.
msg = ["application for leave"]
data = scaled_file.transform(msg)
pre = model_file.predict(data)

# `msg` is a list, so the whole list (brackets included) is interpolated.
print(f"{msg} --> {pre[0]}")

['application for leave'] --> ham
