In [1]:

import pandas as pd
df=pd.read_csv("SMSSpamCollection", sep='\t',names=["label","message"])
df

Unnamed: 0,label,message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...
5568,ham,Will ü b going to esplanade fr home?
5569,ham,"Pity, * was in mood for that. So...any other s..."
5570,ham,The guy did some bitching but I acted like i'd...


In [2]:
df.head()

Unnamed: 0,label,message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [3]:
df.iloc[3]
df.shape

(5572, 2)

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix

In [8]:
x_train, x_test, y_train, y_test = train_test_split(df['message'], df['label'], test_size=0.2, random_state=42)

In [9]:
vectorizer = CountVectorizer()
x_train_counts = vectorizer.fit_transform(x_train)
x_test_counts = vectorizer.transform(x_test)

In [10]:
## Initilize and train model

model= MultinomialNB()
model.fit(x_train_counts, y_train)


In [11]:
## Make Predictions
y_pred = model.predict(x_test_counts)

## Evaluation

print("confusion matrix:\n",confusion_matrix(y_test,y_pred))
print("Accuracy:", accuracy_score(y_test,y_pred))
print("Classification Report:\n", classification_report(y_test,y_pred))



confusion matrix:
 [[966   0]
 [  9 140]]
Accuracy: 0.9919282511210762
Classification Report:
               precision    recall  f1-score   support

         ham       0.99      1.00      1.00       966
        spam       1.00      0.94      0.97       149

    accuracy                           0.99      1115
   macro avg       1.00      0.97      0.98      1115
weighted avg       0.99      0.99      0.99      1115



In [17]:
new_message = ["application for leave"]

In [19]:
new_message_counts = vectorizer.transform(new_message)

prediction = model.predict(new_message_counts)
print("Prediction:",prediction[0])

Prediction: ham


In [21]:
new_messages = [
    "Claim your free vacation now by clicking this link!",
    "Hey,are we still on for the meeting",
    "Exclusive offer just for you"
]

#Transform and predict
new_counts = vectorizer.transform(new_messages)
predictions = model.predict(new_counts)

for msg,label in zip(new_messages, predictions):
    print(f"Message: '{msg}' -> Prediction: {label}")

Message: 'Claim your free vacation now by clicking this link!' -> Prediction: spam
Message: 'Hey,are we still on for the meeting' -> Prediction: ham
Message: 'Exclusive offer just for you' -> Prediction: ham


In [23]:
import joblib
joblib.dump(model,"model.joblib")

['model.joblib']

In [25]:
import joblib
joblib.dump(vectorizer,"scaled.joblib")

['scaled.joblib']

In [27]:
model_file=joblib.load("model.joblib")

In [29]:
scaled_file = joblib.load("scaled.joblib")

In [31]:
msg=["application for leave"]
data=scaled_file.transform(msg)
pre=model_file.predict(data)
print(f"{msg} --> {pre[0]}")

['application for leave'] --> ham
