In [1]:
import pandas as pd
import numpy as np

In [2]:
dataset = pd.read_csv('spam.csv')

In [3]:
dataset.head()

Unnamed: 0,Category,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [4]:
dataset.Category.value_counts()

Category
ham     4825
spam     747
Name: count, dtype: int64

In [5]:
dataset['spam'] = dataset['Category'].apply(lambda x: 1 if x=='spam' else 0)

In [6]:
dataset.shape

(5572, 3)

In [7]:
dataset.head()

Unnamed: 0,Category,Message,spam
0,ham,"Go until jurong point, crazy.. Available only ...",0
1,ham,Ok lar... Joking wif u oni...,0
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,1
3,ham,U dun say so early hor... U c already then say...,0
4,ham,"Nah I don't think he goes to usf, he lives aro...",0


In [8]:
type(dataset['Category'])

pandas.core.series.Series

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(dataset.Message, dataset.spam, test_size=0.2)

In [10]:
X_train.shape

(4457,)

In [11]:
type(X_train)

pandas.core.series.Series

In [12]:
X_train[:4]

2836    Ya they are well and fine., BBD(pooja) full pi...
4110                     I want to lick your pussy now...
5105    I wnt to buy a BMW car urgently..its vry urgen...
4844                I need details about that online job.
Name: Message, dtype: object

In [13]:
from sklearn.feature_extraction.text import CountVectorizer

v = CountVectorizer()

X_train_cv = v.fit_transform(X_train)
X_train_cv    

<4457x7772 sparse matrix of type '<class 'numpy.int64'>'
	with 59304 stored elements in Compressed Sparse Row format>

In [14]:
X_train_cv.toarray()[:2][0]

array([0, 0, 0, ..., 0, 0, 0])

In [15]:
from sklearn.naive_bayes import MultinomialNB

In [16]:
model = MultinomialNB()

In [17]:
model.fit(X_train_cv, y_train)

In [18]:
X_test_cv = v.transform(X_test)

In [19]:
y_pred = model.predict(X_test_cv)

In [20]:
from sklearn.metrics import confusion_matrix, classification_report

In [21]:
confusion_matrix(y_test, y_pred)

array([[969,   2],
       [  7, 137]])

In [22]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      1.00      1.00       971
           1       0.99      0.95      0.97       144

    accuracy                           0.99      1115
   macro avg       0.99      0.97      0.98      1115
weighted avg       0.99      0.99      0.99      1115



In [23]:
emails = [
    'Hey mohan, can we get together to watch footbal game tomorrow?',
    'Upto 20% discount on parking, exclusive offer just for you. Dont miss this reward!'
]

In [24]:
test_email = v.transform(emails)

In [25]:
model.predict(test_email)

array([0, 1])

In [26]:
from sklearn.pipeline import Pipeline

In [27]:
clf = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('nb', MultinomialNB())
])

In [28]:
clf.fit(X_train, y_train)

In [29]:
y_pred = clf.predict(X_test)

In [30]:
print(classification_report(y_pred, y_test))

              precision    recall  f1-score   support

           0       1.00      0.99      1.00       976
           1       0.95      0.99      0.97       139

    accuracy                           0.99      1115
   macro avg       0.97      0.99      0.98      1115
weighted avg       0.99      0.99      0.99      1115



In [31]:
clf.predict(emails)

array([0, 1])

In [32]:
email = ['4 of 88 Kickstart your computer science career']

clf.predict(email)

array([0])

In [33]:
email = ['''Dear Valued Customer,

Congratulations! You have been selected for an exclusive offer that you cannot afford to miss. This limited-time opportunity is your chance to save big on a wide range of products and services.

Act now and enjoy incredible discounts on luxury vacations, the latest gadgets, fashion accessories, and much more. Don't wait! These deals won't last forever.

To claim your offer, simply click on the link below and provide your contact information. Remember, this offer is only available to a select few, so don't delay!

[Link to a suspicious website]

Hurry and take advantage of this exclusive offer today! You deserve to treat yourself with the best deals.

Best regards,
The Amazing Deals Team''']

clf.predict(email)

array([1])

In [34]:
email = ['''Dear ramesh,

We hope this email finds you well. We are thrilled to invite you to join our book club, where literature enthusiasts like yourself come together to share their love for reading.

Our book club meets once a month to discuss a selected book, exchange ideas, and engage in stimulating conversations. It's a fantastic opportunity to explore various genres and discover new authors while connecting with fellow book lovers in a friendly and inclusive environment.

The upcoming book for our next meeting is [Book Title] by [Author]. We believe it's an excellent choice that will inspire insightful discussions. We encourage you to read the book before the meeting, although it's not mandatory to participate.

Date: 15-8-2023
Time: 10:17 am
Location: Rajkot

We would be delighted if you could join us for this enriching experience. Please let us know if you're interested or have any questions. We'll be more than happy to provide further details and assist you in any way.

Looking forward to hearing from you soon!

Warm regards,
Manish''']

clf.predict(email)

array([0])