# Spam Detection Using Naive bayes

In [1]:
# install packages
!pip install colorama
import colorama
from colorama import Fore, Back, Style



In [2]:
# import packages
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

In [3]:
# import Data
spam_df = pd.read_csv('spam.csv')

In [4]:
#Inspect Data
spam_df

Unnamed: 0,Category,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...
5568,ham,Will ü b going to esplanade fr home?
5569,ham,"Pity, * was in mood for that. So...any other s..."
5570,ham,The guy did some bitching but I acted like i'd...


In [5]:
 spam_df.groupby('Category').describe()

Unnamed: 0_level_0,Message,Message,Message,Message
Unnamed: 0_level_1,count,unique,top,freq
Category,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
ham,4825,4516,"Sorry, I'll call later",30
spam,747,641,Please call our customer service representativ...,4


In [6]:
# turn spam/ham into numerical data, creating a new column called spam
spam_df['spam'] = spam_df['Category'].apply(lambda x: 1 if x == 'spam' else 0)

In [7]:
spam_df.head()

Unnamed: 0,Category,Message,spam
0,ham,"Go until jurong point, crazy.. Available only ...",0
1,ham,Ok lar... Joking wif u oni...,0
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,1
3,ham,U dun say so early hor... U c already then say...,0
4,ham,"Nah I don't think he goes to usf, he lives aro...",0


In [8]:
# create train/test split
X_train, X_test, y_train, y_test = train_test_split(spam_df.Message, spam_df.spam)

In [9]:
X_train.describe()

count                       4179
unique                      3922
top       Sorry, I'll call later
freq                          20
Name: Message, dtype: object

In [10]:
# count the words and store it in a matrix
vectorizer = CountVectorizer()
X_train_count = vectorizer.fit_transform(X_train.values)

In [11]:
X_train_count.toarray()

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [12]:
# train model
model = MultinomialNB()
model.fit(X_train_count, y_train)

In [13]:
# pre-test ham
email_ham = ['hi, nice to meet you, wanna see the game together?']
# transforming the email to 1/0 by using vectorizer
email_ham_count = vectorizer.transform(email_ham)
model.predict(email_ham_count)

# the array([0]) means that this massage is identify by ham Email

array([0])

In [14]:
# pre-test spam
email_spam = ['prize  money check money']
email_spam_count = vectorizer.transform(email_spam)
model.predict(email_spam_count)
# the array([1]) means that this massage is identify by spam Email

array([0])

In [15]:
# test model 
X_test_count = vectorizer.transform(X_test)
model.score(X_test_count, y_test)
# our model has this Accuracy, looks good

0.9842067480258435

### use this model to detect Emails

In [16]:
def print_success(text, end='\n'):
    print(Back.GREEN + text, end=end)

def print_warning(text, end='\n'):
     print(Back.YELLOW + text, end=end)

def print_error(text, end='\n'):
    print(Back.RED + text, end=end)

In [17]:
#bulding a function to implement our model
def spam_detection():
    
    email = input('enter any Email for Spam detection: ')
    email = [email]

    #vectorizing the Email
    email_count = vectorizer.transform(email)
    score = model.predict(email_count)


    
    if score == [0]:
        result = print_success('the email you have sent is not Spam')

    else:
        result = print_error('the email you have sent is Spam!!')

    return result

In [18]:
spam_detection()

enter any Email for Spam detection:  hi how are you


[42mthe email you have sent is not Spam


In [19]:
spam_detection()

enter any Email for Spam detection:  10% discount for this month


[41mthe email you have sent is Spam!!
