Importing Required Libraries

In [None]:
# Mount Google Drive at /content/drive; the CSV read and the model load
# later in this notebook both use paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import nltk
import re

Loading Dataset

In [None]:
# Read the spam CSV from the mounted Drive, passing ISO-8859-1 explicitly as
# the file encoding. Later cells use column v1 as the label and v2 as the text.
data=pd.read_csv('/content/drive/MyDrive/IBM/datasets/spam.csv',encoding="ISO-8859-1")

**Data Preprocessing**

Encoding the categorical column

In [None]:
# Encoder used in the next cell to turn the categorical column v1 into integer codes.
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()

In [None]:
# Overwrite v1 in place with the encoder's integer codes
# (the preview below shows the resulting 0/1 values).
data['v1']=le.fit_transform(data['v1'])

In [None]:
# Preview the first 10 rows: encoded label in v1, message text in v2.
data.head(10)

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,0,"Go until jurong point, crazy.. Available only ...",,,
1,0,Ok lar... Joking wif u oni...,,,
2,1,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,0,U dun say so early hor... U c already then say...,,,
4,0,"Nah I don't think he goes to usf, he lives aro...",,,
5,1,FreeMsg Hey there darling it's been 3 week's n...,,,
6,0,Even my brother is not like to speak with me. ...,,,
7,0,As per your request 'Melle Melle (Oru Minnamin...,,,
8,1,WINNER!! As a valued network customer you have...,,,
9,1,Had your mobile 11 months or more? U R entitle...,,,


In [None]:
# Download NLTK's stop-word corpus; stopwords.words('english') is used by the cleaning code below.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

In [None]:
# pe: Porter stemmer shared by the corpus-cleaning loop and preprocessing().
# arr: list that collects one cleaned string per message in the loop below.
pe=PorterStemmer()
arr=[]

In [None]:
# (rows, columns) of the loaded frame.
data.shape

(5572, 5)

In [None]:
# Clean every message in data['v2'] and collect the results in `arr`:
#   1. replace every non-letter character with a space,
#   2. lower-case the text and split it on whitespace,
#   3. drop English stop words and Porter-stem the remaining tokens,
#   4. re-join the stems into one space-separated string.
# The stop-word set is built once (it used to be rebuilt for every token), and
# the loop walks the column itself rather than a hard-coded row count, so the
# cell keeps working if the dataset size changes. `arr` is reset first so that
# re-running this cell does not append a second copy of the corpus.
stop_words=set(stopwords.words('english'))
arr=[]
for msg in data['v2']:
  msg=re.sub('[^a-zA-Z]',' ',msg).lower().split()
  msg=[pe.stem(word) for word in msg if word not in stop_words]
  arr.append(' '.join(msg))

In [None]:
# Bag-of-words vectorizer with max_features=10000.
# (The matrix produced further down has 6221 columns, i.e. fewer than that limit.)
from sklearn.feature_extraction.text import CountVectorizer
cv=CountVectorizer(max_features=10000)

In [None]:
# Fit the vectorizer on the cleaned corpus and convert the result to a dense
# array; this is the feature matrix passed to train_test_split below.
# NOTE(review): the vocabulary is fitted on all rows before the train/test
# split, so held-out messages influence the feature set — confirm this is acceptable.
x=cv.fit_transform(arr).toarray()

In [None]:
# Inspect the feature matrix.
x

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [None]:
# (number of messages, number of vocabulary columns).
x.shape

(5572, 6221)

In [None]:
# Target vector: the encoded v1 labels as a NumPy array.
y=data['v1'].to_numpy()

In [None]:
# Inspect the label vector.
y

array([0, 0, 1, ..., 0, 0, 0])

Train and Test Split

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for evaluation. random_state pins the shuffle so the
# split (and every metric derived from it) is reproducible on a fresh run;
# stratify=y keeps the class ratio of y the same in both partitions.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42,stratify=y)

**Model Building**

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# Start from an empty Sequential model; the layers are added in the next cell.
model=Sequential()

In [None]:
# Stack the layers: two ReLU hidden layers followed by a single sigmoid unit,
# whose output is compared against 0.5 in the prediction cells further down.
# NOTE(review): units=6221 equals the number of feature columns (x.shape[1])
# and units=10000 equals the vectorizer's max_features; `units` sets a layer's
# output width, so these hidden layers are very large — confirm this is intended.
model.add(Dense(units=6221,activation='relu'))
model.add(Dense(units=10000,activation='relu'))
model.add(Dense(units=1,activation='sigmoid'))

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss for the single
# sigmoid output, and accuracy as the reported metric.
model.compile(
  loss='binary_crossentropy',
  optimizer='adam',
  metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs. The held-out split is passed as validation_data so each
# epoch also reports loss/accuracy on rows the model is not trained on;
# otherwise x_test / y_test are created above but never used.
model.fit(x_train,y_train,epochs=10,validation_data=(x_test,y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f2df6080a50>

Saving Model

In [None]:
# List the current working directory (explicit line magic rather than automagic).
%ls

[0m[01;34mdrive[0m/  [01;34msample_data[0m/


In [None]:
# Change into the Drive folder that holds the dataset and the saved model
# (explicit line magic rather than automagic).
%cd /content/drive/MyDrive/IBM/datasets

/content/drive/MyDrive/IBM/datasets


In [None]:
# Save to the same absolute path that load_model() reads from below, so the
# save no longer depends on the working directory set by an earlier cell.
model.save('/content/drive/MyDrive/IBM/datasets/spamDetector.h5')

Testing Model

In [None]:
from tensorflow.keras.models import load_model

In [None]:
# Reload the saved model from Drive; this rebinds `model` to the loaded copy,
# which is the object the prediction cells below call.
model=load_model('/content/drive/MyDrive/IBM/datasets/spamDetector.h5')

In [None]:
def preprocessing(text):
  """Clean one raw message with the same steps used on the training corpus.

  Non-letter characters become spaces, the text is lower-cased and split on
  whitespace, English stop words are dropped, and the remaining tokens are
  stemmed with the notebook-level Porter stemmer `pe`.

  Args:
    text: raw message string.

  Returns:
    The stemmed tokens joined by single spaces (an empty string if no token
    survives the filtering).
  """
  # Build the stop-word set once per call; it used to be rebuilt for every token.
  stop_words=set(stopwords.words('english'))
  tokens=re.sub('[^a-zA-Z]',' ',text).lower().split()
  return ' '.join(pe.stem(word) for word in tokens if word not in stop_words)

In [None]:
# Spam check: classify a message that is expected to be flagged as spam

In [None]:
# Sample message for the spam check.
message1='Get your free ringtone. ude the password XX2Y to activate.'

In [None]:
# Clean the sample with the training-time pipeline. The argument must be
# message1: no variable named `message` is defined anywhere in this notebook,
# so the previous call only worked with leftover kernel state.
message1=preprocessing(message1)

In [None]:
# Show the cleaned text that will be vectorized.
message1

'get free rington ude password xx activ'

In [None]:
# Vectorize the cleaned message with the already-fitted `cv` (transform only,
# no refit) and run it through the model.
# NOTE(review): training used a dense array (.toarray()); here the transform
# result is passed as returned — confirm the installed Keras accepts that input type.
result=model.predict(cv.transform([message1]))



In [None]:
# Raw sigmoid output for the message (a 1x1 array, as the saved output shows).
result

array([[0.9997627]], dtype=float32)

In [None]:
# Apply the 0.5 decision threshold to the model output and report the verdict.
print("Spam Message !!!" if result>0.5 else "Not Spam")

Spam Message !!!


In [None]:
# Ham check: classify a message that is expected to be flagged as not spam

In [None]:
# Sample message for the ham (not spam) check.
message2='Please, call me when you are free.'

In [None]:
# Clean the sample with the same pipeline used on the training corpus.
message2=preprocessing(message2)

In [None]:
# Show the cleaned text that will be vectorized.
message2

'pleas call free'

In [None]:
# Vectorize the cleaned message with the already-fitted `cv` (transform only,
# no refit) and run it through the model; this overwrites the earlier `result`.
# NOTE(review): training used a dense array (.toarray()); here the transform
# result is passed as returned — confirm the installed Keras accepts that input type.
result=model.predict(cv.transform([message2]))



In [None]:
# Raw sigmoid output for the message (a 1x1 array, as the saved output shows).
result

array([[0.00128711]], dtype=float32)

In [None]:
# Apply the 0.5 decision threshold to the model output and report the verdict.
print("Spam Message !!!" if result>0.5 else "Not Spam")

Not Spam
