In [764]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder , OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score,precision_score,f1_score,recall_score

In [765]:
# Absolute path to the SMS spam CSV on the author's machine.
# NOTE(review): a hard-coded local path only works on that machine — consider a
# configurable data directory.
file_path = r'F:\IT Learning\MY PROJECTS\Data Science And ML Projects\Spam SMS Detection\spam.csv'
# Load the CSV into a DataFrame, decoding the bytes as ISO-8859-1 (Latin-1).
df = pd.read_csv(filepath_or_buffer=file_path, encoding='ISO-8859-1')


In [766]:
# Show the freshly loaded frame (the rendered output elides the middle rows).
df

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,
...,...,...,...,...,...
5698,spam,"""Get a quick loan for your business. Flexible ...",,,
5699,spam,"""Important message: Your subscription will exp...",,,
5700,spam,"""Download our app for a chance to win exciting...",,,
5701,spam,"""Join our loyalty program and enjoy exclusive ...",,,


In [767]:
# Give the generically named v1/v2 columns descriptive names, modifying the
# existing frame in place.
column_names = {'v1': 'Category', 'v2': 'Message'}
df.rename(columns=column_names, inplace=True)

In [768]:
# Re-display the frame to confirm the v1/v2 columns now read Category/Message.
df

Unnamed: 0,Category,Message,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,
...,...,...,...,...,...
5698,spam,"""Get a quick loan for your business. Flexible ...",,,
5699,spam,"""Important message: Your subscription will exp...",,,
5700,spam,"""Download our app for a chance to win exciting...",,,
5701,spam,"""Join our loyalty program and enjoy exclusive ...",,,


In [769]:
# Re-wrap df in a DataFrame.
# NOTE(review): df comes from pd.read_csv above and so should already be a
# DataFrame — this line looks redundant; confirm before removing.
df= pd.DataFrame(df)

In [770]:
# Remove the unused 'Unnamed: N' columns so only Category and Message remain.
# The result is bound back to `df` rather than dropping in place: an in-place
# drop returns None, and assigning that to `df.drop` would overwrite the
# frame's own `drop` method with None and break any later `df.drop(...)` call.
df = df.drop(columns=['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'])

In [771]:
# Confirm that only the Category and Message columns are left.
df

Unnamed: 0,Category,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5698,spam,"""Get a quick loan for your business. Flexible ..."
5699,spam,"""Important message: Your subscription will exp..."
5700,spam,"""Download our app for a chance to win exciting..."
5701,spam,"""Join our loyalty program and enjoy exclusive ..."


In [772]:
# One-hot encoder configured to return a dense array (sparse_output=False);
# it is fitted on the Category label column in the next cell.
encoder= OneHotEncoder(sparse_output= False)

In [773]:
# OneHotEncoder returns one indicator column per category (here 'ham' and
# 'spam'), i.e. a 2-D array. Assigning that whole array to a single DataFrame
# column depends on how pandas happens to handle the shape mismatch, so pick
# the 'ham' indicator column explicitly to get a 1-D 0/1 label vector.
# Resulting convention (relied on by the prediction cell): ham -> 1, spam -> 0.
category_onehot = encoder.fit_transform(df[['Category']])
ham_column = list(encoder.categories_[0]).index('ham')
df['Category'] = category_onehot[:, ham_column].astype('int')

In [774]:
# Inspect the encoded labels: in the rendered output ham rows show 1 and spam rows show 0.
df

Unnamed: 0,Category,Message
0,1,"Go until jurong point, crazy.. Available only ..."
1,1,Ok lar... Joking wif u oni...
2,0,Free entry in 2 a wkly comp to win FA Cup fina...
3,1,U dun say so early hor... U c already then say...
4,1,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5698,0,"""Get a quick loan for your business. Flexible ..."
5699,0,"""Important message: Your subscription will exp..."
5700,0,"""Download our app for a chance to win exciting..."
5701,0,"""Join our loyalty program and enjoy exclusive ..."


In [775]:
# (rows, columns) of the cleaned frame.
df.shape

(5703, 2)

In [776]:
# Per-column dtype and non-null counts, to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5703 entries, 0 to 5702
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Category  5703 non-null   int32 
 1   Message   5703 non-null   object
dtypes: int32(1), object(1)
memory usage: 67.0+ KB


In [777]:
# Feature (raw message text) and target (encoded category) for the classifier.
X, y = df['Message'], df['Category']

In [778]:
# Hold out 20% of the messages for evaluation; the fixed seed keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

In [779]:
# TF-IDF vectorizer for the message text: lower-cases input, removes English
# stop words, and keeps every term that appears in at least one document.
feature_extraction = TfidfVectorizer(
    min_df=1,
    stop_words='english',
    lowercase=True,
)

In [780]:
# Fit the vectorizer on the training messages only, then reuse that fitted
# vectorizer to transform the test messages (no refit on the test set).
X_train_featured= feature_extraction.fit_transform(X_train)
X_test_featured= feature_extraction.transform(X_test)

In [781]:
# Print the training feature matrix; the output lists (row, column) index pairs
# with their weights.
# NOTE(review): this produces a very long dump — consider showing only
# X_train_featured.shape instead.
print(X_train_featured)

  (0, 6769)	0.7851679664952446
  (0, 4771)	0.6192828629872802
  (1, 2121)	0.24478817109977258
  (1, 378)	0.3243287963180057
  (1, 2178)	0.2668060150021398
  (1, 3892)	0.2513883881250132
  (1, 2943)	0.3243287963180057
  (1, 189)	0.3243287963180057
  (1, 2256)	0.3048265053608923
  (1, 6012)	0.3103672457059215
  (1, 1130)	0.24241644469599422
  (1, 1574)	0.49205377487048596
  (2, 47)	0.3174109071071861
  (2, 5569)	0.21382760964313688
  (2, 3348)	0.30986594503860254
  (2, 1572)	0.3217155710297922
  (2, 2856)	0.30986594503860254
  (2, 4667)	0.2518202407206443
  (2, 4625)	0.2146640184818183
  (2, 4753)	0.2677923176378301
  (2, 5601)	0.37609274569488016
  (2, 1921)	0.24639394852651927
  (2, 6843)	0.3004879574583798
  (2, 2178)	0.2904097266033281
  (3, 1056)	0.5844746939425332
  :	:
  (4558, 1749)	0.23191752807727364
  (4558, 4572)	0.22243837536422575
  (4558, 5771)	0.24497871257040238
  (4558, 4444)	0.19150847587298436
  (4558, 6923)	0.18470034071346328
  (4558, 7405)	0.17170950603408586
  (45

In [782]:
# Small multilayer perceptron with two hidden layers (3 and 2 units) and
# max_iter=500.
# random_state pins the weight initialisation and solver shuffling so the
# trained model and its accuracy are reproducible on Restart & Run All
# (the value matches the seed used for the train/test split).
# NOTE(review): activation='identity' makes every hidden layer linear, so the
# network can only learn a linear decision function on the TF-IDF features —
# confirm this is intended rather than e.g. 'relu'.
model = MLPClassifier(
    hidden_layer_sizes=(3, 2),
    max_iter=500,
    activation='identity',
    random_state=10,
)
model.fit(X_train_featured, y_train)


In [783]:
# Predict labels for the held-out test messages.
prediction=model.predict(X_test_featured)

In [784]:
# Fraction of test messages whose predicted label matches the true label.
accuracy = accuracy_score(y_test,prediction)

In [785]:
# Display the test-set accuracy.
accuracy

0.9807186678352322

In [790]:
# Messages to classify; add more strings to the list to score several at once.
Your_mail = [''' congrats, you won free tickets ''']
# Reuse the vectorizer fitted on the training messages so the feature columns
# line up with what the model was trained on.
Your_mail_featuring = feature_extraction.transform(Your_mail)
Your_mail_prediction = model.predict(Your_mail_featuring)

# Label convention used by this notebook: 0 = spam, anything else (1) = ham.
# Iterate over the predictions instead of testing the whole array in an `if`,
# which only works when exactly one message is supplied.
for predicted_label in Your_mail_prediction:
    if predicted_label == 0:
        print('Spam')
    else:
        print('Ham')

Spam
