In [66]:
#Import All The Libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,confusion_matrix

#Import The Dataset
df=pd.read_csv("mail_data.csv")

#Data Cleaning
new_df=df.where((pd.notnull(df)),'')
new_df['Category']=new_df['Category'].map({'ham':1,'spam':0})
x=new_df['Message']
y=new_df['Category']

#Train-Test Split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

#Data Preprocessing
feature_extraction=TfidfVectorizer(min_df=1,stop_words='english',lowercase=True)
x_train_1=feature_extraction.fit_transform(x_train)
x_test_1=feature_extraction.transform(x_test)
y_train=y_train.astype('int')
y_test=y_test.astype('int')

#Models Prediction
models=[]
models.append(("Logistic Regression",LogisticRegression()))
models.append(('KNN',KNeighborsClassifier()))
models.append(('RandomForestClassifier',RandomForestClassifier()))
models.append(('DecisionTreeClassifier',DecisionTreeClassifier()))
models.append(('SVM',SVC(kernel='linear')))

#Find Its Accuracy,Confusion-Matrix,Precision,Recall,F1-Score
for name,model in models:
    print()
    print("*******************Model Name : ",name,"*********************")
    print()
    model.fit(x_train_1,y_train)
    y_pred=model.predict(x_test_1)
    print("Confusion-Matrix :",confusion_matrix(y_test,y_pred))
    print("Accuracy :",accuracy_score(y_test,y_pred))
    print("Precision :",precision_score(y_test,y_pred))
    print("Recall :",recall_score(y_test,y_pred))
    print("F1score :",f1_score(y_test,y_pred))


*******************Model Name :  Logistic Regression *********************

Confusion-Matrix : [[113  36]
 [  0 966]]
Accuracy : 0.967713004484305
Precision : 0.9640718562874252
Recall : 1.0
F1score : 0.9817073170731707

*******************Model Name :  KNN *********************

Confusion-Matrix : [[ 52  97]
 [  0 966]]
Accuracy : 0.9130044843049328
Precision : 0.9087488240827846
Recall : 1.0
F1score : 0.9521931986200098

*******************Model Name :  RandomForestClassifier *********************

Confusion-Matrix : [[126  23]
 [  0 966]]
Accuracy : 0.979372197309417
Precision : 0.9767441860465116
Recall : 1.0
F1score : 0.9882352941176471

*******************Model Name :  DecisionTreeClassifier *********************

Confusion-Matrix : [[125  24]
 [ 14 952]]
Accuracy : 0.9659192825112107
Precision : 0.9754098360655737
Recall : 0.9855072463768116
F1score : 0.98043254376931

*******************Model Name :  SVM *********************

Confusion-Matrix : [[141   8]
 [  1 965]]
Accuracy

In [20]:
new_df.head()

Unnamed: 0,Category,Message
0,1,"Go until jurong point, crazy.. Available only ..."
1,1,Ok lar... Joking wif u oni...
2,0,Free entry in 2 a wkly comp to win FA Cup fina...
3,1,U dun say so early hor... U c already then say...
4,1,"Nah I don't think he goes to usf, he lives aro..."
