In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from prettytable import PrettyTable

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

import warnings 
warnings.filterwarnings('ignore')

In [18]:
data = pd.read_csv('/Users/ankusmanish/Desktop/Training/Datasets/Week10/Social_Network_Ads.csv')

In [19]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
User ID            400 non-null int64
Gender             400 non-null object
Age                400 non-null int64
EstimatedSalary    400 non-null int64
Purchased          400 non-null int64
dtypes: int64(4), object(1)
memory usage: 15.7+ KB


In [20]:
data.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [21]:
data.drop(['User ID'], axis = 1, inplace = True)

In [22]:
data.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


In [23]:
col = 'Gender'
lb = LabelEncoder()
labeled = lb.fit_transform(data['Gender'])
data[col] = labeled

In [24]:
data.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0


In [25]:
cols = ['Age', 'EstimatedSalary']
sc = StandardScaler()
scaled_cols = sc.fit_transform(data[cols])
data[cols] = scaled_cols

In [26]:
data.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,-1.781797,-1.490046,0
1,1,-0.253587,-1.460681,0
2,0,-1.113206,-0.78529,0
3,0,-1.017692,-0.374182,0
4,1,-1.781797,0.183751,0


In [27]:
X = data.iloc[:,:-1].values
y = data.iloc[:,-1].values
y = np.reshape(y, (len(y),1))

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [29]:
algos = {'DecisionTreeClassifier':DecisionTreeClassifier(), 'RandomForestClassifier':RandomForestClassifier(), 
         'LogisticRegression':LogisticRegression(),
         'Support Vector Classifier':SVC(), 'KNeighborsClassifier':KNeighborsClassifier()}

In [30]:
def algorithms(X_train, X_test, y_train, y_test):
    
    t = PrettyTable(['Classifier', 'Accuracy'])
    
    for key, value in algos.items():
        model = value
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        
        print(key.upper(),'\n')
        t.add_row([key, accuracy_score(y_pred, y_test)])
        print('Calssification Report : \n {}'.format(classification_report(y_pred, y_test)))
        print('Confusion Matrix : \n {}'.format(confusion_matrix(y_pred, y_test)))
        print('Accuracy Score : \n {}'.format(accuracy_score(y_pred, y_test)))
        print('\n')
        print('-' * 100)
    print(t)

In [31]:
algorithms(X_train, X_test, y_train, y_test)

DECISIONTREECLASSIFIER 

Calssification Report : 
               precision    recall  f1-score   support

           0       0.89      0.89      0.89        80
           1       0.83      0.83      0.83        52

    accuracy                           0.86       132
   macro avg       0.86      0.86      0.86       132
weighted avg       0.86      0.86      0.86       132

Confusion Matrix : 
 [[71  9]
 [ 9 43]]
Accuracy Score : 
 0.8636363636363636


----------------------------------------------------------------------------------------------------
RANDOMFORESTCLASSIFIER 

Calssification Report : 
               precision    recall  f1-score   support

           0       0.91      0.89      0.90        82
           1       0.83      0.86      0.84        50

    accuracy                           0.88       132
   macro avg       0.87      0.88      0.87       132
weighted avg       0.88      0.88      0.88       132

Confusion Matrix : 
 [[73  9]
 [ 7 43]]
Accuracy Score : 
 0.87