In [21]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [20]:
models={
    'LR':LogisticRegression(),
    'KNN':KNeighborsClassifier(),
    'DT':DecisionTreeClassifier(),
    'SVC':SVC(),
    'NB':GaussianNB(),
    'XGC':XGBClassifier(),
    'RF':RandomForestClassifier() 
}

In [2]:
df = pd.read_csv("Assignment Datasets/Social_Network_Ads.csv")
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [3]:
df.shape

(400, 5)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   User ID          400 non-null    int64 
 1   Gender           400 non-null    object
 2   Age              400 non-null    int64 
 3   EstimatedSalary  400 non-null    int64 
 4   Purchased        400 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 15.8+ KB


In [5]:
df.drop(["User ID"],axis=1, inplace=True)
df = pd.get_dummies(data=df, columns=["Gender"], drop_first=True)
df.head()

Unnamed: 0,Age,EstimatedSalary,Purchased,Gender_Male
0,19,19000,0,1
1,35,20000,0,1
2,26,43000,0,0
3,27,57000,0,0
4,19,76000,0,1


In [6]:
df.isnull().sum()

Age                0
EstimatedSalary    0
Purchased          0
Gender_Male        0
dtype: int64

In [9]:
x = df.drop(["Purchased"], axis=1)
y = df["Purchased"]

In [12]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [13]:
y_train.value_counts()

0    214
1    106
Name: Purchased, dtype: int64

In [15]:
smote = SMOTE()
x_train, y_train = smote.fit_sample(x_train, y_train)

In [16]:
y_train.value_counts()

0    214
1    214
Name: Purchased, dtype: int64

In [18]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
x_train

array([[ 1.91835924, -0.85361798, -0.89772333],
       [-0.02474277, -0.85361798,  1.11392895],
       [-0.70482848, -1.51785975,  1.11392895],
       ...,
       [ 0.84965314,  0.93402266, -0.89772333],
       [ 1.33542864, -1.21341561, -0.89772333],
       [ 0.55818783, -0.79973137, -0.89772333]])

In [22]:
for name,model in  models.items():
    print(f'using {name}: ')
    model.fit(x_train,y_train)
    y_pred=model.predict(x_test)
    print(f'Training Accuracy :{accuracy_score(y_train,model.predict(x_train))}')
    print(f'Testing Accuracy :{accuracy_score(y_test,y_pred)}')
    print(f'Recall: {recall_score(y_test,y_pred)}')
    print(f'precision: {precision_score(y_test,y_pred)}')
    print(f'F1-score: {f1_score(y_test,y_pred)}')
    print('-'*60)

using LR: 
Training Accuracy :0.8598130841121495
Testing Accuracy :0.8
Recall: 0.7837837837837838
precision: 0.7837837837837838
F1-score: 0.7837837837837838
------------------------------------------------------------
using KNN: 
Training Accuracy :0.9392523364485982
Testing Accuracy :0.9
Recall: 0.8918918918918919
precision: 0.8918918918918919
F1-score: 0.8918918918918919
------------------------------------------------------------
using DT: 
Training Accuracy :1.0
Testing Accuracy :0.875
Recall: 0.8918918918918919
precision: 0.8461538461538461
F1-score: 0.868421052631579
------------------------------------------------------------
using SVC: 
Training Accuracy :0.927570093457944
Testing Accuracy :0.9125
Recall: 0.918918918918919
precision: 0.8947368421052632
F1-score: 0.9066666666666667
------------------------------------------------------------
using NB: 
Training Accuracy :0.8948598130841121
Testing Accuracy :0.8625
Recall: 0.8918918918918919
precision: 0.825
F1-score: 0.857142857



Training Accuracy :0.9953271028037384
Testing Accuracy :0.9
Recall: 0.8918918918918919
precision: 0.8918918918918919
F1-score: 0.8918918918918919
------------------------------------------------------------
using RF: 
Training Accuracy :1.0
Testing Accuracy :0.8875
Recall: 0.8648648648648649
precision: 0.8888888888888888
F1-score: 0.8767123287671232
------------------------------------------------------------
