In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix,accuracy_score,recall_score,precision_score,f1_score,fbeta_score,classification_report

In [3]:
df=pd.read_csv('Social_Network_Ads.csv')
df

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   User ID          400 non-null    int64 
 1   Gender           400 non-null    object
 2   Age              400 non-null    int64 
 3   EstimatedSalary  400 non-null    int64 
 4   Purchased        400 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 15.8+ KB


In [5]:
df=pd.get_dummies(df,columns=['Gender'],drop_first=True)
df

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased,Gender_Male
0,15624510,19,19000,0,1
1,15810944,35,20000,0,1
2,15668575,26,43000,0,0
3,15603246,27,57000,0,0
4,15804002,19,76000,0,1
...,...,...,...,...,...
395,15691863,46,41000,1,0
396,15706071,51,23000,1,1
397,15654296,50,20000,1,0
398,15755018,36,33000,0,1


In [6]:
x=df.drop(['Purchased'],axis=1)
y=df['Purchased']

In [7]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.25)

In [8]:
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
x_train=scaler.fit_transform(x_train)
x_test=scaler.transform(x_test)
x_train

array([[-1.06266489,  0.09342615, -0.27948469,  1.        ],
       [-0.71301688, -0.28532852,  0.04873576, -1.        ],
       [-1.56079588, -0.47470585,  0.46647089, -1.        ],
       ...,
       [ 1.60841146, -1.70565853,  0.43663266,  1.        ],
       [-1.6653228 , -1.42159253, -0.21980824, -1.        ],
       [-0.23195469,  1.04031283,  0.10841221,  1.        ]])

In [9]:
models={
    'LR':LogisticRegression(),
    'KNN':KNeighborsClassifier(),
    'DT':DecisionTreeClassifier(),
    'SVC':SVC(),
    'NB':GaussianNB(),
    'XGC':XGBClassifier(),
    'RF':RandomForestClassifier()
    
}

In [10]:
for name,model in  models.items():
    print(f'using {name}: ')
    model.fit(x_train,y_train)
    y_pred=model.predict(x_test)
    print(f'Training Accuracy :{accuracy_score(y_train,model.predict(x_train))}')
    print(f'Testing Accuracy :{accuracy_score(y_test,y_pred)}')
    print(f'Confusion matrix:\n {confusion_matrix(y_test,y_pred)}')
    print(f'Recall: {recall_score(y_test,y_pred)}')
    print(f'precision: {precision_score(y_test,y_pred)}')
    print(f'F1-score: {f1_score(y_test,y_pred)}')
    print(f'Fbeta-score: {fbeta_score(y_test,y_pred,beta=0.5)}')
    print(classification_report(y_test,y_pred))
    print('-'*33)


using LR: 
Training Accuracy :0.8533333333333334
Testing Accuracy :0.82
Confusion matrix:
 [[57  7]
 [11 25]]
Recall: 0.6944444444444444
precision: 0.78125
F1-score: 0.7352941176470588
Fbeta-score: 0.7621951219512195
              precision    recall  f1-score   support

           0       0.84      0.89      0.86        64
           1       0.78      0.69      0.74        36

    accuracy                           0.82       100
   macro avg       0.81      0.79      0.80       100
weighted avg       0.82      0.82      0.82       100

---------------------------------
using KNN: 
Training Accuracy :0.9333333333333333
Testing Accuracy :0.84
Confusion matrix:
 [[51 13]
 [ 3 33]]
Recall: 0.9166666666666666
precision: 0.717391304347826
F1-score: 0.8048780487804877
Fbeta-score: 0.75
              precision    recall  f1-score   support

           0       0.94      0.80      0.86        64
           1       0.72      0.92      0.80        36

    accuracy                           0.84 



Training Accuracy :1.0
Testing Accuracy :0.86
Confusion matrix:
 [[52 12]
 [ 2 34]]
Recall: 0.9444444444444444
precision: 0.7391304347826086
F1-score: 0.8292682926829269
Fbeta-score: 0.7727272727272727
              precision    recall  f1-score   support

           0       0.96      0.81      0.88        64
           1       0.74      0.94      0.83        36

    accuracy                           0.86       100
   macro avg       0.85      0.88      0.86       100
weighted avg       0.88      0.86      0.86       100

---------------------------------


In [11]:
model=RandomForestClassifier()
model.fit(x_train,y_train)

RandomForestClassifier()

In [12]:
import joblib
joblib.dump(model,'model.h5')
joblib.dump(scaler,'scaler.h5')

['scaler.h5']