### `Import Libraries & Data` 

In [1]:
import numpy as np
import pandas as pd

# Read the store-purchase CSV (the path is relative to the notebook's working
# directory) and preview the first five rows.
training_data = pd.read_csv(filepath_or_buffer='../00_data/storepurchasedata.csv')
training_data.head(n=5)

Unnamed: 0,Age,Salary,Purchased
0,18,20000,0
1,19,22000,0
2,20,24000,0
3,21,28000,0
4,22,50000,1


In [2]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
training_data.describe()

Unnamed: 0,Age,Salary,Purchased
count,40.0,40.0,40.0
mean,38.1,49525.0,0.55
std,12.557151,19046.484971,0.503831
min,18.0,20000.0,0.0
25%,27.75,35000.0,0.0
50%,37.5,48500.0,1.0
75%,47.25,60000.0,1.0
max,60.0,95000.0,1.0


In [3]:
# Split the dataset into the feature matrix and the target vector.
# The target is the last column; every column before it is a feature.
X, y = (
    training_data.iloc[:, :-1].values,
    training_data.iloc[:, -1].values,
)

In [4]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# random_state is fixed so the split is the same on every run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

In [5]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits, so the test split never influences the
# fitted parameters.
#
# Caution: X_train and X_test are overwritten in place here. Re-running this
# cell without first re-running the train/test split would fit the scaler on
# values that have already been scaled.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)


In [6]:
# Show the first five scaled rows of each split, separated by a dashed rule.
print(X_train[:5], "-" * 50, X_test[:5], sep="\n")

[[-0.22030803  0.54818427]
 [ 0.61354601  0.29247149]
 [ 0.46193618  0.54818427]
 [ 1.29579021 -0.21895407]
 [ 1.44740004  1.5710354 ]]
--------------------------------------------------
[[ 0.08291162  0.54818427]
 [-0.0686982   0.19018638]
 [ 0.31032636  0.13904382]
 [-1.28157679  0.03675871]
 [-0.82674732  1.5710354 ]]


In [7]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours with k = 5. The Minkowski metric with p = 2 is the
# Euclidean distance.
classifier = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
)

In [8]:
# Train the classifier on the standardized training split.
classifier.fit(X_train, y_train)

# Score the held-out split: hard class labels, plus column 1 of the
# predict_proba output (the probability of the second class).
y_pred, y_prob = (
    classifier.predict(X_test),
    classifier.predict_proba(X_test)[:, 1],
)

In [9]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of true vs. predicted labels on the test split, followed
# by the overall accuracy on its own line.
cm = confusion_matrix(y_test, y_pred)
print(cm, accuracy_score(y_test, y_pred), sep="\n")

[[3 0]
 [1 4]]
0.875


In [10]:
from sklearn.metrics import classification_report

# Per-class precision, recall and F1 on the test split.
report_text = classification_report(y_test, y_pred)
print(report_text)


              precision    recall  f1-score   support

           0       0.75      1.00      0.86         3
           1       1.00      0.80      0.89         5

    accuracy                           0.88         8
   macro avg       0.88      0.90      0.87         8
weighted avg       0.91      0.88      0.88         8



In [11]:
# Score one unseen customer given as [Age, Salary]. The raw row is passed
# through the fitted scaler once and the scaled row is reused for both the
# label and the probability.
new_customer_scaled = sc.transform(np.array([[40, 20000]]))
new_prediction = classifier.predict(new_customer_scaled)
new_prediction_proba = classifier.predict_proba(new_customer_scaled)[:, 1]
print(new_prediction)
print(new_prediction_proba)

[0]
[0.2]


In [12]:
# Score a second unseen customer given as [Age, Salary]. The row is scaled
# with the fitted scaler once, then used for both the label and the
# probability.
second_customer_scaled = sc.transform(np.array([[45, 40000]]))
new_pred = classifier.predict(second_customer_scaled)
new_pred_proba = classifier.predict_proba(second_customer_scaled)[:, 1]
print(new_pred)
print(new_pred_proba)

[1]
[0.6]


In [13]:
# Pickling the model and the standard scaler.
# The scaler is saved with the classifier because new inputs must be
# transformed by the same fitted scaler before they are passed to predict().
import pickle

model_file = "../00_saved_models/classifier.pickle"
scaler_file = "../00_saved_models/sc.pickle"

# Use context managers so each file handle is flushed and closed as soon as
# the dump finishes, even if pickling raises. A bare open(...) passed straight
# to pickle.dump is never closed explicitly.
with open(model_file, 'wb') as model_fh:
    pickle.dump(classifier, model_fh)

with open(scaler_file, 'wb') as scaler_fh:
    pickle.dump(sc, scaler_fh)