In [1]:
#Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
#Reading and displaying the dataset
df = pd.read_excel('Bike.xlsx')
df.head()

Unnamed: 0,ID,Age,Gender,Occupation,Phone Type,Current Bike,Relationship,Response
0,1,53,1,4,2,4,3,0
1,2,27,0,3,1,1,1,1
2,3,39,0,2,2,4,4,0
3,4,20,0,2,3,1,4,0
4,5,29,1,1,2,4,3,1


In [3]:
#Dropping irrelevant variable
df = df.drop('ID', axis=1)

In [4]:
df.head()

Unnamed: 0,Age,Gender,Occupation,Phone Type,Current Bike,Relationship,Response
0,53,1,4,2,4,3,0
1,27,0,3,1,1,1,1
2,39,0,2,2,4,4,0
3,20,0,2,3,1,4,0
4,29,1,1,2,4,3,1


In [5]:
#Training set
x = df.drop('Response',axis=1)
x.head()

Unnamed: 0,Age,Gender,Occupation,Phone Type,Current Bike,Relationship
0,53,1,4,2,4,3
1,27,0,3,1,1,1
2,39,0,2,2,4,4
3,20,0,2,3,1,4
4,29,1,1,2,4,3


In [6]:
#Testing set
y = df[['Response']]
y.head()

Unnamed: 0,Response
0,0
1,1
2,0
3,0
4,1


## Trying with feature scaling

In [7]:
from sklearn.preprocessing import StandardScaler     #USing Z- score Normalization. It will convert features mean=0 & SD = 1

In [8]:
sc = StandardScaler()

In [9]:
x_scaled = sc.fit_transform(x)

In [10]:
#Looking for important features
from sklearn.decomposition import PCA

In [11]:
pc = PCA()

In [12]:
pcom = pc.fit_transform(x_scaled)

In [13]:
#All features are important cannot drop any ..
pc.explained_variance_ratio_

array([0.32923189, 0.18690591, 0.15405866, 0.14355314, 0.10797575,
       0.07827464])

In [14]:
#converting array into df
xpca = pd.DataFrame(data=pcom,columns=['Age','Gender','Occupation','Phone Type','Current Bike', 'Reletionship'])
xpca.head()

Unnamed: 0,Age,Gender,Occupation,Phone Type,Current Bike,Reletionship
0,2.052639,-0.00074,1.292778,0.687981,-0.945608,-0.220298
1,1.038835,0.83114,-0.967326,-1.699784,1.479533,-0.559788
2,0.480102,0.280407,-0.695156,1.746875,0.056214,0.321427
3,-0.385017,-1.379399,-2.103656,0.225488,0.039957,-0.698169
4,-0.450811,-0.688229,1.032595,0.796791,0.623894,0.649229


In [15]:
xpca.corr()

Unnamed: 0,Age,Gender,Occupation,Phone Type,Current Bike,Reletionship
Age,1.0,-5.928599e-17,1.794766e-16,-2.467076e-17,3.216372e-18,-5.2231830000000004e-17
Gender,-5.928599e-17,1.0,-3.545927e-16,4.0929080000000006e-17,1.66026e-17,1.282619e-16
Occupation,1.794766e-16,-3.545927e-16,1.0,-8.36061e-17,8.309263e-17,4.70374e-18
Phone Type,-2.467076e-17,4.0929080000000006e-17,-8.36061e-17,1.0,1.810072e-17,5.761854e-17
Current Bike,3.216372e-18,1.66026e-17,8.309263e-17,1.810072e-17,1.0,-1.44047e-16
Reletionship,-5.2231830000000004e-17,1.282619e-16,4.70374e-18,5.761854e-17,-1.44047e-16,1.0


In [16]:
#Train test split for training and testing data
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(xpca,y,test_size=0.3, random_state=50)

In [17]:
xtrain.shape

(1062, 6)

In [18]:
ytrain.shape

(1062, 1)

In [19]:
xtest.shape

(456, 6)

In [20]:
ytest.shape

(456, 1)

In [21]:
#initialize logistic model in object
lr = LogisticRegression()

In [22]:
#Fitting training data
lr.fit(xtrain,ytrain)

  return f(*args, **kwargs)


LogisticRegression()

In [23]:
# checking coefficient values
lr.coef_

array([[-1.19699578, -0.28154191,  0.32664654, -0.05085811,  0.40529559,
        -0.19276342]])

Logistic Reg Accuracy for training and testing data.

In [24]:
#predicting on test data
pred = lr.predict(xtest)

In [25]:
#Testing accuracy
accuracy_score(ytest,pred)

0.7741228070175439

In [26]:
#predicting on training data
pred1 = lr.predict(xtrain)

In [27]:
#Training accuracy
accuracy_score(ytrain,pred1)

0.7570621468926554

In [28]:
# Best-fit model bcz whatever the data it trained it gives good accuracy on testing data also.

In [29]:
confusion_matrix(ytest,pred)

array([[142,  55],
       [ 48, 211]], dtype=int64)

### Implementing SVM on ktm data


In [30]:
from sklearn.svm import SVC

In [31]:
sv = SVC()

In [32]:
sv.fit(xtrain, ytrain)

  return f(*args, **kwargs)


SVC()

In [33]:
predsv = sv.predict(xtest)

In [34]:
# predsv

SVM Accuracy for training and testing data.

In [35]:
#Testing accuracy
accuracy_score(ytest, predsv)

0.7697368421052632

In [36]:
predsv1 = sv.predict(xtrain)

In [37]:
#Training accuracy
accuracy_score(ytrain, predsv1)

0.8173258003766478

In [38]:
confusion_matrix(predsv, ytest)

array([[153,  61],
       [ 44, 198]], dtype=int64)

## Finding the right k value using cross validation technique.

In [39]:
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

In [40]:
score = []
for k in range(1,40):
    knn = KNeighborsClassifier(n_neighbors = k)
    w = cross_val_score(knn, x,y, cv=5, scoring='accuracy') #cv is no. of departments. Data points are divided into 5 departments
    score.append(w.mean())

  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


In [None]:
import matplotlib.pyplot as plt
plt.plot(range(1,40), score)
plt.xlabel('no. of neighbours/ value of k' )
plt.ylabel('accuracy')

Text(0, 0.5, 'accuracy')

In [None]:
# score

# Implementing knn using cross validation technique

In [None]:
kn = KNeighborsClassifier(n_neighbors= 10)

In [None]:
kn.fit(xtrain,ytrain)

KNN using CV Accuracy for training and testing data.

In [None]:
kn_pred = kn.predict(xtest)

In [None]:
#Accuracy on test set
accuracy_score(ytest,kn_pred)

In [None]:
kn_pred1 = kn.predict(xtrain)

In [None]:
#Training accuracy
accuracy_score(kn_pred1, ytrain)

In [None]:
#Find out the working logic of knn regressor
#find out how elbow curve looks like
#find out the diff bw eucledian distance, manhattan and mainkawski  

# Implementing knn using hyper parameter tuning

In [None]:
#Using grid search
from sklearn.model_selection import GridSearchCV

In [None]:
tuned_knn = KNeighborsClassifier()

In [None]:
k_parameter = {'n_neighbors':[10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40]}

In [None]:
H_knn = GridSearchCV(tuned_knn, param_grid= k_parameter, verbose=1, n_jobs= -1)

In [None]:
H_knn.fit(x,y)

In [None]:
H_knn.best_score_

In [None]:
H_knn.best_params_

In [None]:
H_knn.best_estimator_

In [None]:
#using random search
from sklearn.model_selection import RandomizedSearchCV

In [None]:
Hr_knn = RandomizedSearchCV(tuned_knn, param_distributions= k_parameter, verbose=1, n_jobs= -1)

In [None]:
Hr_knn.fit(x,y)

In [None]:
Hr_knn.best_score_

In [None]:
Hr_knn.best_estimator_

In [None]:
Hr_knn.best_params_

## Conclusion:
SVM model is best model among all these models because its giving best accuracy with error rate approx 5% of difference.
The training and testing is 76 and 79 % approx before feature scaling. After scaling training accuracy is 76 and 81 is testing accuracy.