# Dimension Reduction Using Lasso vs PCA

In [6]:
import os
import timeit

import pandas as pd
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
# Location of the Universal Bank CSV. Set the UNIVERSAL_BANK_CSV environment
# variable to run the notebook on another machine; the default is the
# original author's local path.
DATA_PATH = os.environ.get("UNIVERSAL_BANK_CSV", "D:\\Downloads\\UniversalBank.csv")
TARGET_COLUMN = "Personal Loan"

df = pd.read_csv(DATA_PATH)

# UserID is dropped before the predictor columns are selected.
df = df.drop(columns=["UserID"])

# Predictors are the first 11 remaining columns (positional slice); the
# target is selected by name.
X = df.iloc[:, 0:11]
y = df[TARGET_COLUMN]

# The slice above is positional, so guard against the target ending up in X
# if the column order of the file ever differs.
assert TARGET_COLUMN not in X.columns, "target column leaked into the predictor matrix"

## Lasso

In [7]:
# Standardize every predictor, then fit a Lasso model on the standardized
# matrix and print one coefficient per predictor.
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

lm1 = Lasso(alpha=0.05, random_state=5)
model_L = lm1.fit(X_std, y)

# One row per predictor, pairing each column name with its fitted coefficient.
lasso_importance = pd.DataFrame(
    {"predictor": X.columns, "importance": model_L.coef_}
)
print(lasso_importance)

             predictor  importance
0                  Age    0.000000
1           Experience    0.000000
2               Income    0.094884
3               Family    0.000000
4                CCAvg    0.000000
5            Education    0.007694
6             Mortgage    0.000000
7   Securities Account   -0.000000
8           CD Account    0.026983
9               Online   -0.000000
10          CreditCard   -0.000000


## PCA

In [8]:
# Reduce the predictors to three principal components.
pca = PCA(n_components=3, random_state=5)

# NOTE(review): PCA centers but does not rescale its input, so fitting on the
# raw X lets the largest-variance columns dominate the components. Consider
# fitting on X_std instead. Left on X here so the recorded Model 3 results
# further down remain valid.
pca.fit(X)

# Explained variance belongs to the principal components, not to the original
# predictors, so label the rows PC1..PCk rather than with X.columns.
pca_variance = pd.DataFrame(
    {
        "component": ["PC{}".format(i) for i in range(1, pca.n_components_ + 1)],
        "explained_variance_ratio": pca.explained_variance_ratio_,
    }
)

# For dimension reduction: project X onto the fitted components.
X_new = pca.transform(X)

# Last expression of the cell, so the variance table is actually displayed.
pca_variance


## Model 1: KNN on all standardized predictors

In [9]:

# Model 1: 3-nearest-neighbours classifier on the full standardized
# predictor matrix (X_std), evaluated on a 30% hold-out split.
X_train1, X_test1, y_train1, y_test1 = train_test_split(
    X_std, y, test_size=0.3, random_state=5
)

knn = KNeighborsClassifier(n_neighbors=3)

# Only the fit call is timed; prediction runs after the timer has stopped.
start = timeit.default_timer()
model1 = knn.fit(X_train1, y_train1)
stop = timeit.default_timer()

y_test_pred1 = model1.predict(X_test1)

print('Time: ', stop - start)
# Accuracy, precision and recall on the hold-out split, in that order.
for score_fn in (metrics.accuracy_score, metrics.precision_score, metrics.recall_score):
    print(score_fn(y_test1, y_test_pred1))


Time:  0.01126160000001164
0.958
0.9215686274509803
0.6308724832214765


## Model 2: KNN on the three Lasso-selected predictors

In [10]:
# Model 2: same 3-nearest-neighbours setup, restricted to the three columns
# whose Lasso coefficients were non-zero in the recorded run.
X_2 = df[["Income", "Education", "CD Account"]]

scaler = StandardScaler()
X_std2 = scaler.fit_transform(X_2)

X_train2, X_test2, y_train2, y_test2 = train_test_split(
    X_std2, y, test_size=0.3, random_state=5
)

knn = KNeighborsClassifier(n_neighbors=3)

# Only the fit call is timed; prediction runs after the timer has stopped.
start = timeit.default_timer()
model2 = knn.fit(X_train2, y_train2)
stop = timeit.default_timer()

y_test_pred2 = model2.predict(X_test2)

print('Time: ', stop - start)
# Accuracy, precision and recall on the hold-out split, in that order.
for score_fn in (metrics.accuracy_score, metrics.precision_score, metrics.recall_score):
    print(score_fn(y_test2, y_test_pred2))

Time:  0.005260799999973642
0.96
0.9158878504672897
0.6577181208053692


## Model 3: KNN on the three PCA components

In [12]:
# Model 3: 3-nearest-neighbours classifier on the PCA-projected data (X_new),
# evaluated on the same 30% hold-out split settings as the other models.
X_3 = X_new

# Create a fresh scaler here, as the Model 2 cell does, instead of reusing
# whichever scaler object an earlier cell happened to leave in the kernel.
scaler = StandardScaler()
X_3std = scaler.fit_transform(X_3)

X_train3, X_test3, y_train3, y_test3 = train_test_split(
    X_3std, y, random_state=5, test_size=0.3
)

knn = KNeighborsClassifier(n_neighbors=3)

# Only the fit call is timed; prediction runs after the timer has stopped.
start = timeit.default_timer()
model3 = knn.fit(X_train3, y_train3)
stop = timeit.default_timer()

y_test_pred3 = model3.predict(X_test3)

print('Time: ', stop - start)
print(metrics.accuracy_score(y_test3, y_test_pred3))
print(metrics.precision_score(y_test3, y_test_pred3))
print(metrics.recall_score(y_test3, y_test_pred3))


Time:  0.002430699999990793
0.8913333333333333
0.4396551724137931
0.3422818791946309
