Pick a dataset of your choice with a binary outcome and the potential for at least 15 features. Engineer your features, then create three models. Each model will be run on a training set and a test-set (or multiple test-sets, if you take a folds approach). The models should be:

1) Vanilla logistic regression
2) Ridge logistic regression
3) Lasso logistic regression

In [32]:
import numpy as np
import pandas as pd

In [33]:
df = pd.read_csv("https://www.dropbox.com/s/qhf0ocdqb3gh7ni/logistic_regression.csv?dl=1") 
df.head(5)

Unnamed: 0,admit,gre,gpa,rank
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.0,1
3,1,640,3.19,4
4,0,520,2.93,4


In [34]:
df.shape

(400, 4)

In [35]:
from sklearn import preprocessing

df['gre'] = preprocessing.scale(df['gre'])
df['gpa'] = preprocessing.scale(df['gpa'])

df.head(5)



Unnamed: 0,admit,gre,gpa,rank
0,0,-1.800263,0.579072,3
1,1,0.626668,0.736929,3
2,1,1.840134,1.605143,1
3,1,0.453316,-0.525927,4
4,0,-0.586797,-1.209974,4


In [36]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# set parameter "C" to a large number to decrease effectiveness of the regularization
lr = LogisticRegression(C=1e9)
X = df.drop("admit",1)
y = df["admit"]

X_train,X_test,y_train,y_test = train_test_split(X,y,train_size=0.7,test_size=0.3,random_state=50)

lr_fit = lr.fit(X_train,y_train)

print("Coefficients: ",lr_fit.coef_)
print("Intercept: ",lr_fit.intercept_)
print("R2 (LOG): ",lr.score(X_test,y_test))

Coefficients:  [[ 0.29197099  0.27026342 -0.60650795]]
Intercept:  [0.67035431]
R2 (LOG):  0.65


In [37]:
from sklearn import linear_model

ridgeregr = linear_model.Ridge(alpha=10,fit_intercept=False)
rr_fit = ridgeregr.fit(X_train,y_train)
print("Coefficients: ",rr_fit.coef_)
print("Intercept: ",rr_fit.intercept_)
print("R2 (RIDGE): ",ridgeregr.score(X_test,y_test))

Coefficients:  [0.08871325 0.06070241 0.0973779 ]
Intercept:  0.0
R2 (RIDGE):  -0.12950641001555985


In [38]:
lass = linear_model.Lasso(alpha=0.35)
lassfit = lass.fit(X_train,y_train)
print("Coefficients: ",lassfit.coef_)
print("Intercept: ",lassfit.intercept_)
print("R2 (Lasso): ",lass.score(X_test,y_test))

Coefficients:  [ 0.  0. -0.]
Intercept:  0.3142857142857143
R2 (Lasso):  -0.0005232862375719183
