# PA 2 - CS6510 (CS17MTECH11028)

# Question 2

In [1]:
import numpy as np
from sklearn.linear_model import Ridge, Lasso
from scipy.optimize import minimize, check_grad
from sklearn.metrics import accuracy_score, auc, r2_score

## Create design matrix

In [3]:
m = 150
d = 75
X = np.random.rand(m, d)
print(X.shape)

# Setup theta vector
theta = np.zeros(d)
t_1 = np.random.randint(1,9)
theta[:10] = np.append([10]*t_1, [-10]*(10-t_1))
print(theta.shape)

# Add random noise
noise = 0.3162 * np.random.randn(150) + 0

# Generate output column
y = np.dot(X, theta) + noise
print(y.shape)

(150, 75)
(75,)
(150,)


## Split into training, cross-validation and test dataset

In [4]:
X_train = X[:80,:]
y_train = y[:80]

X_cv = X[80:100, :]
y_cv = y[80:100]

X_test = X[100:150, :]
y_test = y[100:150]

## Ridge Regression Cost Function and Gradient

In [5]:
def ridgeRegCostAndGradient( theta, X, y, lamb):
    # Returns the cost function value and gradient (75x1) for each feature
    m = X.shape[0]
    cost = np.sum(np.power(y - np.dot(X, theta), 2)/(2*m)) +  np.dot(theta.transpose(), theta)*lamb/(2*m)
    grad = np.dot(X.transpose(), np.dot(X, theta)-y)/m + lamb*theta/m
    return cost, grad

## Run Ridge Regression minimization

In [7]:
# Best parameter finalized after using cross-validation for various parameters
lamb = 0.001
theta_0 = np.zeros(theta.shape)
res = minimize(ridgeRegCostAndGradient, theta_0, (X_train, y_train, lamb), jac = True, method = 'CG')
#print(res)
w = res['x']
print(w)

[  9.50401083e+00   1.01122981e+01   1.03138488e+01   9.58312748e+00
   9.52587307e+00   1.06364044e+01  -9.77593638e+00  -9.31610479e+00
  -9.57064471e+00  -1.04205445e+01  -3.86018941e-01   1.33687749e-01
  -2.56188032e-02  -5.14168295e-02  -5.00341505e-02   6.00489465e-02
  -3.71010728e-01  -3.69391542e-01  -9.06328852e-02  -1.91659212e-02
   1.57670535e-01   2.66520748e-03   2.85405209e-01  -2.98322382e-01
   4.61003760e-01  -3.20887784e-02  -1.15574349e-01  -4.69269281e-02
  -1.47277157e-01   3.20423530e-01   1.77566068e-01  -2.88887645e-01
   5.31632553e-01   1.97139341e-02   6.62415188e-01  -2.53555592e-01
  -7.86195261e-01  -1.17354439e-01   1.07007998e+00  -6.44917437e-01
   6.46302358e-01   1.04879542e+00  -4.05690355e-01  -4.75552201e-02
   2.50946356e-01   4.49496438e-01   1.66794179e-01  -1.23733605e-01
  -6.12613386e-01   3.30118339e-01   2.06934858e-02   4.23519068e-01
  -1.31163876e-01   2.01654355e-01   1.89509750e-01   1.58015504e-01
  -2.72749832e-01   4.09607382e-01

## Using Cross-validation set to find best value of lambda

In [8]:
lamb = [1, 0.5, 0.3,0.2, 0.1, 0.05, 0.01, 0.005, 0.003, 0.001]

for l in lamb:
    res = minimize(ridgeRegCostAndGradient, theta_0, (X_train, y_train, l), jac = True, method = 'CG')
    w   = res['x']
    y_pred = np.dot(X_cv, w)
    m = X_cv.shape[0]
    print('Cross-validation for lambda {0} is {1}'.format(l, r2_score(y_cv, y_pred)))
    
# Test on test dataset
final_lamb = 0.001
res = minimize(ridgeRegCostAndGradient, theta_0, (X_train, y_train, final_lamb), jac = True, method = 'CG')
w   = res['x']
y_pred_test = np.dot(X_test, w)
m = X_test.shape[0]
print('\nRidge Regression R2 Score on test dataset for lambda {0} is {1}'.format(final_lamb, r2_score(y_test, y_pred_test)))

Cross-validation for lambda 1 is 0.8526176973021713
Cross-validation for lambda 0.5 is 0.8881558647067153
Cross-validation for lambda 0.3 is 0.9075284861206969
Cross-validation for lambda 0.2 is 0.9206317347037971
Cross-validation for lambda 0.1 is 0.9406551598950957
Cross-validation for lambda 0.05 is 0.9575642774209345
Cross-validation for lambda 0.01 is 0.9790129272059639
Cross-validation for lambda 0.005 is 0.9820790592105224
Cross-validation for lambda 0.003 is 0.9828742285288816
Cross-validation for lambda 0.001 is 0.9835894798802565

Ridge Regression R2 Score on test dataset for lambda 0.001 is 0.9865236037180654


### Observations on Ridge Regression

1. None of the 75 co-efficients are 0. Thus, all the co-efficients with true value have been estimated as non-zero.
2. 3-4 co-efficients are in the range of 10^-2.
3. This tells that Ridge Regression does not work well when only few of the components matter to the output class.
4. Ridge regression does not help in making components to 0.

In [9]:
clf = Lasso(alpha=final_lamb)
clf.fit(X_train, y_train)
print('Lasso Regression Score on test dataset for lambda {0} is {1}'.format(final_lamb, clf.score(X_test, y_test)))
print('Co-efficients:')
print(clf.coef_)

Lasso Regression Score on test dataset for lambda 0.001 is 0.9972130987562898
Co-efficients:
[  1.00237394e+01   9.89500371e+00   1.02448083e+01   9.97973912e+00
   9.87096506e+00   1.00504247e+01  -9.86407145e+00  -9.81781705e+00
  -9.84287834e+00  -9.95446716e+00  -3.19408392e-02  -3.32118830e-02
  -1.55706683e-01  -5.78996264e-02   0.00000000e+00  -1.18053347e-01
  -2.65476123e-01   0.00000000e+00  -8.29563120e-02  -0.00000000e+00
   1.54670464e-01  -5.12065551e-02   1.02923350e-01  -1.32654351e-01
   0.00000000e+00  -8.52938432e-03   1.57705359e-02   1.09802406e-01
  -5.21613301e-02   2.12651029e-01   1.90416922e-01  -2.92291832e-01
   1.62734203e-01  -7.22097015e-02   3.55350521e-02  -0.00000000e+00
  -2.55955547e-01  -0.00000000e+00   4.52845342e-01  -3.43396965e-01
   1.86687348e-01   1.65651749e-01  -1.44959060e-01  -0.00000000e+00
   2.13788060e-01   3.99286675e-02   1.90645143e-01  -2.36321260e-02
   0.00000000e+00  -0.00000000e+00   0.00000000e+00   3.73272237e-01
  -0.00000

### Observations on Lasso Regression:

1. The score Lasso regression with same value of regression parameter (lambda) is higher.
2. Around 15 components have zero weight. It is a good improvement over Ridge regression where no component had 0 weight.
3. This shows the property of Lasso Regression that it works well when most of the features are not contributing to the output and expect a 0 weight.

In [226]:
# Debugging with sklearn ridge regression
clf = Ridge(alpha=0.001)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
print(clf.coef_)

0.956712746039
[  9.29137717  10.05406575   9.6779121    8.66846397  10.34292812
  -9.93358548 -10.29422236  -8.87066849 -10.46927695  -9.82405289
   0.64245597  -0.397585     0.26363489   0.79079923  -0.09762192
  -0.23467183  -0.71437651   0.47149143   0.86826085   0.77079415
  -0.25658072   1.18897426  -0.24756031   0.36858452  -0.34360844
   0.12992103   0.62685166   0.63231174   0.99884239  -0.08241659
  -0.12676361  -0.30477545   0.41390597  -0.41054053   0.71053354
   1.17869789  -0.22233989   0.52527025  -0.75955876  -0.298957
   0.84622329   0.50225394   0.88888368   0.48619408  -0.54041187
   0.01533551  -0.65320636  -0.3711382   -0.31487865   1.2570542
   0.86235333   0.54356732  -0.81859463   0.65084397   0.81106435
  -0.70356194  -0.87718769  -0.08109026  -0.65897486   0.39333052
  -0.0127953   -0.41922138   0.47073213  -0.19574749  -0.86603208
   0.32397324  -0.86648646  -0.34351225  -0.83735135   0.62438122
  -0.60583903   0.02514896  -0.25727958   0.83006389   0.1624421