# Regularization

## 1. 개념

- 데이터가 많지 않으면 학습이 잘 되지 않을 수 있다. 특정 weight 값이 너무 커지거나 너무 작아질 수 있기 때문이다. 이를 막기 위해 weight의 크기에 페널티를 준다.
- 기존 Objective function(목표 함수)에 항을 하나 더한다.
    + Ridge Regression: 모든 계수 b(수식의 β)의 값을 제곱해서 더한 값에 lambda를 곱해 페널티의 크기를 조절한 뒤 더해줌
    + Lasso Regression: 모든 계수 b(수식의 β)의 절대값을 더한 값에 lambda를 곱해 더해줌

\begin{align}
Ridge\ regression &= \ell(Y^{(i)},\beta_0,\beta_1,...,\beta_p,X^{(i)}) + \lambda\|\beta\|_2^2 \\
Lasso\ regression &= \ell(Y^{(i)},\beta_0,\beta_1,...,\beta_p,X^{(i)}) + \lambda\|\beta\|_1
\end{align}
    
- 야구 선수들의 데이터를 활용한다. 데이터는 18가지의 통계 수치(X)와 연봉(y)으로 구성된다. 통계 수치들이 연봉에 어떤 영향을 미치는지 Multiple Linear Regression을 통해 알아본다.

```
Name,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,League,Division,PutOuts,Assists,Errors,Salary
Mel Hall,442,131,18,68,77,33,6,1416,398,47,210,203,136,1,0,233,7,7,550
Jerry Royster,257,66,5,31,26,32,14,3910,979,33,518,324,382,0,1,87,166,14,250
.
.
.
```

## 2. 구현

### 2.1 lambda 찾기

가장 적합한 lambda를 찾는 함수

In [2]:
def get_best_lambda_value_ridge_lasso(data):
    """Pick the Ridge and Lasso regularization strengths by cross-validation.

    The first column of ``data`` is skipped, the last column is used as the
    response, and every column in between is used as a predictor. Both
    values are chosen by 10-fold cross-validation on ``data``; nothing is
    hard-coded, so the function can be re-run on other data with the same
    column layout.

    Args:
        data: pandas DataFrame laid out as described above.

    Returns:
        Tuple ``(best_lambda_ridge, best_lambda_lasso)`` holding the
        ``alpha_`` selected by ``RidgeCV`` and ``LassoCV`` respectively.
    """
    from sklearn.linear_model import RidgeCV, LassoCV

    # ``DataFrame.ix`` and ``DataFrame.as_matrix`` were removed in pandas 1.0.
    # ``iloc`` / ``values`` select the same columns by position and are also
    # available in older pandas releases.
    y_vec = data.iloc[:, -1].squeeze()
    x_mat = data.iloc[:, 1:-1].values

    # NOTE(review): the candidate grid spans 1e0 .. 1e100. It is kept as-is so
    # previously recorded results stay reproducible, but the upper end looks
    # far larger than needed - confirm whether a narrower grid was intended.
    ridgeregr = RidgeCV(cv=10, alphas=np.logspace(0, 100, 100))
    ridgeregr.fit(x_mat, y_vec)

    # LassoCV builds its own grid of 100 candidate alphas.
    lassoregr = LassoCV(cv=10, n_alphas=100)
    lassoregr.fit(x_mat, y_vec)

    best_lambda_ridge = ridgeregr.alpha_
    best_lambda_lasso = lassoregr.alpha_

    return best_lambda_ridge, best_lambda_lasso

### 2.2 Multiple LR

In [3]:
def multi_var_hitter(x_train, x_test, y_train, y_test):
    """Fit a plain linear regression on the training split and score it on the test split.

    The fitted coefficients are printed before returning.

    Args:
        x_train: predictor values used for fitting.
        x_test: predictor values used for evaluation.
        y_train: response values for ``x_train``; ``squeeze()`` is applied.
        y_test: response values for ``x_test``; ``squeeze()`` is applied.

    Returns:
        Tuple ``(rss, r2, mse)`` computed on the test split: residual sum of
        squares, ``r2_score`` and ``mean_squared_error``.
    """
    train_target = y_train.squeeze()
    test_target = y_test.squeeze()

    model = linear_model.LinearRegression()
    model.fit(x_train, train_target)
    test_prediction = model.predict(x_test)

    residuals = test_prediction - test_target
    rss = np.sum(residuals ** 2)
    r2 = r2_score(test_target, test_prediction)
    mse = mean_squared_error(test_target, test_prediction)

    print("Coefficients: {}".format(model.coef_))
    return rss, r2, mse

### 2.3 Ridge regression

In [4]:
def multi_var_hitter_ridge(x_train, x_test, y_train, y_test, best_lambda):
    """Fit a Ridge regression on the training split and score it on the test split.

    The fitted coefficients are printed before returning.

    Args:
        x_train: predictor values used for fitting.
        x_test: predictor values used for evaluation.
        y_train: response values for ``x_train``; ``squeeze()`` is applied.
        y_test: response values for ``x_test``; ``squeeze()`` is applied.
        best_lambda: regularization strength passed to ``Ridge`` as ``alpha``.

    Returns:
        Tuple ``(rss, r2, mse)`` computed on the test split: residual sum of
        squares, ``r2_score`` and ``mean_squared_error``.
    """
    train_target = y_train.squeeze()
    test_target = y_test.squeeze()

    model = linear_model.Ridge(alpha=best_lambda)
    model.fit(x_train, train_target)
    test_prediction = model.predict(x_test)

    residuals = test_prediction - test_target
    rss = np.sum(residuals ** 2)
    r2 = r2_score(test_target, test_prediction)
    mse = mean_squared_error(test_target, test_prediction)

    print("Coefficients: {}".format(model.coef_))
    return rss, r2, mse

### 2.4 Lasso

In [5]:
def multi_var_hitter_lasso(x_train, x_test, y_train, y_test, best_lambda):
    """Fit a Lasso regression on the training split and score it on the test split.

    The fitted coefficients are printed before returning.

    Args:
        x_train: predictor values used for fitting.
        x_test: predictor values used for evaluation.
        y_train: response values for ``x_train``; ``squeeze()`` is applied.
        y_test: response values for ``x_test``; ``squeeze()`` is applied.
        best_lambda: regularization strength passed to ``Lasso`` as ``alpha``.

    Returns:
        Tuple ``(rss, r2, mse)`` computed on the test split: residual sum of
        squares, ``r2_score`` and ``mean_squared_error``.
    """
    train_target = y_train.squeeze()
    test_target = y_test.squeeze()

    model = linear_model.Lasso(alpha=best_lambda)
    model.fit(x_train, train_target)
    test_prediction = model.predict(x_test)

    residuals = test_prediction - test_target
    rss = np.sum(residuals ** 2)
    r2 = r2_score(test_target, test_prediction)
    mse = mean_squared_error(test_target, test_prediction)

    print("Coefficients: {}".format(model.coef_))
    return rss, r2, mse

### 2.5 실행

In [6]:
import numpy as np
from sklearn import linear_model
import pandas as pd
from sklearn.metrics import r2_score, mean_squared_error


def _split_features_response(frame):
    """Split a data frame into ``(features, response)``.

    The first column is skipped, the last column is the response, and the
    columns in between are returned as a 2-D array of predictors.
    """
    # ``DataFrame.ix`` and ``DataFrame.as_matrix`` were removed in pandas 1.0.
    # ``iloc`` / ``values`` select the same columns by position and are also
    # available in older pandas releases.
    response = frame.iloc[:, -1].squeeze()
    features = frame.iloc[:, 1:-1].values
    return features, response


def _print_result(title, rss, r2, mse, best_lambda=None):
    """Print one model's metrics, plus the chosen lambda when one is given."""
    print(title)
    print("RSS: {}".format(rss))
    print("R^2: {}".format(r2))
    print("MSE: {}".format(mse))
    if best_lambda is not None:
        print("Best lambda value: {}".format(best_lambda))
    print()


def main():
    """Train linear, Ridge and Lasso regressions and print their test metrics.

    Reads ``./Hitters.csv`` as the training set and ``./Hitters_Test.csv`` as
    the test set. The Ridge and Lasso regularization strengths are chosen by
    ``get_best_lambda_value_ridge_lasso`` from the training data only.
    """
    training_data = pd.read_csv("./Hitters.csv", header=0)
    x_train, y_train = _split_features_response(training_data)

    test_data = pd.read_csv("./Hitters_Test.csv", header=0)
    x_test, y_test = _split_features_response(test_data)

    # Linear Regression
    rss, r2, mse = multi_var_hitter(x_train, x_test, y_train, y_test)
    _print_result("Linear Regression Result", rss, r2, mse)

    # Ridge Regression
    best_lambda_ridge, best_lambda_lasso = get_best_lambda_value_ridge_lasso(training_data)
    rss, r2, mse = multi_var_hitter_ridge(x_train, x_test, y_train, y_test, best_lambda_ridge)
    _print_result("Ridge Regression Result", rss, r2, mse, best_lambda_ridge)

    # Lasso
    rss, r2, mse = multi_var_hitter_lasso(x_train, x_test, y_train, y_test, best_lambda_lasso)
    _print_result("lasso Result", rss, r2, mse, best_lambda_lasso)

# Run the experiment only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()




Coefficients: [  -2.15391889    8.55268886    7.29938765   -3.7183511    -1.24205258
    6.89464226   -2.33107018   -0.16102897   -0.41444889   -1.90051069
    2.36160191    1.47000757   -1.09836326  -30.75913147 -157.68650189
    0.2030851     0.47563856   -5.22334094]
Linear Regression Result
RSS: 6064354.855091518
R^2: 0.24157641420430032
MSE: 96259.60087446854

Coefficients: [  2.05757038e-01   8.35063924e-02   1.04531917e-02   4.75325137e-02
   4.17217880e-02   3.94585343e-02  -2.18880318e-03   2.47718490e-02
   1.27345479e-01   4.75903341e-02   1.22248755e-01   1.34699964e-01
   2.44907813e-02   7.14419691e-05  -8.44355900e-04   2.03895708e-01
   2.67533962e-02  -2.18812097e-04]
Ridge Regression Result
RSS: 4945796.651327149
R^2: 0.3814661376936701
MSE: 78504.70875122459
Best lambda value: 11768119.524349991

Coefficients: [-0.          1.72844984  0.          0.          0.          1.28206996
  0.         -0.37538491  0.86815739 -0.          0.9752082   0.68444581
 -0.09191713 

