## 다항 변수에 대한 Ridge Regression - Linear Algebra로 구현. 다항 회귀 분석
### numpy로 구현

In [0]:
import numpy as np
from sklearn import datasets

iris = datasets.load_iris()

In [0]:
X = iris.data
Y = iris.target
Y = Y.reshape((-1, 1))

In [32]:
NUM = 100
Xtr = X[:NUM]
Ytr = Y[:NUM]
Xte = X[NUM:]
Yte = Y[NUM:]
print(Xtr.shape)
print(Xte.shape)

(100, 4)
(50, 4)


In [0]:
def rmse(arr1, arr2):
    return np.sqrt(np.mean((arr1-arr2)**2))

In [0]:

# 주어진 모델로 실행하고, 결과 성능 출력하기
from sklearn.metrics import mean_squared_error, r2_score

def exe_regression(model, Xtr, Ytr, Xte, Yte):
  model.fit(Xtr, Ytr)
  Y2 = model.predict(Xte)
  rmse_v = rmse(Yte, Y2)
  print("RMSE: %.4f" % (rmse_v))
  print("MSE: %.4f" % mean_squared_error(Yte, Y2))
  print('R2: %.4f' % r2_score(Yte, Y2))

In [0]:
"""
scikit-learn의 LinearRegression()을 직접 만들어 보기
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html
"""
def my_ridge_regrssion_with_la(X, Y, alpha=1.0, debug=False):  
  # X.shape[0] - 행의 갯수
  rows = X.shape[0]
  X = X.reshape((rows, -1)) # 1차원벡터인 경우, 행열로 변환
  Y = Y.reshape((rows, -1)) # 1차원벡터인 경우, 행열로 변환
      
  xx = X.T.dot(X) # X*X
  A = np.eye(xx.shape[0])
  A[0,0] = 0 # the bias term θ0 is not regularized
  print(A)
  xx = xx + alpha * A
  xx_inv = np.linalg.inv(xx)  # (X*X)^-1
  xy = X.T.dot(Y) # X*Y
  
  W = np.matmul(xx_inv, xy) # XY/XX
  W = W.reshape((-1))
  if debug: print(xx_inv, xy)
  return W


In [0]:
### 내 regression 클래스 만들기
class MyRegressWithLa(object):
  #def __init__(self):
  #  self.w =  None
    
  def fit(self, X, Y):
    rows = X.shape[0] # X.shape[0] - 행의 갯수
    X = X.reshape((rows, -1)) # 1차원벡터인 경우, 행열로 변환
    Y = Y.reshape((rows, -1)) # 1차원벡터인 경우, 행열로 변환
    
    # bias 에 해당하는 [1,1,1, ..,1] 칼럼을 X에 추가한다.
    Xb = np.append(np.ones((rows, 1)), X, axis=1)
    self.w = my_ridge_regrssion_with_la(Xb, Y)
    
  def predict(self, X):
    rows = X.shape[0] # X.shape[0] - 행의 갯수
    Xb = np.append(np.ones((rows, 1)), X, axis=1)
    pred = np.matmul(Xb, self.w)
    return pred

In [42]:
from sklearn.linear_model import LinearRegression, Ridge

mymodel = MyRegressWithLa()
skmodel = Ridge(alpha=1.0, fit_intercept=True, solver='cholesky', tol=0.1)

print('=== scikit-learn Regression')
exe_regression(skmodel, Xtr, Ytr, Xte, Yte)
print('W', skmodel.get_params())
print('W', skmodel.coef_, skmodel.intercept_)

print('=== My Regression')
exe_regression(mymodel, Xtr, Ytr, Xte, Yte)
print('W', mymodel.w)



  

=== scikit-learn Regression
RMSE: 0.6368
MSE: 0.4055
R2: 0.0000
W {'alpha': 1.0, 'copy_X': True, 'fit_intercept': True, 'max_iter': None, 'normalize': False, 'random_state': None, 'solver': 'cholesky', 'tol': 0.1}
W [[-0.02831036 -0.15751638  0.23776063  0.19955926]] [0.3059425]
=== My Regression
[[0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]
RMSE: 0.6368
MSE: 0.4055
R2: 0.0000
W [ 0.3059425  -0.02831036 -0.15751638  0.23776063  0.19955926]


In [38]:
import numpy as np
A = np.asmatrix(np.random.rand(10,3))
b = np.asmatrix(np.random.rand(10,1))
I = np.identity(A.shape[1])
alpha = 1
x = np.linalg.inv(A.T*A + alpha * I)*A.T*b
print (x.T)


from sklearn.linear_model import Ridge
model = Ridge(alpha = alpha, tol=0.1, fit_intercept=False).fit(A ,b)

print(model.coef_, model.intercept_)

[[0.2337478  0.3369713  0.28724427]]
[[0.2337478  0.3369713  0.28724427]] 0.0


In [27]:
import numpy as np
A = np.asmatrix(np.random.rand(10,3))
A2 = np.c_[np.ones((10, 1)), A]
b = np.asmatrix(np.random.rand(10,1))
I = np.identity(A2.shape[1])
I[0,0] = 0
print(I)
alpha = 1
x = np.linalg.inv(A2.T*A2 + alpha * I)*A2.T*b
print (x.T)


from sklearn.linear_model import Ridge
model = Ridge(alpha = alpha, tol=0.1, fit_intercept=True).fit(A ,b)

print(model.coef_, model.intercept_)

[[0. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
[[ 0.54667466 -0.10778718  0.10071568 -0.05677392]]
[[-0.10778718  0.10071568 -0.05677392]] [0.54667466]
