## 다중 변수에 대한 regression. 다중 선형 회귀 분석

In [0]:
def prn_shape(*args):
    """Print the ``shape`` attribute of every argument on a single line.

    The shapes are rendered with ``str`` and separated by one space.
    """
    rendered = (str(item.shape) for item in args)
    print(' '.join(rendered))

In [0]:
import numpy as np
from sklearn import datasets
# Load the Iris dataset bundled with scikit-learn; the cells below read its
# ``data`` (features) and ``target`` (labels) attributes.
iris = datasets.load_iris()

In [0]:
# Feature matrix, and the labels reshaped into a single column.
X, Y = iris.data, iris.target.reshape((-1, 1))

In [5]:
# Shuffle the row indices, then use the first NUM shuffled rows for training
# and the remaining rows for testing.
NUM = 100
# Take the number of rows from the data itself rather than hard-coding 150,
# so the split stays valid if a different dataset is loaded.
select = np.random.permutation(X.shape[0])
Xtr, Ytr = X[select[:NUM]], Y[select[:NUM]]
Xte, Yte = X[select[NUM:]], Y[select[NUM:]]
print(Xtr.shape)
print(Xte.shape)

(100, 4)
(50, 4)


In [0]:
def rmse(arr1, arr2):
    """Return the root-mean-square error between two arrays.

    Args:
        arr1: Array-like of numbers (for example the true values).
        arr2: Array-like of numbers (for example the predictions).

    Returns:
        The square root of the mean squared element-wise difference.
    """
    # Work in floating point so plain lists are accepted and integer
    # (especially unsigned) inputs cannot wrap around when subtracted.
    a = np.asarray(arr1, dtype=float)
    b = np.asarray(arr2, dtype=float)
    # A column vector (n, 1) against a flat vector (n,) would broadcast to an
    # (n, n) matrix and silently produce a wrong score; when both inputs hold
    # the same number of elements, compare them element by element instead.
    if a.shape != b.shape and a.size == b.size:
        a, b = a.ravel(), b.ravel()
    return np.sqrt(np.mean((a - b) ** 2))

In [0]:

# 주어진 모델로 실행하고, 결과 성능 출력하기
from sklearn.metrics import mean_squared_error, r2_score

def exe_regression(model, Xtr, Ytr, Xte, Yte):
    """Fit ``model`` on the training split and print its test-set scores.

    The shapes of all four arrays are printed first. The model is then
    fitted on (Xtr, Ytr), asked to predict Xte, and the RMSE, MSE and R2 of
    those predictions against Yte are printed with four decimals.
    """
    prn_shape(Xtr, Ytr, Xte, Yte)
    model.fit(Xtr, Ytr)
    predictions = model.predict(Xte)

    # Each metric is evaluated and printed in turn, in this order.
    reports = (
        ("RMSE: %.4f", rmse),
        ("MSE: %.4f", mean_squared_error),
        ('R2: %.4f', r2_score),
    )
    for template, metric in reports:
        print(template % metric(Yte, predictions))

In [0]:
"""
Gradient Descent 만들기
"""
def my_regrssion_with_gr(X, Y, debug=False, lr=0.014, epochs=30000, tol=0.00004):
    """Fit linear-regression weights with batch gradient descent.

    Minimises the mean squared error ``mean((X @ W - Y) ** 2)`` starting from
    weights drawn from a standard normal distribution.

    Args:
        X: Array-like of shape (m, dim). No bias column is added here; the
            caller appends one if an intercept is wanted.
        Y: Array-like with m rows. A 1-D vector is treated as one column.
        debug: Unused; kept so existing calls keep working.
        lr: Learning rate (step size) of each update.
        epochs: Maximum number of gradient updates.
        tol: Training stops early once the sum of the absolute gradient
            components drops below this value.

    Returns:
        The weight array; shape (dim, 1) when Y has a single column.
    """
    X = np.asarray(X, dtype=float)
    m = X.shape[0]
    dim = X.shape[1]
    # A 1-D Y would broadcast against the (m, 1) prediction into an (m, m)
    # matrix and corrupt the gradient, so force Y to have one row per sample.
    Y = np.asarray(Y, dtype=float).reshape((m, -1))
    scale = 2.0 / m  # factor from differentiating the mean of squared errors

    W = np.random.randn(dim, 1)
    for i in range(epochs):
        err = X @ W - Y  # residual: prediction minus target
        gradients = scale * (X.T @ err)
        W = W - lr * gradients
        # Converged when the gradient is (almost) zero in every component.
        if np.sum(np.abs(gradients)) < tol:
            print('Stop at epoch', i)
            break
    return W


In [0]:
### 내 regression 클래스 만들기
class MyRegressWithGradientDescent:
    """Linear regression trained by gradient descent, with fit/predict methods.

    After ``fit`` the attribute ``w`` holds the learned weights. The bias
    (intercept) weight is the last row, because the column of ones is
    appended after the feature columns.
    """

    @staticmethod
    def _with_bias(X):
        """Return X as a 2-D matrix with a trailing column of ones for the bias."""
        X = np.asarray(X)
        rows = X.shape[0]  # number of samples
        if X.ndim != 2:
            # A 1-D vector is read as ``rows`` samples of a single feature.
            X = X.reshape((rows, -1))
        return np.append(X, np.ones((rows, 1)), axis=1)

    def fit(self, X, Y):
        """Learn the weights from features X and targets Y.

        Prints the learned weights and returns ``self`` so calls can be
        chained.
        """
        rows = np.asarray(X).shape[0]
        Y = np.asarray(Y).reshape((rows, -1))  # turn a 1-D target into a column
        self.w = my_regrssion_with_gr(self._with_bias(X), Y)
        print('W', self.w)
        return self

    def predict(self, X):
        """Return predictions for X using the weights stored by ``fit``.

        X is normalised the same way as in ``fit``, so 1-D input is accepted.
        """
        return np.matmul(self._with_bias(X), self.w)

In [10]:
from sklearn.linear_model import LinearRegression

# Hand-written gradient-descent model, scored on the held-out rows.
print('=== My Regression')
mymodel = MyRegressWithGradientDescent()
exe_regression(mymodel, Xtr, Ytr, Xte, Yte)

# scikit-learn's LinearRegression on the same split, for comparison.
print('=== scikit-learn Regression')
skmodel = LinearRegression()
exe_regression(skmodel, Xtr, Ytr, Xte, Yte)
print('W', skmodel.coef_, skmodel.intercept_)

  

=== My Regression
(100, 4) (100, 1) (50, 4) (50, 1)
Stop at epoch 24294
W [[-0.03303039]
 [-0.09203165]
 [ 0.15066786]
 [ 0.69078802]
 [ 0.06796997]]
RMSE: 0.2407
MSE: 0.0580
R2: 0.9095
=== scikit-learn Regression
(100, 4) (100, 1) (50, 4) (50, 1)
RMSE: 0.2408
MSE: 0.0580
R2: 0.9095
W [[-0.03272849 -0.09186638  0.15063349  0.69065289]] [0.06597067]
