# 선형 회귀 분석

### 데이터 불러오기

In [2]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.utils import Bunch

# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so the
# bare call fails on current releases. Use it while it still exists; otherwise
# rebuild the same arrays from the original StatLib file, following the recipe
# published in scikit-learn's deprecation notice. Either way `raw_boston`
# exposes `.data` and `.target`, which is all the later cells rely on.
try:
    raw_boston = datasets.load_boston()
except (ImportError, AttributeError):
    _boston_url = "http://lib.stat.cmu.edu/datasets/boston"
    # The file starts with a 22-line text header and stores every record on
    # two physical lines: 11 feature values, then 2 more features followed by
    # the target value.
    _raw_df = pd.read_csv(_boston_url, sep=r"\s+", skiprows=22, header=None)
    raw_boston = Bunch(
        data=np.hstack([_raw_df.values[::2, :], _raw_df.values[1::2, :2]]),
        target=_raw_df.values[1::2, 2],
    )

### 피처, 타깃 데이터 지정

In [3]:
# Pull the feature matrix and the regression target out of the loaded dataset
# object in one paired assignment.
X, y = raw_boston.data, raw_boston.target

### 트레이닝/테스트 데이터 분할

In [4]:
from sklearn.model_selection import train_test_split
# Split features and target into training (`*_tn`) and test (`*_te`) parts.
# No test_size is passed, so scikit-learn's default proportion applies;
# random_state=1 fixes the shuffle so the split is reproducible.
X_tn, X_te, y_tn, y_te = train_test_split(X, y, random_state=1)

### 데이터 표준화

In [8]:
from sklearn.preprocessing import StandardScaler

# Estimate the scaling statistics from the training split only and apply those
# same statistics to the test split, so nothing about the test data influences
# preprocessing.
std_scale = StandardScaler()
X_tn_std = std_scale.fit_transform(X_tn)
X_te_std = std_scale.transform(X_te)

## 데이터 학습 (선형 회귀 분석)

In [10]:
from sklearn.linear_model import LinearRegression
# Plain (unregularized) linear regression, used as the baseline model.
clf_lr = LinearRegression()
# Fit on the standardized training features. Kept as the cell's last
# expression so the notebook echoes the fitted estimator.
clf_lr.fit(X_tn_std, y_tn)

LinearRegression()

### 선형 회귀 분석 계수, 상수항 확인

In [11]:
# Fitted coefficients of the baseline model, one per standardized feature.
print(clf_lr.coef_)

[-1.07145146  1.34036243  0.26298069  0.66554537 -2.49842551  1.97524314
  0.19516605 -3.14274974  2.66736136 -1.80685572 -2.13034748  0.56172933
 -4.03223518]


In [12]:
# Fitted intercept (constant term) of the baseline model.
print(clf_lr.intercept_)

22.344591029023768


## 데이터 학습 (L2 제약식 적용, 릿지 회귀 분석)

In [13]:
from sklearn.linear_model import Ridge
# Ridge regression: linear regression with an L2 penalty on the coefficients;
# alpha sets the penalty strength.
clf_ridge = Ridge(alpha=1)
# Fit on the same standardized training data as the baseline so the models
# can be compared directly.
clf_ridge.fit(X_tn_std, y_tn)

Ridge(alpha=1)

### 릿지 회귀 분석 계수, 상수항 확인

In [14]:
# Ridge coefficients, for comparison against the unregularized coefficients.
print(clf_ridge.coef_)

[-1.05933451  1.31050717  0.23022789  0.66955241 -2.45607567  1.99086611
  0.18119169 -3.09919804  2.56480813 -1.71116799 -2.12002592  0.56264409
 -4.00942448]


In [15]:
# Ridge intercept. In the recorded output it matches the baseline intercept,
# presumably because the features are centred and the intercept is not
# penalized.
print(clf_ridge.intercept_)

22.344591029023768


## 데이터 학습 (L1 제약식 적용, 라쏘 회귀 분석)

In [17]:
from sklearn.linear_model import Lasso
# Lasso regression: linear regression with an L1 penalty on the coefficients;
# alpha sets the penalty strength.
clf_lasso = Lasso(alpha=0.01)
# Fit on the same standardized training data as the other models.
clf_lasso.fit(X_tn_std, y_tn)

Lasso(alpha=0.01)

### 라쏘 회귀 분석 계수, 상수항 확인

In [18]:
# Lasso coefficients, for comparison against the other models' coefficients.
print(clf_lasso.coef_)

[-1.04326518  1.27752711  0.1674367   0.66758228 -2.41559964  1.99244179
  0.14733958 -3.09473711  2.46431135 -1.60552274 -2.11046422  0.55200229
 -4.00809905]


In [19]:
# Lasso intercept (constant term).
print(clf_lasso.intercept_)

22.344591029023768


## 데이터 학습 (L1, L2 제약식 적용, 엘라스틱 넷)

In [21]:
from sklearn.linear_model import ElasticNet
# Elastic net: linear regression with a combined L1 + L2 penalty. alpha sets
# the overall penalty strength and l1_ratio the share given to the L1 term,
# so l1_ratio=0.01 makes the penalty almost entirely L2.
clf_elastic = ElasticNet(alpha=0.01, l1_ratio=0.01)
# Fit on the same standardized training data as the other models.
clf_elastic.fit(X_tn_std, y_tn)

ElasticNet(alpha=0.01, l1_ratio=0.01)

### 엘라스틱 넷 계수, 상수항 확인

In [22]:
# Elastic net coefficients, for comparison against the other models.
print(clf_elastic.coef_)

[-1.02916603  1.23681955  0.15236504  0.67859622 -2.34646781  2.02965524
  0.14575132 -2.98592423  2.32013379 -1.48829485 -2.09271972  0.56506801
 -3.9495281 ]


In [23]:
# Elastic net intercept (constant term).
print(clf_elastic.intercept_)

22.344591029023768


### 데이터 예측

In [24]:
# Predict on the standardized test features with each fitted model, in the
# order: linear, ridge, lasso, elastic net.
pred_lr, pred_ridge, pred_lasso, pred_elastic = [
    model.predict(X_te_std)
    for model in (clf_lr, clf_ridge, clf_lasso, clf_elastic)
]

### 모형 평가 - R 제곱값

In [25]:
from sklearn.metrics import r2_score
# R^2 of the baseline linear regression on the held-out test split
# (argument order is true values first, predictions second).
print(r2_score(y_te, pred_lr))

0.7789410172622858


In [26]:
# R^2 of the ridge model on the held-out test split.
print(r2_score(y_te, pred_ridge))

0.7789704562726603


In [27]:
# R^2 of the lasso model on the held-out test split.
print(r2_score(y_te, pred_lasso))

0.7787621490259895


In [28]:
# R^2 of the elastic net model on the held-out test split.
print(r2_score(y_te, pred_elastic))

0.7787876079239252


### 모형 평가 - MSE

In [29]:
from sklearn.metrics import mean_squared_error
# Mean squared error of the baseline linear regression on the held-out test
# split (lower is better; units are the target's units squared).
print(mean_squared_error(y_te, pred_lr))

21.89776539604949


In [30]:
# Mean squared error of the ridge model on the held-out test split.
print(mean_squared_error(y_te, pred_ridge))

21.894849212618773


In [31]:
# Mean squared error of the lasso model on the held-out test split.
print(mean_squared_error(y_te, pred_lasso))

21.915483810504824


In [32]:
# Mean squared error of the elastic net model on the held-out test split.
print(mean_squared_error(y_te, pred_elastic))

21.91296189093687
