## boston
---
- https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_boston.html?highlight=boston#sklearn.datasets.load_boston

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np

In [2]:
data_url = "http://lib.stat.cmu.edu/datasets/boston"

In [3]:
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)

In [4]:
data = pd.DataFrame(np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]))

In [5]:
data.columns=['CRIM','ZN','INDUS','CHAS','NOX','RM','AGE','DIS','RAD','TAX','PTRATIO','B','LSTAT']

In [6]:
target = pd.DataFrame(raw_df.values[1::2, 2])

In [7]:
target.columns=['MEDV']

In [8]:
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [9]:
target.head()

Unnamed: 0,MEDV
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2


In [10]:
# 학습, 테스트 데이터 분리
train_X, test_X, train_y, test_y = train_test_split(data,
                                                   target,
                                                   test_size=0.2)

In [11]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression().fit(train_X, train_y)
lr.coef_

array([[-1.18641335e-01,  4.52533372e-02,  4.21251238e-02,
         3.52842390e+00, -1.45500760e+01,  4.28247871e+00,
        -1.65274900e-02, -1.45681950e+00,  2.91616083e-01,
        -1.20314327e-02, -9.03774886e-01,  1.19655456e-02,
        -4.92593575e-01]])

In [12]:
print(lr.score(train_X, train_y))
print(lr.score(test_X, test_y))

0.7629776234250627
0.6401279424256565


In [13]:
from sklearn.linear_model import Lasso

lasso=Lasso().fit(train_X, train_y)
print(lasso.coef_)
print(lasso.score(train_X, train_y))
print(lasso.score(test_X, test_y))

[-0.07632298  0.04615295 -0.          0.         -0.          1.35094834
  0.0035022  -0.75411443  0.23723148 -0.01350985 -0.728928    0.0102491
 -0.71111969]
0.7048403083264858
0.6341138331272932


In [14]:
from sklearn.preprocessing import StandardScaler
# 변형 객체 생성
std_scaler = StandardScaler()

# 훈련데이터의 모수 분포 저장
std_scaler.fit(train_X)

# 훈련 데이터 스케일링
train_X_scaled = std_scaler.transform(train_X)

# 테스트 데이터의 스케일링
test_X_scaled = std_scaler.transform(test_X)

In [15]:
from sklearn.linear_model import Ridge

ridge=Ridge().fit(train_X, train_y)
print('훈련 세트 점수: {:.2f}'.format(ridge.score(train_X_scaled, train_y)))
print('테스트 세트 점수: {:.2f}'.format(ridge.score(test_X_scaled, test_y)))

훈련 세트 점수: 0.25
테스트 세트 점수: 0.08




In [16]:
ridge10=Ridge(alpha=10).fit(train_X,train_y)
print('훈련 세트 점수: {:.2f}'.format(ridge10.score(train_X_scaled, train_y)))
print('테스트 세트 점수: {:.2f}'.format(ridge10.score(test_X_scaled, test_y)))

훈련 세트 점수: 0.56
테스트 세트 점수: 0.45




In [17]:
ridge01=Ridge(alpha=0.1).fit(train_X,train_y)
print('훈련 세트 점수: {:.2f}'.format(ridge01.score(train_X_scaled, train_y)))
print('테스트 세트 점수: {:.2f}'.format(ridge01.score(test_X_scaled, test_y)))

훈련 세트 점수: -1.11
테스트 세트 점수: -1.23




In [18]:
from sklearn.linear_model import Ridge

ridge=Ridge().fit(train_X, train_y)
print('훈련 세트 점수: {:.2f}'.format(ridge.score(train_X, train_y)))
print('테스트 세트 점수: {:.2f}'.format(ridge.score(test_X, test_y)))

훈련 세트 점수: 0.76
테스트 세트 점수: 0.63


In [19]:
ridge10=Ridge(alpha=10).fit(train_X,train_y)
print('훈련 세트 점수: {:.2f}'.format(ridge10.score(train_X, train_y)))
print('테스트 세트 점수: {:.2f}'.format(ridge10.score(test_X, test_y)))

훈련 세트 점수: 0.76
테스트 세트 점수: 0.63


In [20]:
ridge01=Ridge(alpha=0.1).fit(train_X,train_y)
print('훈련 세트 점수: {:.2f}'.format(ridge01.score(train_X, train_y)))
print('테스트 세트 점수: {:.2f}'.format(ridge01.score(test_X, test_y)))

훈련 세트 점수: 0.76
테스트 세트 점수: 0.64
