## 회귀는 꼭 선형회귀로 해야만 하는가?

In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.datasets import load_boston
boston = load_boston()

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    boston.data, boston.target, test_size=0.1, random_state=2021
)

### 1. 선형회귀

In [4]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X_train, y_train)

LinearRegression()

In [5]:
pred_lr = lr.predict(X_test)

In [6]:
from sklearn.metrics import mean_squared_error
mse_lr = mean_squared_error(y_test, pred_lr)
mse_lr

22.367414529953578

### 2.Decision Tree

In [7]:
X_test = X_test.astype('int')

In [8]:
from sklearn.tree import DecisionTreeRegressor
dtr = DecisionTreeRegressor(random_state=2021)
dtr.fit(X_train, y_train)
pred_dt = dtr.predict(X_test)
mse_dt = mean_squared_error(y_test, pred_dt)
mse_dt

48.47803921568627

### 3. Support Vector Machine

In [9]:
from sklearn.svm import SVR
svr = SVR()
svr.fit(X_train, y_train)
pred_sv = svr.predict(X_test)
mse_sv = mean_squared_error(y_test, pred_sv)
mse_sv

58.04213843824236

### 4. Random Forest

In [10]:
from sklearn.ensemble import RandomForestRegressor
rfr = RandomForestRegressor(random_state=2021)
rfr.fit(X_train, y_train)
pred_rf = rfr.predict(X_test)
mse_rf = mean_squared_error(y_test, pred_rf)
mse_rf

38.822149843137254

### 5. XGBoost

In [11]:
from xgboost import XGBRegressor
xgb = XGBRegressor()
xgb.fit(X_train, y_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.300000012,
             max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=100, n_jobs=8,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=None)

In [12]:
pred_xg = xgb.predict(X_test)
mse_xg = mean_squared_error(y_test, pred_xg)
mse_xg

59.06125763271705

- 비교

In [13]:
df = pd.DataFrame({
    'y_test':y_test, 'LR':pred_lr, 'DT':pred_dt,
    'SVM':pred_sv, 'RF':pred_rf, 'XGB':pred_xg
})
df.head()

Unnamed: 0,y_test,LR,DT,SVM,RF,XGB
0,21.7,22.760899,19.5,22.638948,19.891,17.214937
1,15.6,15.794844,19.5,22.00996,18.874,15.951963
2,20.0,22.258961,15.3,23.019416,19.383,18.816679
3,12.8,13.170101,13.8,15.684667,15.351,22.460041
4,50.0,36.790027,41.3,19.929748,42.043,46.390953


In [14]:
print(mse_lr, mse_dt, mse_sv, mse_rf, mse_xg) 

22.367414529953578 48.47803921568627 58.04213843824236 38.822149843137254 59.06125763271705
