# VotingRegressor(보팅 회귀)
- 여러 회귀모형 알고리즘을 결합하고 평균 예측 값을 반환한다.

### 1.1 패키지 로딩

In [1]:
from sklearn.linear_model import Lasso, Ridge, LinearRegression
from sklearn.ensemble import VotingRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
import pandas as pd
import numpy as np

### 1.2 데이터 로딩, 스케일링, 분할

In [14]:
# Load the airport code lookup table, then relabel its descriptive columns
# with an 'Ori-' prefix before previewing the first rows.
# NOTE(review): the recorded preview shows an 'Unnamed: 0' column, i.e. the
# CSV's saved index is being read as a data column — consider index_col=0.
airport = pd.read_csv('Airport Codes Dataset.csv')
airport_rename = airport.rename(
    columns={
        'city': 'Ori-city',
        'state': 'Ori-state',
        'name': 'Ori-airport',
    },
)
airport_rename.head()

Unnamed: 0,airport_id,Ori-city,Ori-state,Ori-airport
0,10165,Adak Island,AK,Adak
1,10299,Anchorage,AK,Ted Stevens Anchorage International
2,10304,Aniak,AK,Aniak Airport
3,10754,Barrow,AK,Wiley Post/Will Rogers Memorial
4,10551,Bethel,AK,Bethel Airport


In [16]:
# Read the flight on-time performance records and preview the first five rows.
flight = pd.read_csv(
    'Flight on-time performance.csv',
)
flight.head(n=5)

Unnamed: 0,Year,Quarter,Month,DayofMonth,DayOfWeek,Carrier,OriginAirportID,DestAirportID,CRSDepTime,DepTimeBlk,DepDelay,DepDel15,CRSArrTime,ArrTimeBlk,ArrDelay,ArrDel15,Cancelled,Diverted
0,2011,4,10,6,4,WN,13495,12191,1435,1400-1459,2.0,0.0,1550,1500-1559,-6.0,0.0,0,0
1,2011,4,10,6,4,WN,13495,12191,1330,1300-1359,-4.0,0.0,1445,1400-1459,-12.0,0.0,0,0
2,2011,4,10,6,4,WN,13495,12191,1030,1000-1059,-2.0,0.0,1145,1100-1159,-14.0,0.0,0,0
3,2011,4,10,6,4,WN,13495,12889,1900,1900-1959,0.0,0.0,2055,2000-2059,-6.0,0.0,0,0
4,2011,4,10,6,4,WN,13495,12889,1340,1300-1359,-1.0,0.0,1530,1500-1559,2.0,0.0,0,0


In [19]:
# Attach the airport details to each flight by matching the airport id to the
# flight's origin airport id, then report the merged frame's (rows, columns).
# NOTE(review): despite the name flight_outer, no `how` is passed, so pandas
# falls back to its default inner join — TODO confirm an outer join was not
# intended.
flight_outer = airport_rename.merge(
    flight,
    left_on='airport_id',
    right_on='OriginAirportID',
)
print(flight_outer.shape)

(504397, 22)


In [5]:
# NOTE(review): DataFrame.rename() is called here without any mapper, index or
# columns argument; pandas is expected to raise TypeError for such a call —
# TODO confirm which columns were meant to be renamed. airport_outer is not
# referenced by any other visible cell.
airport_outer = airport.rename()

In [10]:
# Hold out 30% of the samples as the test set; random_state pins the shuffle.
# NOTE(review): scaled_X and y are not defined by any visible cell of this
# section (this cell's execution count is older than the data-loading cells) —
# TODO confirm where the regression features and target are built.
X_train, X_test, y_train, y_test = train_test_split(
    scaled_X,
    y,
    test_size=0.3,
    random_state=10,
)

### 1.3 모델 생성

In [11]:
# Base regressors whose predictions the voting ensemble combines.
linear = LinearRegression()
ridge = Ridge(alpha=1)
lasso = Lasso(alpha=0.03)

# Each estimator is registered as a (label, estimator) pair; the order of the
# pairs is kept as LASSO, RIDGE, LINEAR.
vc_r = VotingRegressor(
    estimators=[
        ('LASSO', lasso),
        ('RIDGE', ridge),
        ('LINEAR', linear),
    ],
)
vc_r.fit(X_train, y_train)

### 1.4 예측 및 평가

In [13]:
# Score the fitted voting regressor on the held-out split.
y_hat = vc_r.predict(X_test)

# Compute both metrics first, then report them: the value returned by
# score() and the root of the mean squared error.
r_square = vc_r.score(X_test, y_test)
rmse = np.sqrt(
    metrics.mean_squared_error(y_test, y_hat)
)

print(f'결정계수:{r_square:.3f}')
print(f'RMSE:{rmse:.3f}')

결정계수:0.699
RMSE:5.425


# 2 VotingClassifier(보팅 분류)
### 2.1 패키지 로딩

In [19]:
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics
import pandas as pd
import numpy as np

### 2.2 데이터 로딩, 정규화, 분할

In [20]:
# Fetch the breast cancer data as a (features, target) pair.
X, y = load_breast_cancer(return_X_y=True)

# Standardize the features with StandardScaler.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split in the next cell, so test-set statistics leak into the scaling; fitting
# on the training split only would avoid this.
scaled_X = StandardScaler().fit_transform(X)

In [21]:
# Keep 70% of the samples for training; stratify=y asks for a split stratified
# on the class labels, and random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    scaled_X,
    y,
    train_size=0.7,
    random_state=0,
    stratify=y,
)

### 2.3 모델 생성 및 학습

In [22]:
# Base classifiers, both with their default hyperparameters.
knn = KNeighborsClassifier()
logistic = LogisticRegression()

# voting: pass 'hard' for hard voting or 'soft' for soft voting
# (default: 'hard').
vo_c = VotingClassifier(
    estimators=[
        ('LOGISTIC', logistic),
        ('KNN', knn),
    ],
    voting='soft',
)
vo_c.fit(X_train, y_train)

### 2.4 예측 및 평가

In [24]:
# Evaluate the voting classifier on the held-out split.
y_hat = vo_c.predict(X_test)

# AUC is computed from column 1 of predict_proba rather than from the
# hard labels.
auc = metrics.roc_auc_score(
    y_test,
    vo_c.predict_proba(X_test)[:, 1],
)

print(f'정확도: {metrics.accuracy_score(y_test, y_hat):.3f}')
print(f'AUC: {auc:.3f}')

정확도: 0.947
AUC: 0.993


# 3. GradientBoostingClassifier (부스팅 분류)
- 랜덤 포레스트와 같이 의사결정 나무 모델을 기반으로 하지만, 배깅이 아닌 부스팅 방법으로 결합하는 모델
- 이전 예측기가 만든 잔여오차(residual error)에 새로운 예측기를 학습시킴

### 3.1 패키지 로딩

In [26]:
from sklearn.ensemble import GradientBoostingClassifier

### 3.2 데이터 로딩, 정규화, 분할

In [32]:
# Reload the breast cancer data, standardize the features, and make the same
# stratified 70/30 split (random_state=0) for the boosting model.
# NOTE(review): the scaler is fitted on the full dataset before splitting, so
# test-set statistics leak into the scaling; fitting on the training split
# only would avoid this.
X, y = load_breast_cancer(return_X_y=True)
scaled_X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    scaled_X,
    y,
    train_size=0.7,
    random_state=0,
    stratify=y,
)

### 3.3 모델 생성 및 학습

In [33]:
# Gradient boosting classifier with default hyperparameters; random_state is
# pinned so repeated runs build the same model.
gb_c = GradientBoostingClassifier(
    random_state=0,
)
gb_c.fit(X_train, y_train)

### 3.4 예측 및 평가

In [37]:
# Evaluate the gradient boosting classifier on the held-out split.
y_hat = gb_c.predict(X_test)

# AUC is computed from column 1 of predict_proba rather than from the
# hard labels.
auc = metrics.roc_auc_score(
    y_test,
    gb_c.predict_proba(X_test)[:, 1],
)

print(f'정확도: {metrics.accuracy_score(y_test, y_hat):.3f}')
print(f'auc: {auc:.3f}')

정확도: 0.942
auc: 0.987


# 4. GradientBoostingRegressor (부스팅 회귀)