# Executing multiple ML models in a chain
---
* Description:

>Model chains are defined by two or more models that execute sequentially, where the outputs of one model become the inputs to the next model down the chain.

* Direction of improvement:

>Refactor the model chain into a class.

---

## 1. Import Libraries

In [1]:
# basic
import model_chains_config
import pandas as pd
from sklearn.pipeline import Pipeline
import pickle

# preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

# model
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from catboost import CatBoostRegressor

%matplotlib inline
# Show floats with two decimal places whenever pandas renders a DataFrame/Series.
pd.set_option('display.float_format',lambda x: '%.2f' % x)

In [2]:
# import warnings
# warnings.filterwarnings('ignore')

In [3]:
def col_types(data):
    """Summarise the dtype of every column in ``data``.

    Returns a DataFrame with one row per column of ``data`` and two columns,
    ``column_name`` and ``type``, ordered by ``type``. Row labels are the
    original column positions because the index is not reset after sorting.
    """
    dtype_table = data.dtypes.to_frame().reset_index()
    dtype_table.columns = ['column_name', 'type']
    return dtype_table.sort_values(by='type')

def missing_counts(data):
    """Report which columns of ``data`` contain missing values.

    Parameters
    ----------
    data : pandas.DataFrame
        Table to inspect.

    Returns
    -------
    pandas.DataFrame
        One row per column that has at least one null, sorted by ascending
        null count, with the columns ``column_name``, ``missing_count`` and
        ``percentage(%)`` (share of rows that are null, rounded to two
        decimals). The frame is empty when nothing is missing.
    """
    missing = data.isnull().sum()
    # Keep only the columns that actually have nulls, fewest first.
    missing = missing[missing > 0].sort_values()
    missing_count = pd.DataFrame({'column_name': missing.index, 'missing_count': missing.values})
    n_rows = data.shape[0]
    # Vectorised percentage of rows that are null in each reported column.
    missing_count['percentage(%)'] = (missing_count['missing_count'] / n_rows * 100).round(2)
    return missing_count

In [4]:
# Load the training feature matrix (per its file name, the output of the
# feature-engineering step) and preview the first rows.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
train_x_path = '../Data/meta/train_x_after_feature_engineering.pickle'
with open(train_x_path, 'rb') as handle:
    data_x = pickle.load(handle)
data_x.head()

Unnamed: 0,clean_pressure11,clean_pressure23,clean_pressure31,clean_pressure41,clean_pressure51,clean_pressure52,clean_pressure61,clean_pressure62,clean_pressure72,clean_pressure81,...,oven_a1_group,oven_b3_group,painting_g2_act_f_air_group,painting_g4_act_hvc_group,painting_g10_act_hvc_group,painting_g11_act_hvc_group,env_rpi05_temp_group,env_rpi07_pm25_group,env_rpi14_pm1_group,env_rpi15_pm1_group
0,1.0,-2.48,-0.84,2.36,2.17,-2.98,1.91,-0.77,2.95,1.95,...,3,1,1,1,2,1,1,0,2,3
4,1.01,-2.49,-0.85,2.36,2.18,-2.98,1.91,-0.77,2.94,1.82,...,3,1,1,1,2,1,1,3,2,3
5,1.01,-2.49,-0.85,2.36,2.18,-2.98,1.91,-0.77,2.94,1.82,...,3,1,1,1,2,1,1,3,2,3
6,1.01,-2.49,-0.85,2.36,2.18,-2.98,1.91,-0.77,2.94,1.82,...,3,1,1,1,2,1,1,3,2,3
7,1.01,-2.49,-0.85,2.36,2.18,-2.98,1.91,-0.77,2.94,1.82,...,3,1,1,1,2,1,1,3,2,3


In [5]:
# Load the training targets (one column per sensor point) and preview them.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
train_y_path = '../Data/meta/train_y_after_feature_engineering.pickle'
with open(train_y_path, 'rb') as handle:
    data_y = pickle.load(handle)
data_y.head()

Unnamed: 0,sensor_point5_i_value,sensor_point6_i_value,sensor_point7_i_value,sensor_point8_i_value,sensor_point9_i_value,sensor_point10_i_value
0,-2.92,-1.35,-1.12,-1.72,-0.76,-2.84
4,-1.74,-0.55,-1.91,-1.27,-1.21,-1.63
5,-2.92,-0.77,-1.04,-1.14,-1.61,-1.69
6,-2.45,-0.77,-0.72,-1.72,-0.6,-2.66
7,-2.37,-1.2,-1.28,-2.08,-1.96,-2.24


In [6]:
# One-hot encode every object-dtype column in a single get_dummies call.
# The untouched columns keep their order, the indicator columns (named
# "<column>_<value>") are appended after them, and the original object
# columns are dropped.
object_cols = [col for col in data_x.columns if data_x[col].dtype == 'object']
if object_cols:
    data_x = pd.get_dummies(data_x, columns=object_cols)

In [7]:
# List every column's dtype (sorted by dtype) for the encoded feature matrix.
display(col_types(data_x))

Unnamed: 0,column_name,type
121,env_rpi15_pm1_group_4,uint8
89,painting_g4_act_hvc_group_3,uint8
88,painting_g4_act_hvc_group_2,uint8
87,painting_g4_act_hvc_group_1,uint8
86,painting_g4_act_hvc_group_0,uint8
...,...,...
33,painting_g6_act_a_air,float64
32,painting_g5_act_f_air,float64
31,painting_g5_act_a_air,float64
59,env_rpi15_hum,float64


In [8]:
# lasso_params = {'sensor_point5_i_value': {'alpha': 500}, 
#                 'sensor_point6_i_value': {'alpha': 100}, 
#                 'sensor_point7_i_value': {'alpha': 500}, 
#                 'sensor_point8_i_value': {'alpha': 500}, 
#                 'sensor_point9_i_value': {'alpha': 500}, 
#                 'sensor_point10_i_value': {'alpha': 500}}

# ridge_params = {'sensor_point5_i_value': {'alpha': 0.1}, 
#                 'sensor_point6_i_value': {'alpha': 0.1}, 
#                 'sensor_point7_i_value': {'alpha': 0.1}, 
#                 'sensor_point8_i_value': {'alpha': 0.1}, 
#                 'sensor_point9_i_value': {'alpha': 1}, 
#                 'sensor_point10_i_value': {'alpha': 0.5}}

# xgb_params = {'sensor_point5_i_value': {'learning_rate': 0.2, 'max_depth': 3, 'min_child_weight': 10, 'n_estimators': 30, 'random_state': 1}, 
#               'sensor_point6_i_value': {'learning_rate': 0.2, 'max_depth': 5, 'min_child_weight': 10, 'n_estimators': 30, 'random_state': 1}, 
#               'sensor_point7_i_value': {'learning_rate': 0.2, 'max_depth': 3, 'min_child_weight': 10, 'n_estimators': 500, 'random_state': 1}, 
#               'sensor_point8_i_value': {'learning_rate': 0.2, 'max_depth': 3, 'min_child_weight': 3, 'n_estimators': 30, 'random_state': 1}, 
#               'sensor_point9_i_value': {'learning_rate': 0.2, 'max_depth': 5, 'min_child_weight': 10, 'n_estimators': 500, 'random_state': 1}, 
#               'sensor_point10_i_value': {'learning_rate': 0.2, 'max_depth': 20, 'min_child_weight': 10, 'n_estimators': 30, 'random_state': 1}}

# KNeighborsRegressor settings: the same values are used for every target.
kn_params = {
    target: {'leaf_size': 20, 'n_neighbors': 7, 'weights': 'distance'}
    for target in [
        'sensor_point5_i_value',
        'sensor_point6_i_value',
        'sensor_point7_i_value',
        'sensor_point8_i_value',
        'sensor_point9_i_value',
        'sensor_point10_i_value',
    ]
}

# AdaBoostRegressor settings; each row is (target, learning_rate, n_estimators).
ada_params = {
    target: {'learning_rate': rate, 'n_estimators': n_trees, 'random_state': 1}
    for target, rate, n_trees in [
        ('sensor_point5_i_value', 0.2, 100),
        ('sensor_point6_i_value', 0.2, 100),
        ('sensor_point7_i_value', 0.5, 100),
        ('sensor_point8_i_value', 0.5, 300),
        ('sensor_point9_i_value', 0.2, 300),
        ('sensor_point10_i_value', 0.2, 100),
    ]
}

# CatBoostRegressor settings; each row is (target, depth, iterations, l2_leaf_reg).
cat_params = {
    target: {
        'depth': depth,
        'eval_metric': 'RMSE',
        'iterations': n_iter,
        'l2_leaf_reg': l2_reg,
        'learning_rate': 0.03,
        'random_state': 1,
    }
    for target, depth, n_iter, l2_reg in [
        ('sensor_point5_i_value', 8, 500, 3),
        ('sensor_point6_i_value', 2, 1000, 0.5),
        ('sensor_point7_i_value', 5, 500, 3),
        ('sensor_point8_i_value', 5, 500, 3),
        ('sensor_point9_i_value', 8, 500, 0.5),
        ('sensor_point10_i_value', 8, 500, 0.5),
    ]
}


In [9]:
# Order in which the targets are fitted along the chain; every entry is a
# position in model_list / data_y_col.
order_list = [5, 3, 2, 1, 4, 0]


def _build_chain_pipeline(step_name, regressor):
    """Return a Pipeline: degree-2 polynomial features -> standard scaling -> regressor."""
    return Pipeline([
        ('poly', PolynomialFeatures(degree=2)),
        ('std_scaler', StandardScaler()),
        (step_name, regressor),
    ])


# Hyper-parameters are looked up by the target names held in the config module.
_cfg_targets = model_chains_config.data_y_col

# One pipeline per target; position i is the model fitted on data_y column i.
model_list = [
    _build_chain_pipeline('Cat', CatBoostRegressor(**cat_params[_cfg_targets[0]], verbose=False)),
    _build_chain_pipeline('Cat', CatBoostRegressor(**cat_params[_cfg_targets[1]], verbose=False)),
    _build_chain_pipeline('KNR', KNeighborsRegressor(**kn_params[_cfg_targets[2]])),
    _build_chain_pipeline('KNR', KNeighborsRegressor(**kn_params[_cfg_targets[3]])),
    _build_chain_pipeline('Ada', AdaBoostRegressor(**ada_params[_cfg_targets[4]])),
    _build_chain_pipeline('Cat', CatBoostRegressor(**cat_params[_cfg_targets[5]], verbose=False)),
]

# Local list of target column names used by the training/prediction loops.
# NOTE(review): presumably mirrors model_chains_config.data_y_col — confirm
# the two lists stay in sync.
data_y_col = [
    'sensor_point5_i_value',
    'sensor_point6_i_value',
    'sensor_point7_i_value',
    'sensor_point8_i_value',
    'sensor_point9_i_value',
    'sensor_point10_i_value',
]

In [10]:
# Fit the chain: targets are trained in `order_list` order, and each model's
# in-sample prediction is appended to the feature matrix so that the models
# further down the chain can use it as an input.
x_pt = data_x.copy()
predict_y_train = pd.DataFrame({})
chain_model_dic = {}
for target_idx in order_list:
    target_col = data_y_col[target_idx]
    model_key = 'model_y' + str(target_idx)

    # Train this target's pipeline on the features accumulated so far.
    chain_model = model_list[target_idx]
    chain_model_dic[model_key] = chain_model
    chain_model.fit(x_pt, data_y.iloc[:, target_idx])

    # Predict once and reuse the result for both the output table and the
    # new feature column (x_pt is unchanged between the two uses).
    target_pred = chain_model.predict(x_pt)

    # Store the prediction for this target.
    predict_y_train[target_col] = target_pred

    # Feed the prediction back into X as an extra feature.
    x_pt[target_col] = target_pred

In [11]:
# (rows, columns) of the chained feature matrix: data_x's columns plus one
# prediction column per target added by the training loop.
x_pt.shape

(616, 128)

In [12]:
# Reorder the prediction columns to follow data_y_col (they were filled in
# chain order, not target order).
predict_y_train = predict_y_train[data_y_col]
predict_y_train

Unnamed: 0,sensor_point5_i_value,sensor_point6_i_value,sensor_point7_i_value,sensor_point8_i_value,sensor_point9_i_value,sensor_point10_i_value
0,-2.71,-1.14,-1.12,-1.72,-1.09,-2.67
1,-2.40,-0.93,-1.24,-1.55,-1.19,-2.09
2,-2.40,-0.93,-1.24,-1.55,-1.19,-2.09
3,-2.40,-0.93,-1.24,-1.55,-1.19,-2.09
4,-2.40,-0.93,-1.24,-1.55,-1.19,-2.09
...,...,...,...,...,...,...
611,1.21,0.04,0.35,0.49,1.30,0.45
612,1.27,-0.08,0.11,0.57,1.36,0.74
613,1.27,-0.08,0.11,0.57,1.36,0.74
614,1.69,0.30,1.50,0.23,1.80,0.35


## Save model

In [13]:
# Persist all fitted pipelines (keyed 'model_y<target index>') in one pickle.
chain_model_path = '../model/chain_model_dic.pickle'
with open(chain_model_path, 'wb') as handle:
    pickle.dump(chain_model_dic, handle)

In [14]:
# # 儲存預測結果(train)
# with open('../Output/predict_y_train(Albert).pickle', 'wb') as f:
#     pickle.dump(predict_y_train, f)

# # 儲存Model
# with open('../model/model_Y0(Albert).pickle', 'wb') as f:
#     pickle.dump(model_Y0, f)
    
# with open('../model/model_Y1(Albert).pickle', 'wb') as f:
#     pickle.dump(model_Y1, f)
    
# with open('../model/model_Y2(Albert).pickle', 'wb') as f:
#     pickle.dump(model_Y2, f)
    
# with open('../model/model_Y3(Albert).pickle', 'wb') as f:
#     pickle.dump(model_Y3, f)
    
# with open('../model/model_Y4(Albert).pickle', 'wb') as f:
#     pickle.dump(model_Y4, f)
    
# with open('../model/model_Y5(Albert).pickle', 'wb') as f:
#     pickle.dump(model_Y5, f)

## Predictions

In [15]:
# Apply the fitted models in chain order to compute the predictions (Y); each
# prediction is added to X as a new feature for the next model in the chain.
# (Best model order: [5, 3, 2, 1, 4, 0])

x_pt = data_x.copy()
predict_y_train = pd.DataFrame()

# Iterate over the chain order itself rather than a hard-coded length.
for target_idx in order_list:
    target_col = data_y_col[target_idx]

    target_pred = chain_model_dic['model_y' + str(target_idx)].predict(x_pt)
    predict_y_train[target_col] = target_pred
    # Add the predicted y as a feature.
    x_pt[target_col] = target_pred

In [16]:
# Reorder the prediction columns to follow data_y_col (they were filled in
# chain order, not target order).
predict_y_train = predict_y_train[data_y_col]
predict_y_train.head()

Unnamed: 0,sensor_point5_i_value,sensor_point6_i_value,sensor_point7_i_value,sensor_point8_i_value,sensor_point9_i_value,sensor_point10_i_value
0,-2.71,-1.14,-1.12,-1.72,-1.09,-2.67
1,-2.4,-0.93,-1.24,-1.55,-1.19,-2.09
2,-2.4,-0.93,-1.24,-1.55,-1.19,-2.09
3,-2.4,-0.93,-1.24,-1.55,-1.19,-2.09
4,-2.4,-0.93,-1.24,-1.55,-1.19,-2.09


In [17]:
# Persist the chained predictions (still on the scaled target scale) for later use.
chain_result_path = '../Data/meta/chain_model_res.pickle'
with open(chain_result_path, 'wb') as handle:
    pickle.dump(predict_y_train, handle)

In [18]:
# # 迴圈依據套用模型計算預測值(Y)，並將預測結果放入X中做為新變數供下一輪模型預測時使用。(最佳模型順序[5, 3, 2, 1, 4, 0])

# x_pt = data_x.copy()
# predict_y_train = pd.DataFrame({})

# for k in range(6):
    
#     with open('../model/model_Y'+ str(model_chains_config.order_list[k]) + '(Albert).pickle', 'rb') as f:
#         model = pickle.load(f)
    
#     # 儲存預測結果
#     predict_y_train[model_chains_config.data_y_col[model_chains_config.order_list[k]]] = model.predict(x_pt)
#     # 把取得的預測值當作變數放進X
#     x_pt[model_chains_config.data_y_col[model_chains_config.order_list[k]]] = model.predict(x_pt)
    
#     # 清空 model
#     del model
    
# # 調整欄位順序
# predict_y_train = predict_y_train[model_chains_config.data_y_col]
# predict_y_train.head()

In [19]:
# Load the y-scaler (used afterwards for inverse_transform) and the unscaled
# training targets that the predictions are scored against.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
# Plain string literals: the paths contain no placeholders, so no f-prefix.
with open('../model/standard_scaler_y.pickle', 'rb') as f:
    scaler_y = pickle.load(f)

with open('../Data/meta/non_scaled_train_y_after_feature_engineering.pickle', 'rb') as f:
    non_scaled_train_y = pickle.load(f)

In [20]:
# Project-local RMSE helper.
from utils.metric_utils import rmse_score

# Undo the y scaling so the error is measured against the unscaled targets.
predictions = scaler_y.inverse_transform(predict_y_train)
targets = non_scaled_train_y
# NOTE(review): presumably rmse_score returns one RMSE per target column (the
# printed result is a per-column Series) and compares rows by position —
# confirm in utils.metric_utils.
print(rmse_score(predictions, targets))

sensor_point5_i_value     5.24
sensor_point6_i_value     9.37
sensor_point7_i_value    10.21
sensor_point8_i_value     8.00
sensor_point9_i_value    11.14
sensor_point10_i_value    7.46
dtype: float64
