# Model Prediction
---
Description:

This notebook walks through the processing of the test data. At the end, you obtain two sets of predictions, each produced by a different method (a multi-output model and a set of single-output models).
The data flow is driven by the settings in `model_predict_config.py`.

---
## 1. Import Libraries

In [1]:
import pandas as pd
import numpy as np
import pickle

import model_predict_config as mp_config
import feature_engineering_config as fe_config

# feature engineering
from feature_engineering import DealWithMissingValue, VariableTransformation, OutlierDetection,\
FeatureSelection, CreateGroupFeatureFromEachCol, CreateGroupFeatureFromAllCol, GeneratePolynomialFeatures, ReduceDimensionPCA, Standardization

## 2. Read xlsx File

In [2]:
# Load the raw test features from the Excel workbook.
test_x = pd.read_excel('../Data/2022-test-v1.xlsx')

In [3]:
# Preview the first rows of the raw test data.
test_x.head()

Unnamed: 0,clean_temp,clean_ec,clean_ph4,clean_ph5,clean_ph7,clean_ph8,clean_pressure11,clean_pressure12,clean_pressure21,clean_pressure22,...,env_rpi14_pm1,env_rpi14_pm10,env_rpi14_pm25,env_rpi14_temp,env_rpi15_hum,env_rpi15_lux,env_rpi15_pm1,env_rpi15_pm10,env_rpi15_pm25,env_rpi15_temp
0,41.9,12.5,,,,7.0,820.26,1266.29,882.58,630.71,...,26.87,48.62,38.0,34.57,43.95,0.5,30.37,53.0,43.37,34.91
1,41.1,10.67,,,,6.75,810.2,1266.53,885.96,630.76,...,17.71,30.14,24.42,36.16,41.48,0.42,13.71,21.42,18.14,37.81
2,41.1,10.67,,,,6.75,809.84,1266.31,885.65,630.65,...,17.71,30.14,24.42,36.16,41.48,0.42,13.71,21.42,18.14,37.81
3,50.2,12.67,8.6,7.8,7.3,7.13,848.06,1454.1,1000.42,899.14,...,26.5,49.37,39.37,34.71,48.44,1.25,27.12,45.5,38.0,34.79
4,50.2,12.67,8.6,7.8,7.3,7.13,848.06,1454.1,1000.42,899.14,...,26.5,49.37,39.37,34.71,48.44,1.25,27.12,45.5,38.0,34.79


In [4]:
# Show the (rows, columns) dimensions of the raw test data.
test_x.shape

(100, 125)

## 3. Pipeline

In [5]:
# Read the shared pipeline settings from the prediction config once, so the
# feature-engineering steps can refer to them by short names.
train_process, random_state = mp_config.train_process, mp_config.random_state

In [6]:
# Feature-engineering pipeline for the test set.
# Each step below reassigns or extends `test_x`, so the steps must run in this
# order. Which steps run, and with which options, is controlled by `mp_config`.
# NOTE(review): `train=train_process` presumably tells the helpers whether to
# fit new artifacts or reuse the ones saved during training -- confirm in
# feature_engineering.py.

# define nan: optionally treat every 0 as a missing value
if mp_config.replace_zero_to_nan:
    test_x = test_x.replace({0: np.nan})

# missing values: any strategy not listed here leaves the data unchanged
missing_value_strategy = mp_config.deal_with_missing_value
if missing_value_strategy == 'drop_na':
    test_x = DealWithMissingValue(test_x).drop_na()
elif missing_value_strategy == 'imputation':
    test_x = DealWithMissingValue(test_x).imputation(strategy='most_frequent', train=train_process)
elif missing_value_strategy == 'k_neighbors_regressor':
    test_x = DealWithMissingValue(test_x).iterative_imputer(method='k_neighbors_regressor', train=train_process)

# variable transformation
# `large_skew_bool` is a pickled selector applied to the numeric columns of
# `test_x`; only the selected columns are transformed.
# NOTE: pickle.load can execute arbitrary code; only load files written by
# this project's own training run.
with open('../Model/variable_transformation.pickle', 'rb') as skew_file:
    large_skew_bool = pickle.load(skew_file)

# NOTE(review): `_get_numeric_data` is a private pandas method. It is kept
# as-is because the pickled selector was presumably built against the same
# call during training -- confirm before replacing it.
for col in test_x._get_numeric_data().columns[large_skew_bool]:
    test_x[col] = VariableTransformation(test_x, col=col).transform(
        method=mp_config.variable_transformation,
        train=train_process
    )

# Options shared by the whole-frame and the per-column group features.
kmeans_options = dict(
    standardization=mp_config.kmeans_standardization,
    k_range=mp_config.kmeans_k_range,
    random_state=random_state,
    parallel=True,
    parallel_verbose=0,
    train=train_process
)

# create group feature: computed from all columns and stored as strings
test_x['group'] = CreateGroupFeatureFromAllCol(test_x).kmeans_with_auto_k(**kmeans_options).astype(str)

# convert each column to a '<column>_group' category feature
# The column list is snapshotted first because the loop adds new columns to
# `test_x`; the 'group' column created above is skipped.
source_columns = [col for col in test_x.columns if col != 'group']
for col in source_columns:
    test_x[col + '_group'] = CreateGroupFeatureFromEachCol(test_x[[col]]).kmeans_with_auto_k(**kmeans_options).astype(str)

# generate polynomial and interaction features
if mp_config.generate_polynomial_features:
    test_x = GeneratePolynomialFeatures(test_x).get_dataframe(
        degree=mp_config.pf_degree,
        interaction_only=mp_config.pf_interaction_only,
        train=train_process
    )

# reduce dimension with PCA
if mp_config.reduce_dimension:
    test_x = ReduceDimensionPCA(test_x).pca(n_components=0.99, train=train_process)

# standardization: any other config value skips scaling
if mp_config.standardization == 'standard_scaler':
    test_x = Standardization(test_x, prefix='x').standard_scaler(train=train_process)
elif mp_config.standardization == 'min_max_scaler':
    test_x = Standardization(test_x, prefix='x').min_max_scaler(train=train_process)

# Feature Selection: keep only the columns listed in the config
test_x = test_x[mp_config.feature_select]

In [7]:
# Preview the first rows after feature engineering and feature selection.
test_x.head()

Unnamed: 0,clean_pressure31,clean_pressure33,clean_pressure41,clean_pressure51,clean_pressure52,clean_pressure61,clean_pressure71,clean_pressure72,clean_pressure81,clean_pressure91,...,clean_pressure82_group,oven_pb1_group,oven_pb2_group,oven_a1_group,oven_b1_group,oven_b3_group,painting_g3_act_hvv_group,painting_g8_act_f_air_group,painting_g9_act_hvc_group,env_rpi05_temp_group
0,-0.868736,-2.332674,2.327716,2.007752,-2.977568,1.903804,0.292734,2.852519,1.715746,-0.553109,...,4,4,4,4,4,4,4,4,4,4
1,-3.14182,-2.342185,2.327716,1.153487,-2.969933,0.71051,-0.118546,2.708596,1.456228,-1.027601,...,4,4,4,4,4,4,4,4,4,4
2,-3.175087,-2.341928,2.327716,1.076818,-2.971485,0.71051,-0.117792,2.708596,1.478729,-1.053326,...,4,4,4,4,4,4,4,4,4,4
3,-0.610754,0.167955,2.327716,0.701092,1.355382,1.924557,-0.958072,1.125443,1.395245,-0.49631,...,4,4,4,4,4,4,4,4,4,4
4,-0.610754,0.167955,2.327716,0.701092,1.355382,1.924557,-0.958072,1.125443,1.395245,-0.49631,...,4,4,4,4,4,4,4,4,4,4


## 4. Save test data after feature engineering

In [8]:
# Persist the engineered test features so later steps can reload them
# without re-running the pipeline.
with open('../Data/meta/test_x_after_feature_engineering.pickle', 'wb') as output_file:
    pickle.dump(test_x, output_file)

## 5. Multi-output Model and Single Output Model

* Multi-output Model

In [10]:
# Load the pickled multi-output model.
# NOTE: pickle.load can execute arbitrary code; only load model files that
# were produced by this project's own training run.
with open('../Model/catboost_all_20220913235830.pickle', 'rb') as model_file:
    muti_output_model = pickle.load(model_file)

* Single output Model

In [11]:
# Load the pickled collection of single-output models. It is indexed by
# target column when predicting.
# NOTE: pickle.load can execute arbitrary code; only load model files that
# were produced by this project's own training run.
with open('../Model/catboost_single_20220913235830.pickle', 'rb') as model_file:
    single_output_model_dic = pickle.load(model_file)

## 6. Load y Scaler Model

In [12]:
# Load the pickled target scaler. Its inverse_transform is applied to the
# model outputs in the prediction section.
# NOTE: pickle.load can execute arbitrary code; only load files that were
# produced by this project's own training run.
with open('../Model/standard_scaler_y.pickle', 'rb') as scaler_file:
    scaler_y = pickle.load(scaler_file)

## 7. Prediction

* Multi-output Model

In [13]:
# Predict with the multi-output model, then undo the target scaling.
scaled_prediction_muti = muti_output_model.predict(test_x)
prediction_muti = scaler_y.inverse_transform(scaled_prediction_muti)

* Single output Model

In [14]:
# Load the pickled list of target columns. It sets the order in which the
# single-output predictions are assembled.
# NOTE: pickle.load can execute arbitrary code; only load files that were
# produced by this project's own training run.
with open('../Model/target_col_list.pickle', 'rb') as target_file:
    test_y_col = pickle.load(target_file)

In [15]:
# Predict each target with its own model and collect the results as column
# vectors, following the order of `test_y_col`.
array_li = []
for col_y in test_y_col:
    target_model = single_output_model_dic[col_y]['model']
    array_li.append(target_model.predict(test_x).reshape(-1, 1))

# Join the per-target columns side by side into one 2-D array.
all_res = np.concatenate(array_li, axis=1)

In [16]:
# Undo the target scaling on the combined single-output predictions.
prediction_single = scaler_y.inverse_transform(all_res)

## Stack of estimators with a final regressor.

In [None]:
# Display the loaded multi-output model object.
# NOTE(review): this cell has no execution count, so it has not been run in
# the saved notebook; confirm the section heading matches what this object is.
muti_output_model