# 4. Inference

In [1]:
import sys, os
import torch
import pandas as pd
sys.path.append("../../")
sys.path.append("../../../")
sys.path.append("../../../../")

from Clust.clust.ML.tool import data as ml_data
from Clust.clust.ML.tool import scaler as ml_scaler
from Clust.clust.ML.tool import clean as ml_clean

from Clust.setting import influx_setting_KETI as ins
from Clust.clust.ingestion.influx import influx_client_v2 as influx_Client
from Clust.clust.ingestion.mongo.mongo_client import MongoClient

db_client = influx_Client.InfluxClient(ins.CLUSTDataServer2)
mongo_client = MongoClient(ins.CLUSTMetaInfo2)

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"{device}" " is available.")

  from .autonotebook import tqdm as notebook_tqdm


cuda is available.


In [2]:
# Set regression mode i.e., 'regression','forecast'
mode_selection = "regression"

# get integrated data name
bucket_name = 'integration'

# scaler path
scalerPath = './scaler/'

### 4-1. Get model meta by mongodb

In [3]:
db_name = 'model'
collection_name = 'meta'

if mode_selection == 'regression':
    search = {'modelName': 'regression_energy_LSTM_rg_cleanLevel0'}
elif mode_selection == 'forecast':
    search = {'modelName': 'forecasting_Hs2SwineFarmWithWeatherTime_LSTM_rg_cleanLevel0'}


model_meta= mongo_client.get_document_by_json(db_name, collection_name, search)[0]
model_meta

{'trainDataInfo': {'data_name': 'regression_energy_trainX_cleanLevel0',
  'ingestion_param': {'start_time': '2016-01-11',
   'end_time': '2016-04-15',
   'ms_list_info': [['life_indoor_environment', 'humidityTrain_10min'],
    ['life_indoor_environment', 'temperatureTrain_10min'],
    ['weather_outdoor_environment', 'belgiumChieverseAirportTrain_10min']]},
  'integration_param': {'param': {},
   'method': 'meta',
   'integration_duration': 'common',
   'integration_frequency': 600},
  'clean_level': 0,
  'process_param': {'refine_param': {'removeDuplication': {'flag': False},
    'staticFrequency': {'flag': False, 'frequency': None}},
   'outlier_param': {'certainErrorToNaN': {'flag': False},
    'unCertainErrorToNaN': {'flag': False}},
   'imputation_param': {'flag': False}}},
 'modelName': 'regression_energy_LSTM_rg_cleanLevel0',
 'featureList': ['Press_mm_hg',
  'RH_1',
  'RH_2',
  'RH_3',
  'RH_4',
  'RH_5',
  'RH_6',
  'RH_7',
  'RH_8',
  'RH_9',
  'RH_out',
  'T1',
  'T2',
  'T3'

### 4-2. Inference data selection

In [4]:
# 0. pick only one data
collection_list = mongo_client.get_collection_list(bucket_name)
collection_list

['forecasting_strawberryOpen',
 'regression_energy',
 'forecasting_Hs2SwineFarmWithWeatherTime',
 'forecasting_gunwiStrawberryWeather',
 'classification_actionPattern']

In [5]:
if mode_selection == 'regression':

    data_name_X = 'regression_energy_testX_cleanLevel0'
    dataX = db_client.get_data(bucket_name, data_name_X)
    
    clean_level = model_meta["trainDataInfo"]["clean_level"]
    integration_freq_sec = model_meta["trainDataInfo"]["integration_param"]["integration_frequency"]
    
elif mode_selection == 'forecast':

    data_name_X = 'forecasting_Hs2SwineFarmWithWeatherTime_test_cleanLevel0'
    dataX = db_client.get_data(bucket_name, data_name_X)
    
    clean_level = model_meta["trainDataInfo"]["clean_level"]
    integration_freq_sec = model_meta["trainDataInfo"]["integration_param"]["integration_frequency"]

In [6]:
print(dataX.head)

<bound method NDFrame.head of                            Press_mm_hg       RH_1       RH_2       RH_3  \
time                                                                      
2021-01-01 00:00:00+00:00   757.000000  40.260000  43.200000  38.530000   
2021-01-01 00:10:00+00:00   757.116667  40.260000  43.163333  38.500000   
2021-01-01 00:20:00+00:00   757.233333  40.290000  42.963333  38.633333   
2021-01-01 00:30:00+00:00   757.350000  40.290000  42.490000  38.790000   
2021-01-01 00:40:00+00:00   757.466667  40.626667  42.156667  38.596667   
...                                ...        ...        ...        ...   
2021-02-11 23:10:00+00:00   756.200000  38.163333  34.760000  37.884286   
2021-02-11 23:20:00+00:00   756.200000  38.172500  34.700000  37.790000   
2021-02-11 23:30:00+00:00   756.200000  38.126667  34.590000  37.790000   
2021-02-11 23:40:00+00:00   756.200000  37.990000  34.590000  37.790000   
2021-02-11 23:50:00+00:00   756.200000  37.790000  34.626667  37.79000

### 4-3. Inference data preparation

In [7]:
feature_list = model_meta['featureList']
target = model_meta['target']
scaler_param = model_meta['scalerParam']
model_file_path = model_meta['files']['modelFile']['filePath']
model_method = model_meta['model_method']

if mode_selection == 'regression':
    # Scaling Inference Input
    X_scaler_file_path = model_meta['files']['XScalerFile']["filePath"]
    y_scaler_file_path = model_meta['files']['yScalerFile']["filePath"]

    infer_X, scaler_X = ml_scaler.get_scaled_test_data(dataX[feature_list], X_scaler_file_path, scaler_param)
    scaler_y = ml_scaler.get_scaler_file(y_scaler_file_path)

elif mode_selection == 'forecast':
    clean_param = model_meta['cleanLevel']
    nan_processing_param = model_meta['NaNProcessingParam']

    # Scaling Inference Input
    X_scaler_file_path = model_meta['files']['XScalerFile']['filePath']
    infer_X, scaler_X = ml_scaler.get_scaled_test_data(dataX[feature_list], X_scaler_file_path, scaler_param)
    clean_infer_X = ml_clean.get_cleand_data(infer_X, clean_param, integration_freq_sec, nan_processing_param)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [8]:
print(infer_X)

                           Press_mm_hg      RH_1      RH_2      RH_3  \
time                                                                   
2021-01-01 00:00:00+00:00     0.669082  0.364279  0.664944  0.456302   
2021-01-01 00:10:00+00:00     0.671900  0.364279  0.663872  0.454900   
2021-01-01 00:20:00+00:00     0.674718  0.365104  0.658023  0.461131   
2021-01-01 00:30:00+00:00     0.677536  0.365104  0.644180  0.468453   
2021-01-01 00:40:00+00:00     0.680354  0.374369  0.634432  0.459417   
...                                ...       ...       ...       ...   
2021-02-11 23:10:00+00:00     0.649758  0.306577  0.418113  0.426123   
2021-02-11 23:20:00+00:00     0.649758  0.306830  0.416358  0.421717   
2021-02-11 23:30:00+00:00     0.649758  0.305568  0.413141  0.421717   
2021-02-11 23:40:00+00:00     0.649758  0.301807  0.413141  0.421717   
2021-02-11 23:50:00+00:00     0.649758  0.296303  0.414213  0.421717   

                               RH_4      RH_5      RH_6      RH

In [10]:
transformParameter = model_meta['transformParameter']
# # transform
if mode_selection == 'regression':
    window_num = transformParameter['window_num']
    inferX = []
    inferX.append(infer_X[-window_num:].values)
    inferX = np.array(inferX)

# forecast
elif mode_selection == 'forecast':
    past_step = transformParameter['past_step']
    inferX = []
    inferX.append(clean_infer_X[-past_step:].values)
    inferX = np.array(inferX)

KeyError: 'window_num'

In [None]:
modelParameter = model_meta["modelParameter"]

inferParameter = {
    'device': 'cpu',
    'batch_size': 1
}

In [None]:
# 4. Inference
from Clust.clust.ML.regression_YK.inference import RegressionInference as RI

ri = RI()
ri.set_param(inferParameter)
ri.set_model(model_method, model_file_path, modelParameter)
ri.set_data(inferX)
preds = ri.inference()
print(preds)

In [None]:
# for regression
if mode_selection == 'regression':
    if scaler_param =='scale':
        base_df_for_inverse = pd.DataFrame(columns=target, index=range(len(preds)))
        base_df_for_inverse[target] = preds
        prediction_result = pd.DataFrame(scaler_y.inverse_transform(base_df_for_inverse), columns=target, index=base_df_for_inverse.index)
    else:
        prediction_result = pd.DataFrame(data={"value": preds}, index=range(len(preds)))

# for forecast
elif mode_selection == 'forecast':
    if scaler_param =='scale':
        base_df_for_inverse = pd.DataFrame(columns=feature_list, index=range(len(preds)))
        base_df_for_inverse[target] = preds
        inverse_result = pd.DataFrame(scaler_X.inverse_transform(base_df_for_inverse), columns=feature_list, index=base_df_for_inverse.index)
        target_data = inverse_result[target]
        prediction_result = pd.DataFrame(data={target: target_data}, index=range(len(preds)))
    else:
        prediction_result = pd.DataFrame(data={target: preds}, index=range(len(preds)))

In [None]:
prediction_result