In [1]:
import os
import sys
import numpy as np
import polars as pl

# Directories added to the import search path so that the project modules
# imported by the following cell (`src.*`) can be resolved.
package_paths = [
    '/kaggle/input/enefit-experiment-benchmark-live/kaggle_enefit_energy',
]

# The membership check keeps this cell idempotent: re-running it does not
# append the same directory to sys.path a second time.
for pth in package_paths:
    if pth not in sys.path:
        sys.path.append(pth)

In [2]:
import enefit

from src.utils.import_utils import import_config, import_params
from src.preprocess import PreprocessPipeline
from src.model.lgbm import ModelPipeline

# Run configuration; the same dict is later passed to both PreprocessPipeline
# and ModelPipeline via their `config_dict` argument.
config_dict = {
    "PATH_ORIGINAL_DATA": r'/kaggle/input/predict-energy-behavior-of-prosumers',
    "PATH_PARQUET_DATA": r'data/parquet',
    "PATH_MAPPING_DATA": r'/kaggle/input/predict-energy-behavior-of-prosumers',
    "PATH_EXPERIMENT": r'experiment',
    "N_FOLD": 5,
    "TARGET_COL": "target"
}

# Create the working directories relative to the current working directory.
# exist_ok=True makes the call a no-op when the directory is already there,
# which replaces the separate exists()-then-makedirs() check and removes the
# window between the check and the creation. If one of these paths exists but
# is not a directory, makedirs raises FileExistsError instead of skipping it.
for _dir in ('config', config_dict['PATH_PARQUET_DATA'], config_dict['PATH_EXPERIMENT']):
    os.makedirs(_dir, exist_ok=True)
                
# Copy the stored feature list into the local, writable config directory:
# read the whole source file first, then write the text out unchanged.
with open('/kaggle/input/enefit-experiment-benchmark-live/kaggle_enefit_energy/config/best_feature.txt', "r") as source_handle:
    best_feature_text = source_handle.read()

with open('config/best_feature.txt', 'w') as target_handle:
    target_handle.write(best_feature_text)

In [3]:
# Hyper-parameter dictionary handed to ModelPipeline as `params_lgb`.
# NOTE(review): which of these keys are consumed by the project wrapper itself
# (e.g. "n_round") rather than forwarded to LightGBM is not visible here — confirm.
lgb_params = dict(
    boosting_type="gbdt",
    objective="mae",
    n_jobs=-1,
    num_leaves=256,
    learning_rate=0.1,
    feature_fraction=0.75,
    feature_fraction_bynode=0.75,
    bagging_freq=1,
    bagging_fraction=0.80,
    lambda_l1=3.5,
    lambda_l2=1.5,
    max_depth=20,
    min_data_in_leaf=50,
    verbosity=-1,
    n_round=3000,
    min_data_in_bin=5,
)

# Preprocessing pipeline; it is later fed new data through
# update_with_new_data() and invoked as a callable to (re)build its `.data`.
data_processor = PreprocessPipeline(config_dict=config_dict, target_n_lags=14, embarko_skip=60)

# Model pipeline used for the one-off training and for every prediction.
trainer = ModelPipeline(
    experiment_name='benchmark_live',
    params_lgb=lgb_params,
    config_dict=config_dict,
    metric_eval='l1',
    log_evaluation=50,
    use_importance_filter=True,
    number_importance_feature=200,
)

Importing best feature from lgb experiment


In [4]:
# Create the competition environment and obtain the iterator that the
# submission loop unpacks into eight objects per step. `env` is also the
# object the loop reports each step's predictions to via env.predict().
# NOTE(review): Kaggle time-series environments usually allow make_env() only
# once per kernel session — confirm before re-running this cell.
env = enefit.make_env()
iter_test = env.iter_test()

In [5]:
# Submission loop, one iteration per batch yielded by the competition iterator.
# While data_processor.inference is falsy the loop only accumulates data; on
# the first batch that contains a currently-scored row it builds the dataset,
# trains the model once and switches both pipelines to inference. From then
# on every batch with scored rows is predicted, the others are answered with 0.
for (
    test_data,
    target_data_new,
    client_data_new,
    historical_weather_data_new,
    forecast_weather_data_new,
    electricity_data_new,
    gas_data_new,
    sample_prediction
) in iter_test:

    # Keyword arguments that are identical for every update_with_new_data()
    # call made during this step; only `test_data` differs between calls.
    step_frames = dict(
        client_data_new=client_data_new,
        gas_data_new=gas_data_new,
        electricity_data_new=electricity_data_new,
        forecast_weather_data_new=forecast_weather_data_new,
        historical_weather_data_new=historical_weather_data_new,
        target_data_new=target_data_new,
    )

    if not data_processor.inference:
        # Accumulation phase: the newly revealed targets are re-labelled as
        # test rows that are not scored and appended to the processor.
        revealed_rows = target_data_new.rename(columns={"datetime": "prediction_datetime"})
        revealed_rows['currently_scored'] = False
        data_processor.update_with_new_data(**step_frames, test_data=revealed_rows)

        if any(test_data["currently_scored"]):
            # First scored batch: build the dataset, train, then enter inference.
            print('Getting dataset')
            data_processor()

            print('training model')
            trainer.train_explain()

            print('Activating inference')
            trainer.activate_inference()
            data_processor.begin_inference()

            # Feed the real test rows of this step and rebuild before predicting.
            data_processor.update_with_new_data(**step_frames, test_data=test_data)
            data_processor()

            step_target = trainer.predict(data_processor.data)
        else:
            # Nothing is scored yet: keep accumulating and answer with zeros.
            step_target = 0

    else:
        # Inference phase: append the new data and predict when required;
        # the model is not retrained.
        data_processor.update_with_new_data(**step_frames, test_data=test_data)

        if any(test_data["currently_scored"]):
            data_processor()
            # NOTE(review): predictions are assumed to come back in the row
            # order of sample_prediction — confirm in trainer.predict.
            step_target = trainer.predict(data_processor.data)
        else:
            step_target = 0

    sample_prediction["target"] = step_target
    env.predict(sample_prediction)

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.
