In [1]:
!pip install dalex # requires "Internet on"
import dalex as dx
import matplotlib.pyplot as plt

Collecting dalex
  Downloading dalex-1.6.0.tar.gz (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: dalex
  Building wheel for dalex (setup.py) ... [?25ldone
[?25h  Created wheel for dalex: filename=dalex-1.6.0-py3-none-any.whl size=1045995 sha256=29f636a1873024efb7702b99c85d25935fbf7e01cf3bf036c1ce50ff9ccb35fc
  Stored in directory: /root/.cache/pip/wheels/c8/45/19/f5810bf7c5ff9a476ebd89bb5b81a18ffcdf93931d17dbb0c1
Successfully built dalex
Installing collected packages: dalex
Successfully installed dalex-1.6.0




In [2]:
import os
import gc
import pickle

import numpy as np
import pandas as pd
import polars as pl

from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.metrics import mean_absolute_error
from sklearn.compose import TransformedTargetRegressor
from sklearn.ensemble import VotingRegressor

import lightgbm as lgb

import optuna

class MonthlyKFold:
    """Expanding-window splitter that holds out the latest calendar months.

    Rows are bucketed by ``12 * year + month``. For each of the last
    ``n_splits`` distinct buckets, the fold's test set is that month and the
    train set is every row from a strictly earlier month.

    Note: if ``n_splits`` is at least the number of distinct months, the
    earliest fold is yielded with an empty train set.
    """

    def __init__(self, n_splits=3):
        self.n_splits = n_splits

    def split(self, X, y=None, groups=None):
        """Yield ``(train_positions, test_positions)`` pairs, oldest fold first.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain integer-like ``year`` and ``month`` columns.
        y, groups : ignored
            Accepted for scikit-learn API compatibility only.

        Yields
        ------
        tuple of pandas.Index
            Zero-based row positions (not index labels) of train and test rows.
        """
        dates = 12 * X["year"] + X["month"]
        timesteps = sorted(dates.unique().tolist())
        month_ids = dates.to_numpy()

        # Positions are derived from boolean masks directly. This avoids copying
        # the whole frame via reset_index(), which also fails when the index
        # name collides with an existing column.
        for t in timesteps[-self.n_splits:]:
            idx_train = pd.Index(np.flatnonzero(month_ids < t))
            idx_test = pd.Index(np.flatnonzero(month_ids == t))

            yield idx_train, idx_test

    def get_n_splits(self, X=None, y=None, groups=None):
        """Return the configured number of folds; all arguments are ignored."""
        return self.n_splits

def feature_eng(df_data, df_client, df_gas, df_electricity, df_forecast, df_historical, df_location, df_target):
    """Join every auxiliary table onto ``df_data`` and derive calendar features.

    All inputs are polars frames. Steps, in order:

    1. Shift the side tables in time: client +2 days, gas +1 day,
       electricity +1 day, historical weather +37 hours. Forecast timestamps
       are converted to Europe/Bucharest, made naive and cast to microseconds.
    2. Map weather rows to counties through ``df_location`` and average them
       per timestamp (all stations) and per (county, timestamp).
    3. Left-join prices, client data, the four weather aggregates, the same
       four aggregates shifted by 7 days, and seven lagged copies of the
       target (2, 3, 4, 5, 6, 7 and 14 days) named ``target_1`` .. ``target_7``.
    4. Add day/weekday/month/year, the ``category_1`` composite key,
       sine/cosine encodings of day-of-year and hour, and downcast Float64
       columns to Float32.

    Returns the joined frame without ``date``, ``datetime``, ``hour`` and
    ``dayofyear``.
    """
    ts = pl.col("datetime")
    coords_f32 = [
        pl.col("latitude").cast(pl.datatypes.Float32),
        pl.col("longitude").cast(pl.datatypes.Float32),
    ]
    client_keys = ["county", "is_business", "product_type", "date"]
    county_time = ["county", "datetime"]
    segment_time = ["county", "is_business", "product_type", "is_consumption", "datetime"]

    def _mean_by_time(frame):
        # Average over every station for each timestamp.
        return frame.group_by("datetime").mean().drop("county")

    def _mean_by_county(frame):
        # Average per county, ignoring stations that map to no county.
        return (
            frame
            .filter(pl.col("county").is_not_null())
            .group_by("county", "datetime").mean()
        )

    # ---- time-align the side tables --------------------------------------
    df_data = df_data.with_columns(ts.cast(pl.Date).alias("date"))

    df_client = df_client.with_columns(
        (pl.col("date") + pl.duration(days=2)).cast(pl.Date)
    )

    df_gas = df_gas.rename({"forecast_date": "date"}).with_columns(
        (pl.col("date") + pl.duration(days=1)).cast(pl.Date)
    )

    df_electricity = df_electricity.rename({"forecast_date": "datetime"}).with_columns(
        ts + pl.duration(days=1)
    )

    df_location = df_location.with_columns(*coords_f32)

    # ---- attach counties to weather rows ---------------------------------
    df_forecast = (
        df_forecast
        .rename({"forecast_datetime": "datetime"})
        .with_columns(
            *coords_f32,
            pl.col('datetime').dt.convert_time_zone("Europe/Bucharest").dt.replace_time_zone(None).cast(pl.Datetime("us")),
        )
        .join(df_location, how="left", on=["longitude", "latitude"])
        .drop("longitude", "latitude")
    )

    df_historical = (
        df_historical
        .with_columns(*coords_f32, ts + pl.duration(hours=37))
        .join(df_location, how="left", on=["longitude", "latitude"])
        .drop("longitude", "latitude")
    )

    # (frame, join keys, suffix) in the exact order the joins must happen.
    weather_tables = [
        (_mean_by_time(df_forecast), "datetime", "_fd"),
        (_mean_by_county(df_forecast), county_time, "_fl"),
        (_mean_by_time(df_historical), "datetime", "_hd"),
        (_mean_by_county(df_historical), county_time, "_hl"),
    ]

    # ---- joins ------------------------------------------------------------
    joined = (
        df_data
        .join(df_gas, on="date", how="left")
        .join(df_client, on=client_keys, how="left")
        .join(df_electricity, on="datetime", how="left")
    )

    # Weather valid at the row's own timestamp.
    for table, keys, suffix in weather_tables:
        joined = joined.join(table, on=keys, how="left", suffix=suffix)

    # The same aggregates from one week earlier (suffix gains a trailing "w").
    for table, keys, suffix in weather_tables:
        joined = joined.join(
            table.with_columns(ts + pl.duration(days=7)),
            on=keys, how="left", suffix=suffix + "w",
        )

    # Lagged targets: target_1..target_6 are 2..7 days back, target_7 is 14 days back.
    for number, lag_days in enumerate((2, 3, 4, 5, 6, 7, 14), start=1):
        lagged_target = (
            df_target
            .with_columns(ts + pl.duration(days=lag_days))
            .rename({"target": f"target_{number}"})
        )
        joined = joined.join(lagged_target, on=segment_time, how="left")

    # ---- derived columns --------------------------------------------------
    year_angle = np.pi * pl.col("dayofyear") / 183
    day_angle = np.pi * pl.col("hour") / 12

    joined = joined.with_columns(
        ts.dt.ordinal_day().alias("dayofyear"),
        ts.dt.hour().alias("hour"),
        ts.dt.day().alias("day"),
        ts.dt.weekday().alias("weekday"),
        ts.dt.month().alias("month"),
        ts.dt.year().alias("year"),
    )

    joined = joined.with_columns(
        pl.concat_str("county", "is_business", "product_type", "is_consumption", separator="_").alias("category_1"),
    )

    joined = joined.with_columns(
        year_angle.sin().alias("sin(dayofyear)"),
        year_angle.cos().alias("cos(dayofyear)"),
        day_angle.sin().alias("sin(hour)"),
        day_angle.cos().alias("cos(hour)"),
    )

    # Downcast last so the freshly created sine/cosine columns are included.
    joined = joined.with_columns(pl.col(pl.Float64).cast(pl.Float32))

    return joined.drop("date", "datetime", "hour", "dayofyear")

def to_pandas(X, y=None):
    """Convert engineered features (and optionally the target) to a pandas frame.

    ``X`` and ``y`` must expose ``.to_pandas()``. The result is indexed by
    ``row_id``, the five key columns are cast to ``category``, and three
    summary columns are appended: the row-wise mean and standard deviation of
    ``target_1`` .. ``target_6``, and ``target_6 / (target_7 + 1e-3)``.
    """
    frame = X.to_pandas()
    if y is not None:
        # Column-wise concatenation: the target columns land after the features.
        frame = pd.concat([frame, y.to_pandas()], axis=1)

    frame = frame.set_index("row_id")

    categorical = ["county", "is_business", "product_type", "is_consumption", "category_1"]
    frame[categorical] = frame[categorical].astype("category")

    lag_block = frame[[f"target_{k}" for k in range(1, 7)]]
    frame["target_mean"] = lag_block.mean(axis=1)
    frame["target_std"] = lag_block.std(axis=1)
    # The small constant keeps the ratio finite when target_7 is zero.
    frame["target_ratio"] = frame["target_6"] / (frame["target_7"] + 1e-3)

    return frame

def lgb_objective(trial):
    """Optuna objective: MAE of one LightGBM regressor on the latest month.

    Eight hyper-parameters are sampled from ``trial``; the rest are fixed. The
    model is scored with ``cross_val_score`` on the module-level ``df_train``
    using ``MonthlyKFold(1)``, i.e. a single fold. Returns the mean absolute
    error (sign-flipped from scikit-learn's negated scorer), so lower is better.
    """
    fixed = {
        'n_iter'       : 1000,
        'verbose'      : -1,
        'random_state' : 42,
        'objective'    : 'l2',
    }
    sampled = {
        'learning_rate'    : trial.suggest_float('learning_rate', 0.01, 0.1),
        'colsample_bytree' : trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'colsample_bynode' : trial.suggest_float('colsample_bynode', 0.5, 1.0),
        'lambda_l1'        : trial.suggest_float('lambda_l1', 1e-2, 10.0),
        'lambda_l2'        : trial.suggest_float('lambda_l2', 1e-2, 10.0),
        'min_data_in_leaf' : trial.suggest_int('min_data_in_leaf', 4, 256),
        'max_depth'        : trial.suggest_int('max_depth', 5, 10),
        'max_bin'          : trial.suggest_int('max_bin', 32, 1024),
    }

    regressor = lgb.LGBMRegressor(**fixed, **sampled)

    features = df_train.drop(columns=["target"])
    labels = df_train["target"]

    fold_scores = cross_val_score(
        regressor,
        features,
        labels,
        cv=MonthlyKFold(1),
        scoring='neg_mean_absolute_error',
    )

    # The scorer is negated MAE; flip the sign so the study minimises MAE.
    return -np.mean(fold_scores)



In [3]:
### Global Variables

# Directory holding the competition CSV files.
root = "/kaggle/input/predict-energy-behavior-of-prosumers"

# Columns to read from each CSV.
data_cols = [
    'target', 'county', 'is_business', 'product_type', 'is_consumption',
    'datetime', 'row_id',
]
client_cols = [
    'product_type', 'county', 'eic_count', 'installed_capacity',
    'is_business', 'date',
]
gas_cols = ['forecast_date', 'lowest_price_per_mwh', 'highest_price_per_mwh']
electricity_cols = ['forecast_date', 'euros_per_mwh']
forecast_cols = [
    'latitude', 'longitude', 'hours_ahead', 'temperature', 'dewpoint',
    'cloudcover_high', 'cloudcover_low', 'cloudcover_mid', 'cloudcover_total',
    '10_metre_u_wind_component', '10_metre_v_wind_component',
    'forecast_datetime', 'direct_solar_radiation',
    'surface_solar_radiation_downwards', 'snowfall', 'total_precipitation',
]
historical_cols = [
    'datetime', 'temperature', 'dewpoint', 'rain', 'snowfall',
    'surface_pressure', 'cloudcover_total', 'cloudcover_low',
    'cloudcover_mid', 'cloudcover_high', 'windspeed_10m',
    'winddirection_10m', 'shortwave_radiation', 'direct_solar_radiation',
    'diffuse_radiation', 'latitude', 'longitude',
]
location_cols = ['longitude', 'latitude', 'county']
target_cols = [
    'target', 'county', 'is_business', 'product_type', 'is_consumption',
    'datetime',
]

# Model persistence: set save_path to write the fitted model, and load_path
# to reuse a pickled one instead of training.
save_path = None
load_path = '/kaggle/input/enefit-lgbm-model/model.pickle'

### Data

def _read_table(filename, columns):
    """Read one competition CSV from `root`, keeping `columns` and parsing dates."""
    return pl.read_csv(os.path.join(root, filename), columns=columns, try_parse_dates=True)

df_data        = _read_table("train.csv", data_cols)
df_client      = _read_table("client.csv", client_cols)
df_gas         = _read_table("gas_prices.csv", gas_cols)
df_electricity = _read_table("electricity_prices.csv", electricity_cols)
df_forecast    = _read_table("forecast_weather.csv", forecast_cols)
df_historical  = _read_table("historical_weather.csv", historical_cols)
df_location    = _read_table("weather_station_to_county_mapping.csv", location_cols)

# The lag features are built from the training rows' own targets.
df_target = df_data.select(target_cols)

# Schemas of the frames as loaded, captured before any feature engineering.
schema_data        = df_data.schema
schema_client      = df_client.schema
schema_gas         = df_gas.schema
schema_electricity = df_electricity.schema
schema_forecast    = df_forecast.schema
schema_historical  = df_historical.schema
schema_target      = df_target.schema

### Feature Engineering

# Keep the label apart from the inputs, then build the feature table from the inputs.
y = df_data.select("target")
X = feature_eng(
    df_data.drop("target"),
    df_client,
    df_gas,
    df_electricity,
    df_forecast,
    df_historical,
    df_location,
    df_target,
)

# Train only on rows that have a target and fall after 2021.
df_train = to_pandas(X, y).loc[
    lambda frame: frame["target"].notnull() & frame["year"].gt(2021)
]
df_train.info(verbose=True)

### Training

# Hyper-parameters for the train-from-scratch branch (load_path = None).
# Set `best_params` to a dict of LightGBM parameters to skip tuning; when it is
# left as None, it is tuned with Optuna through `lgb_objective`.
# `n_trials` is an arbitrary default budget; adjust it to the time available.
best_params = None
n_trials    = 20

if load_path is not None:
    # NOTE(review): unpickling executes arbitrary code; only point load_path at
    # a file you trust. The context manager closes the handle after loading.
    with open(load_path, "rb") as f:
        model = pickle.load(f)
else:
    if best_params is None:
        study = optuna.create_study(direction="minimize")
        study.optimize(lgb_objective, n_trials=n_trials)
        # study.best_params holds only the sampled values, so re-attach the
        # constants lgb_objective trains with (random_state is set per model below).
        best_params = {"n_iter": 1000, "verbose": -1, "objective": "l2", **study.best_params}

    # Five identically configured regressors that differ only in seed (100..104).
    model = VotingRegressor([
        (f"lgb_{number}", lgb.LGBMRegressor(**best_params, random_state=seed))
        for number, seed in enumerate(range(100, 105), start=1)
    ])

    model.fit(
        X=df_train.drop(columns=["target"]),
        y=df_train["target"]
    )

if save_path is not None:
    with open(save_path, "wb") as f:
        pickle.dump(model, f)

<class 'pandas.core.frame.DataFrame'>
Index: 1651902 entries, 366048 to 2018351
Data columns (total 137 columns):
 #    Column                                 Dtype   
---   ------                                 -----   
 0    county                                 category
 1    is_business                            category
 2    product_type                           category
 3    is_consumption                         category
 4    lowest_price_per_mwh                   float32 
 5    highest_price_per_mwh                  float32 
 6    eic_count                              float64 
 7    installed_capacity                     float32 
 8    euros_per_mwh                          float32 
 9    hours_ahead                            float32 
 10   temperature                            float32 
 11   dewpoint                               float32 
 12   cloudcover_high                        float32 
 13   cloudcover_low                         float32 
 14   cloudcover_mid  

In [4]:
# Label vector and feature matrix (every column except the label) for the explainer.
y = df_train["target"]
X = df_train.drop(columns="target")

In [5]:
# Wrap the fitted ensemble with its data and target so dalex can explain it.
lgm_explainer = dx.Explainer(
    model=model,
    data=X,
    y=y,
    label='lgm',
)

Preparation of a new explainer is initiated

  -> data              : 1651902 rows 136 cols
  -> target variable   : Parameter 'y' was a pandas.Series. Converted to a numpy.ndarray.
  -> target variable   : 1651902 values
  -> model_class       : sklearn.ensemble._voting.VotingRegressor (default)
  -> label             : lgm
  -> predict function  : <function yhat_default at 0x7998a9a44a60> will be used (default)
  -> predict function  : Accepts pandas.DataFrame and numpy.ndarray.
  -> predicted values  : min = -3.34e+02, mean = 2.85e+02, max = 1.53e+04
  -> model type        : regression will be used (default)
  -> residual function : difference between y and yhat (default)
  -> residuals         : min = -2.61e+03, mean = 0.000241, max = 4.04e+03
  -> model_info        : package sklearn

A new explainer has been created!


In [6]:
# Variable importance measured with MAE. The seed is fixed because the feature
# list used in the following cells is taken from this result's ordering, which
# would otherwise change from run to run.
pfi = lgm_explainer.model_parts(loss_function='mae', random_state=42)
pfi.plot(max_vars=25)

In [7]:
# Variables taken from the importance table in reverse order, dropping the first
# entry and keeping the next 25 (presumably the skipped row is dalex's
# `_baseline_` entry and the rest are the most important variables — verify).
features = list(reversed(pfi.result['variable'].to_list()))[1:26]

for var in features[:5]:
    # Skip the grouping variable itself and the composite key that embeds it.
    if var in ['is_consumption', 'category_1']:
        continue

    mp = lgm_explainer.model_profile(groups='is_consumption', variables=var, random_state=42)
    # Assign the relabelled column back instead of calling replace(inplace=True)
    # on `mp.result['_label_']`: the chained in-place form mutates a temporary
    # and stops updating the frame under pandas Copy-on-Write.
    mp.result['_label_'] = mp.result['_label_'].replace({'lgm_0':'production', 'lgm_1':'consumption'})
    mp.plot()

Calculating ceteris paribus: 100%|██████████| 1/1 [00:03<00:00,  3.05s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           142.90948242   285.81896484 ... 14005.12927734
 14148.03875977 14290.94824219]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:03<00:00,  3.27s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           154.80274414   309.60548828 ... 15170.66892578
 15325.47166992 15480.27441406]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:03<00:00,  3.50s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           154.80274414   309.60548828 ... 15170.66892578
 15325.47166992 15480.27441406]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:03<00:00,  3.05s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           154.80274414   309.60548828 ... 15170.66892578
 15325.47166992 15480.27441406]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





In [8]:
# Profiles for the remaining selected variables, split by is_consumption.
for var in features[5:]:
    # Skip the grouping variable itself and the composite key that embeds it.
    if var in ['is_consumption', 'category_1']:
        continue

    mp = lgm_explainer.model_profile(groups='is_consumption', variables=var, random_state=42)
    # Assign the relabelled column back instead of calling replace(inplace=True)
    # on `mp.result['_label_']`: the chained in-place form mutates a temporary
    # and stops updating the frame under pandas Copy-on-Write.
    mp.result['_label_'] = mp.result['_label_'].replace({'lgm_0':'production', 'lgm_1':'consumption'})
    mp.plot()

Calculating ceteris paribus: 100%|██████████| 1/1 [00:03<00:00,  3.42s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-4.19027776e-01  8.72585403e+00  1.78707358e+01 ...  8.95779389e+02
  9.04924271e+02  9.14069153e+02]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:03<00:00,  3.08s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[   0.           49.43988281   98.87976563 ... 4845.10851563 4894.54839844
 4943.98828125]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.73s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[1.   1.06 1.12 ... 6.88 6.94 7.  ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.91s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           154.80274414   309.60548828 ... 15170.66892578
 15325.47166992 15480.27441406]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.84s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[6.00000000e+00 1.99083105e+02 3.92166211e+02 ... 1.89281443e+04
 1.91212274e+04 1.93143105e+04]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.84s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           154.80274414   309.60548828 ... 15170.66892578
 15325.47166992 15480.27441406]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.82s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           154.80274414   309.60548828 ... 15170.66892578
 15325.47166992 15480.27441406]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.78s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-3.46230157e-02  8.80610896e+00  1.76468409e+01 ...  8.66357110e+02
  8.75197842e+02  8.84038574e+02]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.66s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-1.   -0.98 -0.96 ...  0.96  0.98  1.  ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.80s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           154.80274414   309.60548828 ... 15170.66892578
 15325.47166992 15480.27441406]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:03<00:00,  3.03s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-1.80158727e-02  7.97183897e+00  1.59616938e+01 ...  7.82987759e+02
  7.90977613e+02  7.98967468e+02]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.78s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[   5.     20.12   35.24 ... 1486.76 1501.88 1517.  ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.91s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-0.99996316 -0.9799639  -0.95996464 ...  0.95996464  0.9799639
  0.99996316]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.73s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-1.         -0.98000147 -0.96000295 ...  0.9598556   0.97985413
  0.99985266]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.86s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[ 1.   1.3  1.6 ... 30.4 30.7 31. ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.77s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-7.59959221e-06  8.88523459e-05  1.85304284e-04 ...  9.44469035e-03
  9.54114228e-03  9.63759422e-03]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.84s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[ 1.    1.11  1.22 ... 11.78 11.89 12.  ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.76s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-1.87222227e-01  8.10434238e+00  1.63959070e+01 ...  8.12386109e+02
  8.20677674e+02  8.28969238e+02]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.70s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-22.57199669 -22.13757652 -21.70315636 ...  20.00117958  20.43559975
  20.87001991]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.





In [9]:
# Conditional profiles for the first five selected variables, split by is_consumption.
for var in features[:5]:
    # Skip the grouping variable itself and the composite key that embeds it.
    if var in ['is_consumption', 'category_1']:
        continue

    mp = lgm_explainer.model_profile(groups='is_consumption', variables=var, type='conditional', random_state=42)
    # Assign the relabelled column back instead of calling replace(inplace=True)
    # on `mp.result['_label_']`: the chained in-place form mutates a temporary
    # and stops updating the frame under pandas Copy-on-Write.
    mp.result['_label_'] = mp.result['_label_'].replace({'lgm_0':'production', 'lgm_1':'consumption'})
    mp.plot()

Calculating ceteris paribus: 100%|██████████| 1/1 [00:03<00:00,  3.00s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           142.90948242   285.81896484 ... 14005.12927734
 14148.03875977 14290.94824219]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 11.93it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:03<00:00,  3.64s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           154.80274414   309.60548828 ... 15170.66892578
 15325.47166992 15480.27441406]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 11.63it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:03<00:00,  3.38s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           154.80274414   309.60548828 ... 15170.66892578
 15325.47166992 15480.27441406]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 11.64it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:03<00:00,  3.06s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           154.80274414   309.60548828 ... 15170.66892578
 15325.47166992 15480.27441406]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 12.31it/s]


In [10]:
# Conditional profiles for the remaining selected variables, split by is_consumption.
for var in features[5:]:
    # Skip the grouping variable itself and the composite key that embeds it.
    if var in ['is_consumption', 'category_1']:
        continue

    mp = lgm_explainer.model_profile(groups='is_consumption', variables=var, type='conditional', random_state=42)
    # Assign the relabelled column back instead of calling replace(inplace=True)
    # on `mp.result['_label_']`: the chained in-place form mutates a temporary
    # and stops updating the frame under pandas Copy-on-Write.
    mp.result['_label_'] = mp.result['_label_'].replace({'lgm_0':'production', 'lgm_1':'consumption'})
    mp.plot()

Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.76s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-4.19027776e-01  8.72585403e+00  1.78707358e+01 ...  8.95779389e+02
  9.04924271e+02  9.14069153e+02]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 11.98it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:03<00:00,  3.22s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[   0.           49.43988281   98.87976563 ... 4845.10851563 4894.54839844
 4943.98828125]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 13.21it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.74s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[1.   1.06 1.12 ... 6.88 6.94 7.  ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 13.09it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.95s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           154.80274414   309.60548828 ... 15170.66892578
 15325.47166992 15480.27441406]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 12.90it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.71s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[6.00000000e+00 1.99083105e+02 3.92166211e+02 ... 1.89281443e+04
 1.91212274e+04 1.93143105e+04]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 12.25it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.94s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           154.80274414   309.60548828 ... 15170.66892578
 15325.47166992 15480.27441406]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 13.19it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:03<00:00,  3.36s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           154.80274414   309.60548828 ... 15170.66892578
 15325.47166992 15480.27441406]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 13.20it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.67s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-3.46230157e-02  8.80610896e+00  1.76468409e+01 ...  8.66357110e+02
  8.75197842e+02  8.84038574e+02]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00,  9.73it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.72s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-1.   -0.98 -0.96 ...  0.96  0.98  1.  ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 13.28it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.89s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[    0.           154.80274414   309.60548828 ... 15170.66892578
 15325.47166992 15480.27441406]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 10.18it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.73s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-1.80158727e-02  7.97183897e+00  1.59616938e+01 ...  7.82987759e+02
  7.90977613e+02  7.98967468e+02]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 12.95it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.83s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[   5.     20.12   35.24 ... 1486.76 1501.88 1517.  ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 13.00it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.84s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-0.99996316 -0.9799639  -0.95996464 ...  0.95996464  0.9799639
  0.99996316]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 13.09it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.84s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-1.         -0.98000147 -0.96000295 ...  0.9598556   0.97985413
  0.99985266]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 13.44it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.77s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[ 1.   1.3  1.6 ... 30.4 30.7 31. ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 12.96it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.77s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-7.59959221e-06  8.88523459e-05  1.85304284e-04 ...  9.44469035e-03
  9.54114228e-03  9.63759422e-03]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 12.54it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:03<00:00,  3.14s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[ 1.    1.11  1.22 ... 11.78 11.89 12.  ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 13.44it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.85s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-1.87222227e-01  8.10434238e+00  1.63959070e+01 ...  8.12386109e+02
  8.20677674e+02  8.28969238e+02]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 12.50it/s]


Calculating ceteris paribus: 100%|██████████| 1/1 [00:02<00:00,  2.69s/it]

Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '[-22.57199669 -22.13757652 -21.70315636 ...  20.00117958  20.43559975
  20.87001991]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Calculating conditional dependency: 100%|██████████| 1/1 [00:00<00:00, 12.97it/s]
