# Warning: Must be cleaned before shared.
Content is implemented in the eis_ts_fresh_xgb file. It seems like this notebook was for experimenting

In [1]:
# Notebook hosted at Google Colab: https://colab.research.google.com/drive/1zCLW5xoKo7xu4VSe-YeabJ1SsIW90OFV?usp=sharing


In [1]:
import numpy as np
import pandas as pd
from pandas.core.frame import DataFrame

from scipy.interpolate import interp1d

from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputRegressor
from sklearn.compose import TransformedTargetRegressor
from sklearn.preprocessing import QuantileTransformer, PowerTransformer
from sklearn.dummy import DummyRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns

import xgboost as xgb

from tsfresh.transformers import RelevantFeatureAugmenter, FeatureAugmenter
from tsfresh import extract_relevant_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters, MinimalFCParameters

from typing import Dict, Any
import numpy as np
import pandas as pd
from functools import partial

Function definitions:

In [2]:
def parse_circuit_params_from_str(params_str: str) -> Dict[str, float]:
    return {item.split(":")[0].strip(): float(item.split(":")[1].strip()) for item in params_str.split(",")}

In [3]:
def eis_dataframe_from_csv(csv_path) -> pd.DataFrame:
    """ Reads a CSV file of EIS data into a pandas dataframe

    Args:
        csv_df_path (File-Like-Object): path to file of, or buffer of, EIS data in CSV format

    Returns:
        pd.DataFrame: Dataframe with two or three columns - each row represents an EIS spectrum
            - freq      : short for frequency, column of real-number numpy arrays
            - Z         : short for impedance, column of imaginary-number numpy arrays
            - Circuit   : Equivalent Circuit Model labels assigned to the spectra (Optional)
            - Parameters: Parameters of the Equivalent Circuit Models (Optional)
    """
    df = pd.read_csv(csv_path, index_col=0)

    def real2array(arraystr: str):
        return np.array([float(c.strip("[]")) for c in arraystr.split(", ")])

    def comp2array(arraystr: str):
        return np.array([complex(c.strip("[]").replace(" ", "")) for c in arraystr.split(", ")])

    if "freq" in df.columns:
        df["freq"] = df["freq"].apply(real2array)
    if "Z" in df.columns:
        df["Z"] = df["Z"].apply(comp2array)

    return df

Prior version (interpolated variables)
```
def process_batch_element(freq, impedance, interpolated_basis):
    x_real = np.real(impedance)
    x_imag = -np.imag(impedance)

    f_real = interp1d(freq, x_real)
    f_imag = interp1d(freq, x_imag)
    return np.concatenate((f_real(interpolated_basis), f_imag(interpolated_basis)))

# Separate freq, zreal, zimag
def process_batch_element_f(interpolated_basis):
    return interpolated_basis

def process_batch_element_zreal(freq, Z, interpolated_basis):
    x = np.real(Z)
    f = interp1d(freq, x)
    return f(interpolated_basis)

def process_batch_element_zimag(freq, Z, interpolated_basis):
    x = np.imag(Z)
    f = interp1d(freq, x)
    return f(interpolated_basis)

def process_batch_element_params(Parameters):
    Params = parse_circuit_params_from_str(Parameters)
    return np.array(list(Params.values()))
```



In [4]:
def process_batch_element(freq, impedance):
    x_real = np.real(impedance)
    x_imag = -np.imag(impedance)
    return np.concatenate((x_real, x_imag))

# Separate freq, zreal, zimag
def process_batch_element_f(interpolated_basis):
    return interpolated_basis

def process_batch_element_zreal(freq, Z):
    x = np.real(Z)
    return x

def process_batch_element_zimag(freq, Z):
    x = np.imag(Z)
    f = interp1d(freq, x)
    return x

def process_batch_element_params(Parameters):
    Params = parse_circuit_params_from_str(Parameters)
    return np.array(list(Params.values()))

def process_batch_element_params_str(Parameters):
    Params = parse_circuit_params_from_str(Parameters)
    return np.array(list(Params.keys()))

In [24]:
def unwrap_df(df):
  df2 = DataFrame(columns=['id','freq','zreal','zimag'])
  for i in np.arange(df.shape[0]):
    f, zreal, zimag = df[["f","zreal","zimag"]].loc[i]
    id = np.tile(i, f.size)
    df_ = DataFrame(data=(id,np.log(f),zreal,zimag), index=['id','freq','zreal','zimag']).T
    df2 = df2.append(df_, ignore_index=True)
  return df2

In [6]:
def transform_params(p, mask_skip):
    p_ = np.log(p)
    p_[mask_skip] = p[mask_skip]
    return p_

def invert_params(p, mask_skip):
    p_ = np.exp(p)
    p_[mask_skip] = p[mask_skip]
    return p_

Process data:

In [11]:
df = eis_dataframe_from_csv("./data/train_data.csv")
df.shape

(7462, 4)

In [12]:
df

Unnamed: 0,freq,Z,Circuit,Parameters
0,"[0.1, 0.148398179, 0.220220195, 0.326802759, 0...","[(309.82561192-156.06088233j), (267.46983919-1...",L-R-RCPE-RCPE-RCPE,"L1: 2.94e-08, R1: 4.51e+00, R2: 5.19e-02, CPE1..."
1,"[1.0, 1.34339933, 1.80472177, 2.42446202, 3.25...","[(344.50700012-0.87321496j), (344.36191597-0.9...",RC-RC-RCPE-RCPE,"R1: 2.08e+02, R2: 2.50e+01, R3: 9.57e+01, R4: ..."
2,"[1.0, 1.26360956, 1.59670912, 2.01761691, 2.54...","[(3080.15920083-80.84202473j), (3071.83539583-...",L-R-RCPE-RCPE-RCPE,"L1: 3.35e-08, R1: 6.95e+01, R2: 7.49e+01, CPE1..."
3,"[10.0, 13.4990445, 18.2224203, 24.5985264, 33....","[(930.93345951-0.0068507146j), (930.93327153-0...",L-R-RCPE,"L1: 8.43e-07, R1: 9.06e+01, R2: 8.40e+02, CPE1..."
4,"[0.01, 0.0148907532, 0.0221734532, 0.033017942...","[(405.07355219-0.0149508921j), (405.07110253-0...",RCPE-RCPE-RCPE,"R1: 1.03e+01, R2: 6.71e-01, R3: 3.94e+02, CPE1..."
...,...,...,...,...
7457,"[10.0, 13.4596032, 18.1160919, 24.383541, 32.8...","[(4953.31225754-4814.61138816j), (4269.4031861...",RCPE-RCPE,"R1: 2.47e+01, R2: 2.95e+05, CPE1_t: 9.41e-01, ..."
7458,"[10.0, 12.6485522, 15.9985872, 20.2358965, 25....","[(73538.51000223-1776.24017654j), (73413.60429...",L-R-RCPE-RCPE-RCPE,"L1: 1.05e-08, R1: 2.81e+01, R2: 6.40e+01, CPE1..."
7459,"[1.0, 1.34453288, 1.80776868, 2.43060443, 3.26...","[(82.1093149-51.12982215j), (72.40330299-47.29...",RCPE-RCPE-RCPE,"R1: 4.78e+00, R2: 8.77e-01, R3: 2.85e+02, CPE1..."
7460,"[0.01, 0.0134051824, 0.0179698915, 0.024088967...","[(32.37740171-5.15196774e-05j), (32.37740063-6...",RCPE-RCPE,"R1: 5.65e+00, R2: 2.67e+01, CPE1_t: 9.21e-01, ..."


In [18]:
df["f"] = df.apply(lambda x: process_batch_element_f(x.freq), axis=1)
df["zreal"] = df.apply(lambda x: process_batch_element_zreal(x.freq, x.Z), axis=1)
df["zimag"] = df.apply(lambda x: process_batch_element_zimag(x.freq, x.Z), axis=1)
df["param_strs"] = df.apply(lambda x: process_batch_element_params_str(x.Parameters), axis=1)
df["param_values"] = df.apply(lambda x: process_batch_element_params(x.Parameters), axis=1)

In [19]:
df

Unnamed: 0,freq,Z,Circuit,Parameters,f,zreal,zimag,param_strs,param_values
0,"[0.1, 0.148398179, 0.220220195, 0.326802759, 0...","[(309.82561192-156.06088233j), (267.46983919-1...",L-R-RCPE-RCPE-RCPE,"L1: 2.94e-08, R1: 4.51e+00, R2: 5.19e-02, CPE1...","[0.1, 0.148398179, 0.220220195, 0.326802759, 0...","[309.82561192, 267.46983919, 229.38061493, 196...","[-156.06088233, -146.21033646, -133.4855237, -...","[L1, R1, R2, CPE1_t, CPE1_C, R3, CPE2_t, CPE2_...","[2.94e-08, 4.51, 0.0519, 0.673, 2.82e-07, 44.2..."
1,"[1.0, 1.34339933, 1.80472177, 2.42446202, 3.25...","[(344.50700012-0.87321496j), (344.36191597-0.9...",RC-RC-RCPE-RCPE,"R1: 2.08e+02, R2: 2.50e+01, R3: 9.57e+01, R4: ...","[1.0, 1.34339933, 1.80472177, 2.42446202, 3.25...","[344.50700012, 344.36191597, 344.19647198, 344...","[-0.87321496, -0.99738889, -1.13909869, -1.300...","[R1, R2, R3, R4, C2, CPE3_C, CPE4_t, CPE4_C, C...","[208.0, 25.0, 95.7, 16.3, 1.82e-07, 9.06e-06, ..."
2,"[1.0, 1.26360956, 1.59670912, 2.01761691, 2.54...","[(3080.15920083-80.84202473j), (3071.83539583-...",L-R-RCPE-RCPE-RCPE,"L1: 3.35e-08, R1: 6.95e+01, R2: 7.49e+01, CPE1...","[1.0, 1.26360956, 1.59670912, 2.01761691, 2.54...","[3080.15920083, 3071.83539583, 3061.93812951, ...","[-80.84202473, -94.50641483, -110.36642266, -1...","[L1, R1, R2, CPE1_t, CPE1_C, R3, CPE2_t, CPE2_...","[3.35e-08, 69.5, 74.9, 0.607, 4.62e-12, 137.0,..."
3,"[10.0, 13.4990445, 18.2224203, 24.5985264, 33....","[(930.93345951-0.0068507146j), (930.93327153-0...",L-R-RCPE,"L1: 8.43e-07, R1: 9.06e+01, R2: 8.40e+02, CPE1...","[10.0, 13.4990445, 18.2224203, 24.5985264, 33....","[930.93345951, 930.93327153, 930.93302176, 930...","[-0.0068507146, -0.00910185162, -0.0120927041,...","[L1, R1, R2, CPE1_t, CPE1_C]","[8.43e-07, 90.6, 840.0, 0.947, 1.1e-09]"
4,"[0.01, 0.0148907532, 0.0221734532, 0.033017942...","[(405.07355219-0.0149508921j), (405.07110253-0...",RCPE-RCPE-RCPE,"R1: 1.03e+01, R2: 6.71e-01, R3: 3.94e+02, CPE1...","[0.01, 0.0148907532, 0.0221734532, 0.033017942...","[405.07355219, 405.07110253, 405.06786553, 405...","[-0.0149508921, -0.0197551995, -0.026103215, -...","[R1, R2, R3, CPE1_t, CPE1_C, CPE2_t, CPE2_C, C...","[10.3, 0.671, 394.0, 0.519, 1.92e-09, 0.741, 8..."
...,...,...,...,...,...,...,...,...,...
7457,"[10.0, 13.4596032, 18.1160919, 24.383541, 32.8...","[(4953.31225754-4814.61138816j), (4269.4031861...",RCPE-RCPE,"R1: 2.47e+01, R2: 2.95e+05, CPE1_t: 9.41e-01, ...","[10.0, 13.4596032, 18.1160919, 24.383541, 32.8...","[4953.31225754, 4269.40318615, 3680.28615178, ...","[-4814.61138816, -4165.41666719, -3601.4380483...","[R1, R2, CPE1_t, CPE1_C, CPE2_t, CPE2_C]","[24.7, 295000.0, 0.941, 1.29e-11, 0.503, 4.48e..."
7458,"[10.0, 12.6485522, 15.9985872, 20.2358965, 25....","[(73538.51000223-1776.24017654j), (73413.60429...",L-R-RCPE-RCPE-RCPE,"L1: 1.05e-08, R1: 2.81e+01, R2: 6.40e+01, CPE1...","[10.0, 12.6485522, 15.9985872, 20.2358965, 25....","[73538.51000223, 73413.60429656, 73304.9395675...","[-1776.24017654, -1592.10621049, -1461.6962630...","[L1, R1, R2, CPE1_t, CPE1_C, R3, CPE2_t, CPE2_...","[1.05e-08, 28.1, 64.0, 0.782, 1.58e-08, 73100...."
7459,"[1.0, 1.34453288, 1.80776868, 2.43060443, 3.26...","[(82.1093149-51.12982215j), (72.40330299-47.29...",RCPE-RCPE-RCPE,"R1: 4.78e+00, R2: 8.77e-01, R3: 2.85e+02, CPE1...","[1.0, 1.34453288, 1.80776868, 2.43060443, 3.26...","[82.1093149, 72.40330299, 63.66501847, 55.8809...","[-51.12982215, -47.29789889, -43.31241432, -39...","[R1, R2, R3, CPE1_t, CPE1_C, CPE2_t, CPE2_C, C...","[4.78, 0.877, 285.0, 0.518, 2.87e-11, 0.894, 1..."
7460,"[0.01, 0.0134051824, 0.0179698915, 0.024088967...","[(32.37740171-5.15196774e-05j), (32.37740063-6...",RCPE-RCPE,"R1: 5.65e+00, R2: 2.67e+01, CPE1_t: 9.21e-01, ...","[0.01, 0.0134051824, 0.0179698915, 0.024088967...","[32.37740171, 32.37740063, 32.3773992, 32.3773...","[-5.15196774e-05, -6.82381469e-05, -9.03818659...","[R1, R2, CPE1_t, CPE1_C, CPE2_t, CPE2_C]","[5.65, 26.7, 0.921, 1.52e-12, 0.959, 5.98e-06]"


For tsfresh we need to 'unwrap' all the measurements into a dataframe with measurement ids, freq, zreal, and zimag columns

In [25]:
df_ts = unwrap_df(df)
df_ts.shape, len(np.unique(df_ts["id"]))

((366441, 4), 7462)

In [26]:
df_ts

Unnamed: 0,id,freq,zreal,zimag
0,0.0,-2.302585,309.825612,-156.060882
1,0.0,-1.907856,267.469839,-146.210336
2,0.0,-1.513127,229.380615,-133.485524
3,0.0,-1.118398,196.168815,-119.162289
4,0.0,-0.723670,167.918745,-104.412341
...,...,...,...,...
366436,7461.0,9.911127,0.693812,-3.099571
366437,7461.0,10.311577,0.492535,-2.193738
366438,7461.0,10.712026,0.351077,-1.552448
366439,7461.0,11.112476,0.251471,-1.098535


Train a model for each class of circuit

In [None]:
circuits = np.unique(df["Circuit"])
circuits

array(['L-R-RCPE', 'L-R-RCPE-RCPE', 'L-R-RCPE-RCPE-RCPE', 'RC-G-G',
       'RC-RC-RCPE-RCPE', 'RCPE-RCPE', 'RCPE-RCPE-RCPE',
       'RCPE-RCPE-RCPE-RCPE', 'Rs_Ws'], dtype=object)

In [None]:
dfs = list()
for i in np.arange(len(circuits)):
  dfs.append(df[df["Circuit"] == circuits[i]])
dfs[0].index

Int64Index([   3,   16,   19,   20,   24,   56,   58,   60,   64,   68,
            ...
            7390, 7391, 7401, 7411, 7414, 7418, 7429, 7435, 7447, 7448],
           dtype='int64', length=865)

Working model: quantile transform on target.

In [None]:
ppls = list()
param_values_pred = list(range(df.shape[0]))
for df_ in dfs:
  print(" ")
  print("Fitting regression for %s" % df_["Circuit"].loc[df_.index[0]])
  df_ts_ = df_ts[df_ts["id"].isin(df_.index)]
  df_x = pd.DataFrame(index=np.unique(df_ts_["id"]))
  df_y = pd.DataFrame(df_['param_values'].to_list(), columns=df_["param_strs"].loc[df_.index[0]])
  X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=42)
  # transformer mask, tell the transformer which parameters to skip over
  mask = [n for n, x in enumerate(df_["param_strs"].loc[df_.index[0]]) if '_t' in x]
  mdl = MultiOutputRegressor(estimator=xgb.XGBRegressor())
  regr = TransformedTargetRegressor(regressor=mdl,
                                    transformer=QuantileTransformer(n_quantiles=10, random_state=0))
                                    #transformer=PowerTransformer())
  ppl = Pipeline([
        ('augmenter', FeatureAugmenter(column_id='id', column_sort='freq', default_fc_parameters=ComprehensiveFCParameters())),
        ('regressor', regr)
      ])
  ppl.set_params(augmenter__timeseries_container=df_ts_);
  ppl.fit(X_train, y_train)
  y_pred = ppl.predict(X_test)
  print("MAE:%.4g" % mean_absolute_error(y_test, y_pred))
  p_strs = df_["param_strs"].loc[df_.index[0]]
  for i in np.arange(len(p_strs)):
    print("%s MAE:%.4g" % (p_strs[i], mean_absolute_error(y_test[p_strs[i]], y_pred[:,i])))
  ppls.append(ppl)
  for idx, i in zip(df_.index, np.arange(y_pred.shape[0])):
    param_values_pred[idx] = y_pred[i,:]

 
Fitting regression for L-R-RCPE


Feature Extraction: 100%|██████████| 1384/1384 [01:56<00:00, 11.84it/s]




Feature Extraction: 100%|██████████| 346/346 [00:25<00:00, 13.62it/s]


MAE:4745.8882
L1 MAE:1.085e-06
R1 MAE:34.59
R2 MAE:2.369e+04
CPE1_t MAE:0.04888
CPE1_C MAE:0.009586
 
Fitting regression for L-R-RCPE-RCPE


Feature Extraction: 100%|██████████| 1440/1440 [01:42<00:00, 14.08it/s]




Feature Extraction: 100%|██████████| 360/360 [00:25<00:00, 14.37it/s]


MAE:5538.6045
L1 MAE:1.37e-06
R1 MAE:61.53
R2 MAE:272.1
CPE1_t MAE:0.1241
CPE1_C MAE:3.452e-06
R3 MAE:4.397e+04
CPE2_t MAE:0.0502
CPE2_C MAE:0.009648
 
Fitting regression for L-R-RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1400/1400 [01:37<00:00, 14.29it/s]




Feature Extraction: 100%|██████████| 352/352 [00:24<00:00, 14.35it/s]


MAE:5479.9450
L1 MAE:1.105e-06
R1 MAE:47.83
R2 MAE:307.5
CPE1_t MAE:0.1343
CPE1_C MAE:8.507e-07
R3 MAE:1997
CPE2_t MAE:0.1054
CPE2_C MAE:0.0002995
R4 MAE:5.793e+04
CPE3_t MAE:0.08104
CPE3_C MAE:0.01627
 
Fitting regression for RC-G-G


Feature Extraction: 100%|██████████| 1424/1424 [01:41<00:00, 13.99it/s]




Feature Extraction: 100%|██████████| 356/356 [00:25<00:00, 14.17it/s]


MAE:7.4466
R1 MAE:17.32
C1 MAE:3.851e-06
R_g1 MAE:1.271
t_g1 MAE:0.7613
R_g2 MAE:14.81
t_g2 MAE:10.52
 
Fitting regression for RC-RC-RCPE-RCPE


Feature Extraction: 100%|██████████| 1450/1450 [01:42<00:00, 14.09it/s]




Feature Extraction: 100%|██████████| 364/364 [00:25<00:00, 14.46it/s]


MAE:6091.4197
R1 MAE:4.56
R2 MAE:4.231
R3 MAE:13.44
R4 MAE:6.089e+04
C2 MAE:7.181e-06
CPE3_C MAE:7.607e-05
CPE4_t MAE:0.08951
CPE4_C MAE:0.4147
C1 MAE:5.656e-17
CPE3_t MAE:2.384e-08
 
Fitting regression for RCPE-RCPE


Feature Extraction: 100%|██████████| 1368/1368 [01:37<00:00, 14.01it/s]




Feature Extraction: 100%|██████████| 342/342 [00:24<00:00, 14.18it/s]


MAE:3680.9706
R1 MAE:45.49
R2 MAE:2.204e+04
CPE1_t MAE:0.1125
CPE1_C MAE:0.001267
CPE2_t MAE:0.05015
CPE2_C MAE:0.01113
 
Fitting regression for RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1470/1470 [01:43<00:00, 14.14it/s]




Feature Extraction: 100%|██████████| 368/368 [00:25<00:00, 14.28it/s]


MAE:3267.4488
R1 MAE:10.85
R2 MAE:336.6
R3 MAE:2.906e+04
CPE1_t MAE:0.1303
CPE1_C MAE:1.584e-06
CPE2_t MAE:0.1149
CPE2_C MAE:4.308e-05
CPE3_t MAE:0.07012
CPE3_C MAE:0.02655
 
Fitting regression for RCPE-RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1478/1478 [01:44<00:00, 14.20it/s]




Feature Extraction: 100%|██████████| 370/370 [00:26<00:00, 14.20it/s]


MAE:8039.6460
R1 MAE:30.83
R2 MAE:161.9
R3 MAE:2368
R4 MAE:9.391e+04
CPE1_t MAE:0.1505
CPE1_C MAE:4.192e-07
CPE2_t MAE:0.1298
CPE2_C MAE:0.001242
CPE3_t MAE:0.1138
CPE3_C MAE:0.005052
CPE4_t MAE:0.0915
CPE4_C MAE:0.02163
 
Fitting regression for Rs_Ws


Feature Extraction: 100%|██████████| 520/520 [00:36<00:00, 14.30it/s]




Feature Extraction: 100%|██████████| 132/132 [00:09<00:00, 14.31it/s]


MAE:105.7937
R1 MAE:21.24
W1_R MAE:236.3
W1_T MAE:165.4
W1_p MAE:0.2096


In [None]:
# tsfresh-xgboost no quantile transform
ppls = list()
param_values_pred = list(range(df.shape[0]))
for df_ in dfs:
  print(" ")
  print("Fitting regression for %s" % df_["Circuit"].loc[df_.index[0]])
  df_ts_ = df_ts[df_ts["id"].isin(df_.index)]
  df_x = pd.DataFrame(index=np.unique(df_ts_["id"]))
  df_y = pd.DataFrame(df_['param_values'].to_list(), columns=df_["param_strs"].loc[df_.index[0]])
  X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=42)
  # transformer mask, tell the transformer which parameters to skip over
  mask = [n for n, x in enumerate(df_["param_strs"].loc[df_.index[0]]) if '_t' in x]
  mdl = MultiOutputRegressor(estimator=xgb.XGBRegressor())
  #regr = TransformedTargetRegressor(regressor=mdl,
  #                                  transformer=QuantileTransformer(n_quantiles=10, random_state=0))
  #                                  #transformer=PowerTransformer())
  ppl = Pipeline([
        ('augmenter', FeatureAugmenter(column_id='id', column_sort='freq', default_fc_parameters=ComprehensiveFCParameters())),
        ('regressor', mdl)
      ])
  ppl.set_params(augmenter__timeseries_container=df_ts_);
  ppl.fit(X_train, y_train)
  y_pred = ppl.predict(X_test)
  print("MAE:%.4g" % mean_absolute_error(y_test, y_pred))
  p_strs = df_["param_strs"].loc[df_.index[0]]
  for i in np.arange(len(p_strs)):
    print("%s MAE:%.4g" % (p_strs[i], mean_absolute_error(y_test[p_strs[i]], y_pred[:,i])))
  ppls.append(ppl)
  for idx, i in zip(df_.index, np.arange(y_pred.shape[0])):
    param_values_pred[idx] = y_pred[i,:]

 
Fitting regression for L-R-RCPE


Feature Extraction: 100%|██████████| 1384/1384 [01:36<00:00, 14.37it/s]




Feature Extraction: 100%|██████████| 346/346 [00:24<00:00, 14.41it/s]


MAE:3836
L1 MAE:1.352e-05
R1 MAE:40.09
R2 MAE:1.914e+04
CPE1_t MAE:0.04905
CPE1_C MAE:0.01479
 
Fitting regression for L-R-RCPE-RCPE


Feature Extraction: 100%|██████████| 1440/1440 [01:42<00:00, 14.12it/s]




Feature Extraction: 100%|██████████| 360/360 [00:26<00:00, 13.80it/s]


MAE:6399
L1 MAE:1.329e-05
R1 MAE:67.65
R2 MAE:296.9
CPE1_t MAE:0.122
CPE1_C MAE:1.599e-05
R3 MAE:5.083e+04
CPE2_t MAE:0.05153
CPE2_C MAE:0.01181
 
Fitting regression for L-R-RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1400/1400 [01:37<00:00, 14.32it/s]




Feature Extraction: 100%|██████████| 352/352 [00:24<00:00, 14.49it/s]


MAE:9246
L1 MAE:1.359e-05
R1 MAE:64.84
R2 MAE:168
CPE1_t MAE:0.1315
CPE1_C MAE:1.497e-05
R3 MAE:2778
CPE2_t MAE:0.1041
CPE2_C MAE:0.0005445
R4 MAE:9.869e+04
CPE3_t MAE:0.07981
CPE3_C MAE:0.02701
 
Fitting regression for RC-G-G


Feature Extraction: 100%|██████████| 1424/1424 [01:40<00:00, 14.20it/s]




Feature Extraction: 100%|██████████| 356/356 [00:24<00:00, 14.36it/s]


MAE:6.022
R1 MAE:5.98
C1 MAE:1.832e-05
R_g1 MAE:2.187
t_g1 MAE:1.149
R_g2 MAE:15.3
t_g2 MAE:11.51
 
Fitting regression for RC-RC-RCPE-RCPE


Feature Extraction: 100%|██████████| 1450/1450 [01:43<00:00, 14.08it/s]




Feature Extraction: 100%|██████████| 364/364 [00:25<00:00, 14.39it/s]


MAE:9807
R1 MAE:4.029
R2 MAE:7.525
R3 MAE:10.72
R4 MAE:9.805e+04
C2 MAE:2.246e-05
CPE3_C MAE:0.0001083
CPE4_t MAE:0.091
CPE4_C MAE:0.4991
C1 MAE:1.356e-05
CPE3_t MAE:1.075e-05
 
Fitting regression for RCPE-RCPE


Feature Extraction: 100%|██████████| 1368/1368 [01:40<00:00, 13.63it/s]




Feature Extraction: 100%|██████████| 342/342 [00:24<00:00, 14.20it/s]


MAE:3768
R1 MAE:38.54
R2 MAE:2.257e+04
CPE1_t MAE:0.114
CPE1_C MAE:0.001225
CPE2_t MAE:0.05142
CPE2_C MAE:0.01332
 
Fitting regression for RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1470/1470 [01:43<00:00, 14.20it/s]




Feature Extraction: 100%|██████████| 368/368 [00:25<00:00, 14.25it/s]


MAE:5426
R1 MAE:12.5
R2 MAE:247.1
R3 MAE:4.857e+04
CPE1_t MAE:0.131
CPE1_C MAE:1.501e-05
CPE2_t MAE:0.1149
CPE2_C MAE:7.918e-05
CPE3_t MAE:0.0688
CPE3_C MAE:0.03097
 
Fitting regression for RCPE-RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1478/1478 [01:44<00:00, 14.19it/s]




Feature Extraction: 100%|██████████| 370/370 [00:27<00:00, 13.70it/s]


MAE:1.144e+04
R1 MAE:33.57
R2 MAE:227.6
R3 MAE:2660
R4 MAE:1.344e+05
CPE1_t MAE:0.1517
CPE1_C MAE:2.1e-05
CPE2_t MAE:0.133
CPE2_C MAE:0.002711
CPE3_t MAE:0.1115
CPE3_C MAE:0.006446
CPE4_t MAE:0.09264
CPE4_C MAE:0.0299
 
Fitting regression for Rs_Ws


Feature Extraction: 100%|██████████| 520/520 [00:36<00:00, 14.09it/s]




Feature Extraction: 100%|██████████| 132/132 [00:09<00:00, 14.32it/s]


MAE:150.5
R1 MAE:41
W1_R MAE:223.3
W1_T MAE:337.5
W1_p MAE:0.189


In [None]:
# dummy model, no quantile transform
ppls_0 = list()
for df_ in dfs:
  print(" ")
  print("Fitting regression for %s" % df_["Circuit"].loc[df_.index[0]])
  df_ts_ = df_ts[df_ts["id"].isin(df_.index)]
  df_x = pd.DataFrame(index=np.unique(df_ts_["id"]))
  df_y = pd.DataFrame(df_['param_values'].to_list(), columns=df_["param_strs"].loc[df_.index[0]])
  X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=42)
  # transformer mask, tell the transformer which parameters to skip over
  mask = [n for n, x in enumerate(df_["param_strs"].loc[df_.index[0]]) if '_t' in x]
  mdl = DummyRegressor(strategy='median')
  #regr = TransformedTargetRegressor(regressor=mdl, 
  #                                  func=partial(transform_params, mask_skip=mask),
  #                                  inverse_func=partial(invert_params, mask_skip=mask))
  ppl = Pipeline([
        ('regressor', mdl)
      ])
  ppl.fit(X_train, y_train)
  y_pred = ppl.predict(X_test)
  print("MAE:%.4g" % mean_absolute_error(y_test, y_pred))
  p_strs = df_["param_strs"].loc[df_.index[0]]
  for i in np.arange(len(p_strs)):
    print("%s MAE:%.4g" % (p_strs[i], mean_absolute_error(y_test[p_strs[i]], y_pred[:,i])))
  ppls_0.append(ppl)

 
Fitting regression for L-R-RCPE
MAE:7342
L1 MAE:1.372e-06
R1 MAE:41.88
R2 MAE:3.667e+04
CPE1_t MAE:0.1324
CPE1_C MAE:0.01403
 
Fitting regression for L-R-RCPE-RCPE
MAE:6860
L1 MAE:1.671e-06
R1 MAE:65.77
R2 MAE:290.9
CPE1_t MAE:0.1266
CPE1_C MAE:3.477e-06
R3 MAE:5.452e+04
CPE2_t MAE:0.136
CPE2_C MAE:0.01115
 
Fitting regression for L-R-RCPE-RCPE-RCPE
MAE:6933
L1 MAE:1.23e-06
R1 MAE:52.71
R2 MAE:336.5
CPE1_t MAE:0.1257
CPE1_C MAE:8.535e-07
R3 MAE:2181
CPE2_t MAE:0.1226
CPE2_C MAE:0.0003276
R4 MAE:7.369e+04
CPE3_t MAE:0.1193
CPE3_C MAE:0.01829
 
Fitting regression for RC-G-G
MAE:22.4
R1 MAE:74.1
C1 MAE:4.985e-06
R_g1 MAE:1.283
t_g1 MAE:0.7691
R_g2 MAE:45.08
t_g2 MAE:13.17
 
Fitting regression for RC-RC-RCPE-RCPE
MAE:5967
R1 MAE:33.19
R2 MAE:6.394
R3 MAE:40.62
R4 MAE:5.959e+04
C2 MAE:1.216e-05
CPE3_C MAE:0.0001672
CPE4_t MAE:0.1629
CPE4_C MAE:0.5516
C1 MAE:0
CPE3_t MAE:0
 
Fitting regression for RCPE-RCPE
MAE:4632
R1 MAE:50.85
R2 MAE:2.774e+04
CPE1_t MAE:0.1176
CPE1_C MAE:0.0009411
CPE2_

In [None]:
# dummy model, quantile transform
ppls_0 = list()
for df_ in dfs:
  print(" ")
  print("Fitting regression for %s" % df_["Circuit"].loc[df_.index[0]])
  df_ts_ = df_ts[df_ts["id"].isin(df_.index)]
  df_x = pd.DataFrame(index=np.unique(df_ts_["id"]))
  df_y = pd.DataFrame(df_['param_values'].to_list(), columns=df_["param_strs"].loc[df_.index[0]])
  X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=42)
  # transformer mask, tell the transformer which parameters to skip over
  mask = [n for n, x in enumerate(df_["param_strs"].loc[df_.index[0]]) if '_t' in x]
  mdl = DummyRegressor(strategy='median')
  regr = TransformedTargetRegressor(regressor=mdl,
                                    transformer=QuantileTransformer(n_quantiles=10, random_state=0))
  ppl = Pipeline([
        ('regressor', regr)
      ])
  ppl.fit(X_train, y_train)
  y_pred = ppl.predict(X_test)
  print("MAE:%.4g" % mean_absolute_error(y_test, y_pred))
  p_strs = df_["param_strs"].loc[df_.index[0]]
  for i in np.arange(len(p_strs)):
    print("%s MAE:%.4g" % (p_strs[i], mean_absolute_error(y_test[p_strs[i]], y_pred[:,i])))
  ppls_0.append(ppl)

 
Fitting regression for L-R-RCPE
MAE:7342
L1 MAE:1.372e-06
R1 MAE:41.88
R2 MAE:3.667e+04
CPE1_t MAE:0.1324
CPE1_C MAE:0.01403
 
Fitting regression for L-R-RCPE-RCPE
MAE:6860
L1 MAE:1.671e-06
R1 MAE:65.77
R2 MAE:290.9
CPE1_t MAE:0.1266
CPE1_C MAE:3.477e-06
R3 MAE:5.452e+04
CPE2_t MAE:0.136
CPE2_C MAE:0.01115
 
Fitting regression for L-R-RCPE-RCPE-RCPE
MAE:6933
L1 MAE:1.23e-06
R1 MAE:52.71
R2 MAE:336.5
CPE1_t MAE:0.1257
CPE1_C MAE:8.535e-07
R3 MAE:2181
CPE2_t MAE:0.1226
CPE2_C MAE:0.0003276
R4 MAE:7.369e+04
CPE3_t MAE:0.1193
CPE3_C MAE:0.01829
 
Fitting regression for RC-G-G
MAE:22.4
R1 MAE:74.1
C1 MAE:4.985e-06
R_g1 MAE:1.283
t_g1 MAE:0.7691
R_g2 MAE:45.08
t_g2 MAE:13.17
 
Fitting regression for RC-RC-RCPE-RCPE
MAE:5967
R1 MAE:33.19
R2 MAE:6.394
R3 MAE:40.62
R4 MAE:5.959e+04
C2 MAE:1.216e-05
CPE3_C MAE:0.0001672
CPE4_t MAE:0.1629
CPE4_C MAE:0.5516
C1 MAE:0
CPE3_t MAE:0
 
Fitting regression for RCPE-RCPE
MAE:4632
R1 MAE:50.85
R2 MAE:2.774e+04
CPE1_t MAE:0.1176
CPE1_C MAE:0.0009411
CPE2_

Model trained on all data for submission.

In [None]:
#train tsfresh-xgboost-quantile models on all data
ppls = list()
param_values_pred = list(range(df.shape[0]))
for df_ in dfs:
  print(" ")
  print("Fitting regression for %s" % df_["Circuit"].loc[df_.index[0]])
  df_ts_ = df_ts[df_ts["id"].isin(df_.index)]
  df_x = pd.DataFrame(index=np.unique(df_ts_["id"]))
  df_y = pd.DataFrame(df_['param_values'].to_list(), columns=df_["param_strs"].loc[df_.index[0]])
  mdl = MultiOutputRegressor(estimator=xgb.XGBRegressor())
  regr = TransformedTargetRegressor(regressor=mdl,
                                    transformer=QuantileTransformer(n_quantiles=10, random_state=0))
  ppl = Pipeline([
        ('augmenter', FeatureAugmenter(column_id='id', column_sort='freq', default_fc_parameters=ComprehensiveFCParameters())),
        ('regressor', regr)
      ])
  ppl.set_params(augmenter__timeseries_container=df_ts_);
  ppl.fit(df_x, df_y)
  ppls.append(ppl)

  ####### help
  # save fits
  for idx, i in zip(df_.index, np.arange(y_pred.shape[0])):
    param_values_pred[idx] = y_pred[i,:]


#pickle each model in ppls

#import pickle
#for mdl in ppls:
#  file = pickle.dump(mdl)

 
Fitting regression for L-R-RCPE


Feature Extraction: 100%|██████████| 1730/1730 [02:24<00:00, 12.00it/s]


 
Fitting regression for L-R-RCPE-RCPE


Feature Extraction: 100%|██████████| 1800/1800 [02:32<00:00, 11.80it/s]


 
Fitting regression for L-R-RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1752/1752 [02:41<00:00, 10.82it/s]


 
Fitting regression for RC-G-G


Feature Extraction: 100%|██████████| 1780/1780 [02:38<00:00, 11.23it/s]


 
Fitting regression for RC-RC-RCPE-RCPE


Feature Extraction: 100%|██████████| 1814/1814 [02:32<00:00, 11.90it/s]


 
Fitting regression for RCPE-RCPE


Feature Extraction: 100%|██████████| 1710/1710 [02:23<00:00, 11.95it/s]


 
Fitting regression for RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1838/1838 [02:34<00:00, 11.86it/s]


 
Fitting regression for RCPE-RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1848/1848 [02:32<00:00, 12.11it/s]


 
Fitting regression for Rs_Ws


Feature Extraction: 100%|██████████| 652/652 [00:53<00:00, 12.17it/s]




Export df to csv

In [None]:
param_values_pred

[array([4.2765201e+02, 2.8244915e+00, 4.3445881e-03, 4.3264108e+00],
       dtype=float32),
 array([4.2765201e+02, 2.8244915e+00, 4.3445881e-03, 4.3264108e+00],
       dtype=float32),
 array([0.17008224, 0.14053969, 0.03647374, 1.850966  ], dtype=float32),
 array([4.2765201e+02, 2.8244915e+00, 4.3445881e-03, 4.3264108e+00],
       dtype=float32),
 array([4.2765201e+02, 2.8244915e+00, 4.3445881e-03, 4.3264108e+00],
       dtype=float32),
 array([4.2765201e+02, 2.8244915e+00, 4.3445881e-03, 4.3264108e+00],
       dtype=float32),
 array([4.2765201e+02, 2.8244915e+00, 4.3445881e-03, 4.3264108e+00],
       dtype=float32),
 array([  4.6423497, 493.56506  ,   3.6793091,   2.0064642], dtype=float32),
 array([0.17008224, 0.14053969, 0.03647374, 1.850966  ], dtype=float32),
 array([0.17008224, 0.14053969, 0.03647374, 1.850966  ], dtype=float32),
 array([4.2765201e+02, 2.8244915e+00, 4.3445881e-03, 4.3264108e+00],
       dtype=float32),
 array([0.17008224, 0.14053969, 0.03647374, 1.850966  ], dty

In [None]:
def write_param_str(param_values, param_strs):
    pstr = ""
    for i in np.arange(len(param_values)):
      if i == 0:
        pstr += param_strs[i] + ": %0.2g" % (param_values[i])
      else:
        pstr += ", " + param_strs[i] + ": %0.2g" % (param_values[i])
    return pstr

In [None]:
df["param_values_pred"] = param_values_pred
df_ = pd.DataFrame(data=df["Circuit"])
df_["Parameters"] = df.apply(lambda x: write_param_str(x.param_values_pred, x.param_strs), axis=1)

df_

TypeError: ignored

In [None]:
df_.to_csv('submission.csv')

Other attempts that don't work.
Custom log transform:

In [None]:
ppls = list()
for df_ in dfs:
  print(" ")
  print("Fitting regression for %s" % df_["Circuit"].loc[df_.index[0]])
  df_ts_ = df_ts[df_ts["id"].isin(df_.index)]
  df_x = pd.DataFrame(index=np.unique(df_ts_["id"]))
  df_y = pd.DataFrame(df_['param_values'].to_list(), columns=df_["param_strs"].loc[df_.index[0]])
  X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=42)
  # transformer mask, tell the transformer which parameters to skip over
  mask = [n for n, x in enumerate(df_["param_strs"].loc[df_.index[0]]) if '_t' in x]
  mdl = MultiOutputRegressor(estimator=xgb.XGBRegressor())
  regr = TransformedTargetRegressor(regressor=mdl, 
                                    func=partial(transform_params, mask_skip=mask),
                                    inverse_func=partial(invert_params, mask_skip=mask))
  ppl = Pipeline([
        ('augmenter', FeatureAugmenter(column_id='id', column_sort='freq', default_fc_parameters=ComprehensiveFCParameters())),
        ('regressor', regr)
      ])
  ppl.set_params(augmenter__timeseries_container=df_ts_);
  ppl.fit(X_train, y_train)
  y_pred = ppl.predict(X_test)
  print("MAE:%.4f" % mean_absolute_error(y_test, y_pred))
  p_strs = df_["param_strs"].loc[df_.index[0]]
  for i in np.arange(len(p_strs)):
    print("%s MAE:%.4f" % (p_strs[i], mean_absolute_error(y_test[p_strs[i]], y_pred[:,i])))
  ppls.append(ppl)


 
Fitting regression for L-R-RCPE


Feature Extraction: 100%|██████████| 1384/1384 [01:56<00:00, 11.86it/s]
  import sys




Feature Extraction: 100%|██████████| 346/346 [00:27<00:00, 12.43it/s]


MAPE:634548.4890
L1 MAPE:3168187.4009
R1 MAPE:18.5833
R2 MAPE:0.8513
CPE1_t MAPE:0.0857
CPE1_C MAPE:4535.5240
 
Fitting regression for L-R-RCPE-RCPE


Feature Extraction: 100%|██████████| 1440/1440 [02:18<00:00, 10.36it/s]
  import sys
  import sys




Feature Extraction: 100%|██████████| 360/360 [00:39<00:00,  9.21it/s]
  import sys


ValueError: ignored

dummy regressor

In [None]:
ppls_0 = list()
for df_ in dfs:
  print(" ")
  print("Fitting regression for %s" % df_["Circuit"].loc[df_.index[0]])
  df_ts_ = df_ts[df_ts["id"].isin(df_.index)]
  df_x = pd.DataFrame(index=np.unique(df_ts_["id"]))
  df_y = pd.DataFrame(df_['param_values'].to_list(), columns=df_["param_strs"].loc[df_.index[0]])
  X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=42)
  # transformer mask, tell the transformer which parameters to skip over
  mask = [n for n, x in enumerate(df_["param_strs"].loc[df_.index[0]]) if '_t' in x]
  mdl = DummyRegressor(strategy='median')
  regr = TransformedTargetRegressor(regressor=mdl, 
                                    func=partial(transform_params, mask_skip=mask),
                                    inverse_func=partial(invert_params, mask_skip=mask))
  ppl = Pipeline([
        ('regressor', regr)
      ])
  ppl.fit(X_train, y_train)
  y_pred = ppl.predict(X_test)
  print("MAE:%.4f" % mean_absolute_error(y_test, y_pred))
  p_strs = df_["param_strs"].loc[df_.index[0]]
  for i in np.arange(len(p_strs)):
    print("%s MAE:%.4f" % (p_strs[i], mean_absolute_error(y_test[p_strs[i]], y_pred[:,i])))
  ppls_0.append(ppl)


 
Fitting regression for L-R-RCPE
MAE:7342.5811
L1 MAE:0.0869
R1 MAE:41.8808
R2 MAE:36670.7032
CPE1_t MAE:0.1384
CPE1_C MAE:0.0962
 
Fitting regression for L-R-RCPE-RCPE
MAE:6860.2577
L1 MAE:0.1662
R1 MAE:65.7694
R2 MAE:291.0060
CPE1_t MAE:0.1368
CPE1_C MAE:0.2130
R3 MAE:54524.5107
CPE2_t MAE:0.1459
CPE2_C MAE:0.1139
 
Fitting regression for L-R-RCPE-RCPE-RCPE
MAE:6932.6560
L1 MAE:0.2596
R1 MAE:52.6981
R2 MAE:336.4982
CPE1_t MAE:0.1423
CPE1_C MAE:0.3262
R3 MAE:2181.0637
CPE2_t MAE:0.1379
CPE2_C MAE:0.1955
R4 MAE:73687.6291
CPE3_t MAE:0.1368
CPE3_C MAE:0.1281
 
Fitting regression for RC-G-G
MAE:22.4008
R1 MAE:74.0982
C1 MAE:0.0000
R_g1 MAE:1.2834
t_g1 MAE:0.7691
R_g2 MAE:45.0847
t_g2 MAE:13.1691
 
Fitting regression for RC-RC-RCPE-RCPE
MAE:5966.9245
R1 MAE:33.1936
R2 MAE:6.4065
R3 MAE:40.6432
R4 MAE:59587.7611
C2 MAE:0.1398
CPE3_C MAE:0.1100
CPE4_t MAE:0.1712
CPE4_C MAE:0.5881
C1 MAE:0.2201
CPE3_t MAE:0.0110
 
Fitting regression for RCPE-RCPE
MAE:4631.5568
R1 MAE:50.8647
R2 MAE:27737.88

  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


MAE:6428.8155
R1 MAE:12.9561
R2 MAE:381.7258
R3 MAE:57463.5617
CPE1_t MAE:0.1396
CPE1_C MAE:0.3217
CPE2_t MAE:0.1399
CPE2_C MAE:0.2023
CPE3_t MAE:0.1428
CPE3_C MAE:0.1491
 
Fitting regression for RCPE-RCPE-RCPE-RCPE
MAE:8519.5391
R1 MAE:31.9382
R2 MAE:168.4325
R3 MAE:2515.8436
R4 MAE:99516.5397
CPE1_t MAE:0.1628
CPE1_C MAE:0.4466
CPE2_t MAE:0.1436
CPE2_C MAE:0.2983
CPE3_t MAE:0.1432
CPE3_C MAE:0.2106
CPE4_t MAE:0.1466
CPE4_C MAE:0.1634
 
Fitting regression for Rs_Ws
MAE:206.5543
R1 MAE:56.6867
W1_R MAE:674.5952
W1_T MAE:94.0620
W1_p MAE:0.8733


  import sys
  import sys


wihtout log transform

In [None]:
ppls_2 = list()
for df_ in dfs:
  print(" ")
  print("Fitting regression for %s" % df_["Circuit"].loc[df_.index[0]])
  df_ts_ = df_ts[df_ts["id"].isin(df_.index)]
  df_x = pd.DataFrame(index=np.unique(df_ts_["id"]))
  df_y = pd.DataFrame(df_['param_values'].to_list(), columns=df_["param_strs"].loc[df_.index[0]])
  X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=42)
  mdl = MultiOutputRegressor(estimator=xgb.XGBRegressor())
  ppl = Pipeline([
        ('augmenter', FeatureAugmenter(column_id='id', column_sort='freq', default_fc_parameters=ComprehensiveFCParameters())),
        ('regressor', mdl)
      ])
  ppl.set_params(augmenter__timeseries_container=df_ts_);
  ppl.fit(X_train, y_train)
  y_pred = ppl.predict(X_test)
  print("MAE:%.4f" % mean_absolute_error(y_test, y_pred))
  p_strs = df_["param_strs"].loc[df_.index[0]]
  for i in np.arange(len(p_strs)):
    print("%s MAE:%.4f" % (p_strs[i], mean_absolute_error(y_test[p_strs[i]], y_pred[:,i])))
  ppls_2.append(ppl)

 
Fitting regression for L-R-RCPE


Feature Extraction: 100%|██████████| 1384/1384 [01:54<00:00, 12.11it/s]




Feature Extraction: 100%|██████████| 346/346 [00:28<00:00, 12.29it/s]


MAE:3835.6250
L1 MAE:0.0000
R1 MAE:40.0877
R2 MAE:19137.9736
CPE1_t MAE:0.0490
CPE1_C MAE:0.0148
 
Fitting regression for L-R-RCPE-RCPE


Feature Extraction: 100%|██████████| 1440/1440 [01:58<00:00, 12.18it/s]




Feature Extraction: 100%|██████████| 360/360 [00:30<00:00, 11.96it/s]


MAE:6398.9935
L1 MAE:0.0000
R1 MAE:67.6518
R2 MAE:296.9033
CPE1_t MAE:0.1220
CPE1_C MAE:0.0000
R3 MAE:50827.2076
CPE2_t MAE:0.0515
CPE2_C MAE:0.0118
 
Fitting regression for L-R-RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1400/1400 [01:56<00:00, 11.99it/s]




Feature Extraction: 100%|██████████| 352/352 [00:28<00:00, 12.21it/s]


MAE:9245.7841
L1 MAE:0.0000
R1 MAE:64.8399
R2 MAE:168.0301
CPE1_t MAE:0.1315
CPE1_C MAE:0.0000
R3 MAE:2777.7050
CPE2_t MAE:0.1041
CPE2_C MAE:0.0005
R4 MAE:98692.7075
CPE3_t MAE:0.0798
CPE3_C MAE:0.0270
 
Fitting regression for RC-G-G


Feature Extraction: 100%|██████████| 1424/1424 [01:58<00:00, 12.01it/s]




Feature Extraction: 100%|██████████| 356/356 [00:29<00:00, 12.05it/s]


MAE:6.0217
R1 MAE:5.9802
C1 MAE:0.0000
R_g1 MAE:2.1870
t_g1 MAE:1.1488
R_g2 MAE:15.3042
t_g2 MAE:11.5097
 
Fitting regression for RC-RC-RCPE-RCPE


Feature Extraction: 100%|██████████| 1450/1450 [02:00<00:00, 12.01it/s]




Feature Extraction: 100%|██████████| 364/364 [00:29<00:00, 12.19it/s]


MAE:9806.8950
R1 MAE:4.0285
R2 MAE:7.5253
R3 MAE:10.7159
R4 MAE:98046.0900
C2 MAE:0.0000
CPE3_C MAE:0.0001
CPE4_t MAE:0.0910
CPE4_C MAE:0.4991
C1 MAE:0.0000
CPE3_t MAE:0.0000
 
Fitting regression for RCPE-RCPE


Feature Extraction: 100%|██████████| 1368/1368 [02:08<00:00, 10.63it/s]




Feature Extraction: 100%|██████████| 342/342 [00:32<00:00, 10.41it/s]


MAE:3768.2587
R1 MAE:38.5366
R2 MAE:22570.8357
CPE1_t MAE:0.1140
CPE1_C MAE:0.0012
CPE2_t MAE:0.0514
CPE2_C MAE:0.0133
 
Fitting regression for RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1470/1470 [01:58<00:00, 12.36it/s]




Feature Extraction: 100%|██████████| 368/368 [00:29<00:00, 12.33it/s]


MAE:5425.6460
R1 MAE:12.5045
R2 MAE:247.1067
R3 MAE:48570.8574
CPE1_t MAE:0.1310
CPE1_C MAE:0.0000
CPE2_t MAE:0.1149
CPE2_C MAE:0.0001
CPE3_t MAE:0.0688
CPE3_C MAE:0.0310
 
Fitting regression for RCPE-RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1478/1478 [02:00<00:00, 12.26it/s]




Feature Extraction: 100%|██████████| 370/370 [00:30<00:00, 12.14it/s]


MAE:11440.2930
R1 MAE:33.5742
R2 MAE:227.6342
R3 MAE:2660.1847
R4 MAE:134361.5944
CPE1_t MAE:0.1517
CPE1_C MAE:0.0000
CPE2_t MAE:0.1330
CPE2_C MAE:0.0027
CPE3_t MAE:0.1115
CPE3_C MAE:0.0064
CPE4_t MAE:0.0926
CPE4_C MAE:0.0299
 
Fitting regression for Rs_Ws


Feature Extraction: 100%|██████████| 520/520 [00:47<00:00, 11.00it/s]




Feature Extraction: 100%|██████████| 132/132 [00:10<00:00, 12.01it/s]


MAE:150.4973
R1 MAE:40.9954
W1_R MAE:223.3179
W1_T MAE:337.4870
W1_p MAE:0.1890


quantile transform on target

In [None]:
ppls = list()
for df_ in dfs:
  print(" ")
  print("Fitting regression for %s" % df_["Circuit"].loc[df_.index[0]])
  df_ts_ = df_ts[df_ts["id"].isin(df_.index)]
  df_x = pd.DataFrame(index=np.unique(df_ts_["id"]))
  df_y = pd.DataFrame(df_['param_values'].to_list(), columns=df_["param_strs"].loc[df_.index[0]])
  X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=42)
  # transformer mask, tell the transformer which parameters to skip over
  mask = [n for n, x in enumerate(df_["param_strs"].loc[df_.index[0]]) if '_t' in x]
  mdl = MultiOutputRegressor(estimator=xgb.XGBRegressor())
  regr = TransformedTargetRegressor(regressor=mdl,
                                    transformer=QuantileTransformer(n_quantiles=10, random_state=0))
                                    #transformer=PowerTransformer())
  ppl = Pipeline([
        ('augmenter', FeatureAugmenter(column_id='id', column_sort='freq', default_fc_parameters=ComprehensiveFCParameters())),
        ('regressor', regr)
      ])
  ppl.set_params(augmenter__timeseries_container=df_ts_);
  ppl.fit(X_train, y_train)
  y_pred = ppl.predict(X_test)
  print("MAE:%.4f" % mean_absolute_error(y_test, y_pred))
  p_strs = df_["param_strs"].loc[df_.index[0]]
  for i in np.arange(len(p_strs)):
    print("%s MAE:%.4f" % (p_strs[i], mean_absolute_error(y_test[p_strs[i]], y_pred[:,i])))
  ppls.append(ppl)


 
Fitting regression for L-R-RCPE


Feature Extraction: 100%|██████████| 1384/1384 [01:54<00:00, 12.13it/s]




Feature Extraction: 100%|██████████| 346/346 [00:28<00:00, 12.27it/s]


MAE:4745.8882
L1 MAE:0.0000
R1 MAE:34.5926
R2 MAE:23694.7899
CPE1_t MAE:0.0489
CPE1_C MAE:0.0096
 
Fitting regression for L-R-RCPE-RCPE


Feature Extraction: 100%|██████████| 1440/1440 [01:58<00:00, 12.10it/s]




Feature Extraction: 100%|██████████| 360/360 [00:31<00:00, 11.60it/s]


MAE:5538.6045
L1 MAE:0.0000
R1 MAE:61.5315
R2 MAE:272.1309
CPE1_t MAE:0.1241
CPE1_C MAE:0.0000
R3 MAE:43974.9894
CPE2_t MAE:0.0502
CPE2_C MAE:0.0096
 
Fitting regression for L-R-RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1400/1400 [01:54<00:00, 12.18it/s]




Feature Extraction: 100%|██████████| 352/352 [00:28<00:00, 12.45it/s]


MAE:5479.9450
L1 MAE:0.0000
R1 MAE:47.8261
R2 MAE:307.5457
CPE1_t MAE:0.1343
CPE1_C MAE:0.0000
R3 MAE:1996.7176
CPE2_t MAE:0.1054
CPE2_C MAE:0.0003
R4 MAE:57926.9687
CPE3_t MAE:0.0810
CPE3_C MAE:0.0163
 
Fitting regression for RC-G-G


Feature Extraction: 100%|██████████| 1424/1424 [01:55<00:00, 12.29it/s]




Feature Extraction: 100%|██████████| 356/356 [00:29<00:00, 12.12it/s]


MAE:7.4466
R1 MAE:17.3174
C1 MAE:0.0000
R_g1 MAE:1.2713
t_g1 MAE:0.7613
R_g2 MAE:14.8128
t_g2 MAE:10.5165
 
Fitting regression for RC-RC-RCPE-RCPE


Feature Extraction: 100%|██████████| 1450/1450 [01:57<00:00, 12.33it/s]




Feature Extraction: 100%|██████████| 364/364 [00:29<00:00, 12.49it/s]


MAE:6091.4197
R1 MAE:4.5597
R2 MAE:4.2307
R3 MAE:13.4391
R4 MAE:60891.4634
C2 MAE:0.0000
CPE3_C MAE:0.0001
CPE4_t MAE:0.0895
CPE4_C MAE:0.4147
C1 MAE:0.0000
CPE3_t MAE:0.0000
 
Fitting regression for RCPE-RCPE


Feature Extraction: 100%|██████████| 1368/1368 [01:53<00:00, 12.05it/s]




Feature Extraction: 100%|██████████| 342/342 [00:27<00:00, 12.37it/s]


MAE:3680.9706
R1 MAE:45.4900
R2 MAE:22040.1586
CPE1_t MAE:0.1125
CPE1_C MAE:0.0013
CPE2_t MAE:0.0502
CPE2_C MAE:0.0111
 
Fitting regression for RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1470/1470 [01:57<00:00, 12.51it/s]




Feature Extraction: 100%|██████████| 368/368 [00:32<00:00, 11.36it/s]


MAE:3267.4488
R1 MAE:10.8496
R2 MAE:336.6328
R3 MAE:29059.2144
CPE1_t MAE:0.1303
CPE1_C MAE:0.0000
CPE2_t MAE:0.1149
CPE2_C MAE:0.0000
CPE3_t MAE:0.0701
CPE3_C MAE:0.0266
 
Fitting regression for RCPE-RCPE-RCPE-RCPE


Feature Extraction: 100%|██████████| 1478/1478 [02:22<00:00, 10.35it/s]




Feature Extraction: 100%|██████████| 370/370 [00:29<00:00, 12.34it/s]


MAE:8039.6460
R1 MAE:30.8346
R2 MAE:161.8707
R3 MAE:2368.0198
R4 MAE:93914.5137
CPE1_t MAE:0.1505
CPE1_C MAE:0.0000
CPE2_t MAE:0.1298
CPE2_C MAE:0.0012
CPE3_t MAE:0.1138
CPE3_C MAE:0.0051
CPE4_t MAE:0.0915
CPE4_C MAE:0.0216
 
Fitting regression for Rs_Ws


Feature Extraction: 100%|██████████| 520/520 [00:42<00:00, 12.29it/s]




Feature Extraction: 100%|██████████| 132/132 [00:11<00:00, 11.55it/s]


MAE:105.7937
R1 MAE:21.2443
W1_R MAE:236.3289
W1_T MAE:165.3921
W1_p MAE:0.2096


dummy regressor no transform

In [None]:
ppls_0 = list()
for df_ in dfs:
  print(" ")
  print("Fitting regression for %s" % df_["Circuit"].loc[df_.index[0]])
  df_ts_ = df_ts[df_ts["id"].isin(df_.index)]
  df_x = pd.DataFrame(index=np.unique(df_ts_["id"]))
  df_y = pd.DataFrame(df_['param_values'].to_list(), columns=df_["param_strs"].loc[df_.index[0]])
  X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=42)
  mdl = DummyRegressor(strategy='median')
  ppl = Pipeline([
        ('regressor', mdl)
      ])
  ppl.fit(X_train, y_train)
  y_pred = ppl.predict(X_test)
  print("MAE:%.4f" % mean_absolute_error(y_test, y_pred))
  p_strs = df_["param_strs"].loc[df_.index[0]]
  for i in np.arange(len(p_strs)):
    print("%s MAE:%.4f" % (p_strs[i], mean_absolute_error(y_test[p_strs[i]], y_pred[:,i])))
  ppls_0.append(ppl)


 
Fitting regression for L-R-RCPE
MAE:7342.4158
L1 MAE:0.0000
R1 MAE:41.8812
R2 MAE:36670.0515
CPE1_t MAE:0.1324
CPE1_C MAE:0.0140
 
Fitting regression for L-R-RCPE-RCPE
MAE:6860.0513
L1 MAE:0.0000
R1 MAE:65.7694
R2 MAE:290.9311
CPE1_t MAE:0.1266
CPE1_C MAE:0.0000
R3 MAE:54523.4364
CPE2_t MAE:0.1360
CPE2_C MAE:0.0112
 
Fitting regression for L-R-RCPE-RCPE-RCPE
MAE:6933.0433
L1 MAE:0.0000
R1 MAE:52.7099
R2 MAE:336.4940
CPE1_t MAE:0.1257
CPE1_C MAE:0.0000
R3 MAE:2180.7010
CPE2_t MAE:0.1226
CPE2_C MAE:0.0003
R4 MAE:73693.1854
CPE3_t MAE:0.1193
CPE3_C MAE:0.0183
 
Fitting regression for RC-G-G
MAE:22.4008
R1 MAE:74.0982
C1 MAE:0.0000
R_g1 MAE:1.2834
t_g1 MAE:0.7691
R_g2 MAE:45.0847
t_g2 MAE:13.1691
 
Fitting regression for RC-RC-RCPE-RCPE
MAE:5966.7458
R1 MAE:33.1870
R2 MAE:6.3942
R3 MAE:40.6226
R4 MAE:59586.5399
C2 MAE:0.0000
CPE3_C MAE:0.0002
CPE4_t MAE:0.1629
CPE4_C MAE:0.5516
C1 MAE:0.0000
CPE3_t MAE:0.0000
 
Fitting regression for RCPE-RCPE
MAE:4632.0036
R1 MAE:50.8451
R2 MAE:27740.92

exploration

In [None]:
p_str = df["param_str"].loc[0]
p_str

array(['L1', 'R1', 'R2', 'CPE1_t', 'CPE1_C', 'R3', 'CPE2_t', 'CPE2_C',
       'R4', 'CPE3_t', 'CPE3_C'], dtype='<U6')

In [None]:
mask = [n for n, x in enumerate(p_str) if '_t' in x]
mask

[3, 6, 9]