In [13]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import Ridge
import joblib
from config import interim_data_path, masks_path

In [14]:
def minmax_scale_data(X_trn, X_vld, X_tst, scaler_name):
    """Min-max scale three feature splits using a scaler fitted on the training split.

    The scaler is fitted on ``X_trn`` only and then applied, unchanged, to
    ``X_vld`` and ``X_tst``. The fitted scaler is persisted with
    ``joblib.dump`` so the same transformation can be re-applied later.

    Parameters
    ----------
    X_trn, X_vld, X_tst : pandas.DataFrame
        Training, validation and test feature frames.
    scaler_name : str, path-like or file object
        Destination handed to ``joblib.dump``. When it is a string or a
        path-like object, missing parent directories are created first.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(X_trn_scaled, X_vld_scaled, X_tst_scaled)``. Each frame keeps both
        the column labels and the row index of the corresponding input.
    """
    import os
    from pathlib import Path

    def _relabel(values, like):
        # Rebuild a frame from the scaler output, carrying over the row index
        # as well as the columns so rows stay identifiable after scaling.
        return pd.DataFrame(values, index=like.index, columns=like.columns)

    scaler = MinMaxScaler()
    X_trn_scaled = _relabel(scaler.fit_transform(X_trn), X_trn)
    X_vld_scaled = _relabel(scaler.transform(X_vld), X_vld)
    X_tst_scaled = _relabel(scaler.transform(X_tst), X_tst)

    # Dumping into a directory that does not exist yet would raise
    # FileNotFoundError; file-object destinations are left untouched.
    if isinstance(scaler_name, (str, os.PathLike)):
        Path(scaler_name).parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(scaler, scaler_name)

    return X_trn_scaled, X_vld_scaled, X_tst_scaled

In [None]:
# Sensor identifiers R1 .. R12; each one has its own feature files and mask file.
sensors = [f'R{i}' for i in range(1, 13)]

# Target columns, each fitted and scored separately.
targets = [
    'NO_conc', 'CH4_conc', 'H2S_conc',
    'SO2_conc', 'HCOH_conc', 'CO_conc',
    'H2_conc', 'NO2_conc', 'NH3_conc',
]

# Targets are shared by all sensors and divided by 100 once, up front
# (presumably a unit/scale conversion -- TODO confirm).
y_trn = pd.read_feather(interim_data_path / 'trn_targets.feather').div(100)
y_vld = pd.read_feather(interim_data_path / 'vld_targets.feather').div(100)
y_tst = pd.read_feather(interim_data_path / 'tst_targets.feather').div(100)

# One [sensor, target, r2] row per (sensor, target) pair.
result = []
for sensor in sensors:
    scaler_file = interim_data_path / 'scalers' / f'{sensor}_scaler.gz'

    # Load the three feature splits of this sensor and min-max scale them
    # (the fitted scaler is written to `scaler_file`).
    X_trn, X_vld, X_tst = minmax_scale_data(
        pd.read_feather(interim_data_path / f'{sensor}-trn.feather'),
        pd.read_feather(interim_data_path / f'{sensor}-vld.feather'),
        pd.read_feather(interim_data_path / f'{sensor}-tst.feather'),
        scaler_file,
    )

    # masks_df[target] acts as a boolean selector over masks_df.index; the
    # selected index entries are then used as column labels of the features.
    masks_df = pd.read_feather(masks_path / f'{sensor}.feather')
    for target in targets:
        is_selected = masks_df[target]
        selected_columns = masks_df.index[is_selected]

        # Fit on the training split, score on the validation split.
        model = Ridge(alpha=1.0).fit(X_trn[selected_columns], y_trn[target])
        r2 = model.score(X_vld[selected_columns], y_vld[target])

        result.append([sensor, target, r2])

In [18]:
# Tabulate the collected [sensor, target, r2] rows: one row per sensor/target pair.
pd.DataFrame(result, columns=['sensor', 'target', 'r2'])

Unnamed: 0,sensor,target,r2
0,R1,NO_conc,0.683133
1,R1,CH4_conc,0.090483
2,R1,H2S_conc,0.101109
3,R1,SO2_conc,0.127982
4,R1,HCOH_conc,0.098010
...,...,...,...
103,R12,HCOH_conc,0.063455
104,R12,CO_conc,0.057898
105,R12,H2_conc,0.032328
106,R12,NO2_conc,0.136800


In [None]:
# for target in targets:
#     model = Ridge(alpha=1.0)
#     model.fit(X_trn, y_trn[target])
#     print(f'{target} : {model.score(X_vld, y_vld[target]):0.3f}')

NO_conc : 0.990
CH4_conc : 0.896
H2S_conc : 0.977
SO2_conc : 0.679
HCOH_conc : 0.943
CO_conc : 0.931
H2_conc : 0.925
NO2_conc : 0.989
NH3_conc : 1.000
