In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, r2_score
import tensorflow as tf
# from keras.models import load_model
import joblib
from config import interim_data_path, masks_path, data_path, sensors, targets

In [None]:
def fit_my_model(X_trn, y_trn, X_vld, y_vld, params):
    """Train a small fully-connected regressor with early stopping.

    The network is a Dense(8, sigmoid) hidden layer followed by a
    Dense(1, sigmoid) output, trained with Adam on mean squared error.
    Because the output activation is a sigmoid, predictions lie in (0, 1).

    Parameters
    ----------
    X_trn, y_trn : training features and target.
    X_vld, y_vld : validation features and target; used only to drive
        early stopping via ``val_loss``.
    params : dict
        Required keys: ``'lr'`` (Adam learning rate), ``'min_delta'`` and
        ``'patience'`` (early-stopping settings), ``'epochs'`` (upper bound
        on training epochs).
        Optional key: ``'batch_size'`` (defaults to 4).

    Returns
    -------
    tf.keras.Model
        The fitted model, holding the weights of the best ``val_loss`` epoch
        when early stopping triggered.
    """
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(8, activation='sigmoid'))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    adam = tf.keras.optimizers.Adam(learning_rate=params['lr'])
    model.compile(optimizer=adam, loss='mean_squared_error')

    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=params['min_delta'],
        patience=params['patience'],
        verbose=1,
        mode='min',
        baseline=None,
        restore_best_weights=True,
    )

    model.fit(
        X_trn, y_trn,
        # Default of 4 keeps the behaviour of callers that do not set it.
        batch_size=params.get('batch_size', 4),
        epochs=params['epochs'],
        verbose=0,
        # `callbacks` is documented as a list of Callback instances.
        callbacks=[early_stopping],
        validation_data=(X_vld, y_vld),
    )
    return model

In [None]:
# Hyper-parameters shared by every (sensor, target, mask) model.
params = {
    'min_delta' : 0.001,
    'patience' : 10,
    'epochs' : 50,
    'lr' : 0.001
}

# Targets are rescaled by 1/100 before training.
# NOTE(review): presumably this maps percentages into [0, 1] to match the
# sigmoid output of the network -- confirm against the data source.
y_trn = pd.read_feather(interim_data_path / 'trn_targets.feather') / 100
y_vld = pd.read_feather(interim_data_path / 'vld_targets.feather') / 100

# Make sure the output folders exist so a fresh checkout can run this cell.
# NOTE(review): assumes `data_path` is a pathlib.Path (it is already combined
# with `/` below) -- confirm in config.
scalers_dir = data_path / 'scalers'
models_dir = data_path / 'models' / 'NN'
scalers_dir.mkdir(parents=True, exist_ok=True)
models_dir.mkdir(parents=True, exist_ok=True)

# One row per trained model: [sensor, target, to_mask, mae, r2].
result = []
for sensor in sensors:
    X_trn = pd.read_feather(interim_data_path / f'{sensor}-trn.feather')
    X_vld = pd.read_feather(interim_data_path / f'{sensor}-vld.feather')

    # Fit the scaler on the training split only, then reuse it for validation
    # and persist it next to the models.
    scalers_path = scalers_dir / f'{sensor}_scaler.joblib'
    scaler = MinMaxScaler()
    X_trn = pd.DataFrame(scaler.fit_transform(X_trn), columns=X_trn.columns)
    X_vld = pd.DataFrame(scaler.transform(X_vld), columns=X_vld.columns)
    joblib.dump(scaler, scalers_path)

    masks_df = pd.read_feather(masks_path / f'{sensor}.feather')
    for target in targets:
        for to_mask in [True, False]:
            # Masked run: keep only the features flagged for this target.
            # Unmasked run: use every feature listed in the mask table.
            if to_mask:
                selected_columns = masks_df.index[masks_df[target]]
            else:
                selected_columns = masks_df.index

            # Many models are built in this loop; drop Keras' global state
            # from the previous one so memory does not grow per iteration.
            tf.keras.backend.clear_session()

            model = fit_my_model(
                X_trn[selected_columns],
                y_trn[target],
                X_vld[selected_columns],
                y_vld[target],
                params
            )

            model_name = models_dir / f'NN_{sensor}_{target}_{to_mask}.h5'
            model.save(model_name)

            y_pred = model.predict(X_vld[selected_columns])

            # Metrics are computed on the rescaled (/100) validation targets.
            mae = mean_absolute_error(y_vld[target], y_pred)
            r2 = r2_score(y_vld[target], y_pred)
            result.append([sensor, target, to_mask, mae, r2])

In [5]:
# Tabulate the per-model validation metrics collected by the training loop.
result_columns = ['sensor', 'target', 'mask', 'mae', 'r2']
final_result = pd.DataFrame(result, columns=result_columns)

In [13]:
# import shutil
# shutil.make_archive('test', 'zip', data_path / 'models' / 'NN', data_path / 'models' / 'NN')

In [6]:
# Show the validation metrics table (pandas truncates the middle rows).
# NOTE(review): several r2 values in the rendered output are strongly negative
# (e.g. -93.1), i.e. worse than predicting the mean -- worth investigating
# before the corresponding models are used.
final_result

Unnamed: 0,sensor,target,mask,mae,r2
0,R1,NO_conc,True,0.025980,0.616754
1,R1,NO_conc,False,0.040308,0.386516
2,R1,CH4_conc,True,0.070382,0.003260
3,R1,CH4_conc,False,0.053941,0.115829
4,R1,H2S_conc,True,0.025453,-93.116717
...,...,...,...,...,...
211,R12,H2_conc,False,0.044527,0.170050
212,R12,NO2_conc,True,0.013117,-6.228623
213,R12,NO2_conc,False,0.027181,-30.487970
214,R12,NH3_conc,True,0.055439,0.160081


In [7]:
# loaded_pred = loaded_model.predict(X_vld[selected_columns])
# mae = mean_absolute_error(y_vld[target], y_pred)
# r2 = r2_score(y_vld[target], y_pred)
# result.append([sensor, target, to_mask, mae, r2])