In [25]:
import pandas as pd
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
import tensorflow.keras.metrics
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from MLscores import calc_metrics, metrics_dict, cmvals, recall, hybridrecall
import tensorflow.keras.backend as K
import tensorflow as tf
#import eli5
#from eli5.sklearn import PermutationImportance

In [26]:
def create_NN_model(params, X):
    """Build and compile a feed-forward softmax classifier from a parameter dict.

    Parameters
    ----------
    params : dict
        Hyper-parameters. Keys read by this function:

        * ``'n_internal_layers'`` -- pair ``(k, nodes)``. ``nodes`` maps
          ``'layer_<i>_<k>_nodes'`` to the width of hidden layer ``i``.
          Layer 1 is always created and ``k`` further hidden layers
          (``i = 2 .. k + 1``) follow it.
        * ``'dropout'`` -- rate of the Dropout layer added after every hidden
          layer, or ``None`` for no dropout.
        * ``'optimizer'`` -- dict with ``'name'`` (``'Adam'`` or ``'SGD'``).
          For Adam, ``'adam_params'`` is either ``None`` (Keras defaults) or a
          dict with ``'learning_rate_adam'``, ``'beta_1'``, ``'beta_2'`` and
          ``'amsgrad'``. For SGD, ``'learning_rate_SGD'`` is required.
        * ``'metric'`` -- ``'accuracy'``, ``'sparse'`` or ``'tn'``.
        * ``'loss'`` (optional) -- ``'unbalanced'`` selects ``unbalanced_loss``;
          anything else selects ``'sparse_categorical_crossentropy'``.
    X : object exposing ``shape``
        Only ``X.shape[1]`` is used, as the input width of the first layer.

    Returns
    -------
    The compiled ``Sequential`` model; its output layer has 2 softmax units.

    Raises
    ------
    ValueError
        If the optimizer name or the metric name is not one of the supported
        values listed above.
    KeyError
        If a required key is missing from ``params``.
    """
    # Validate the categorical choices before building anything, so that a bad
    # configuration fails fast with a clear message instead of leaving `opt`
    # or `metrics` unbound when compile() is reached.
    optimizer_cfg = params['optimizer']
    optimizer_name = optimizer_cfg['name']
    if optimizer_name not in ('Adam', 'SGD'):
        raise ValueError("Unsupported optimizer %r; expected 'Adam' or 'SGD'" % (optimizer_name,))
    metric_name = params['metric']
    if metric_name not in ('accuracy', 'sparse', 'tn'):
        raise ValueError("Unsupported metric %r; expected 'accuracy', 'sparse' or 'tn'" % (metric_name,))

    n_features = X.shape[1]
    extra_layers = int(params['n_internal_layers'][0])
    layer_nodes = params['n_internal_layers'][1]
    dropout_rate = params['dropout']

    # define model
    model = Sequential()
    for layer_no in range(1, extra_layers + 2):
        # Widths may arrive as floats (e.g. 70.0), so cast every layer to int,
        # the first one included.
        units = int(layer_nodes['layer_%d_%d_nodes' % (layer_no, extra_layers)])
        if layer_no == 1:
            model.add(Dense(units, activation='relu', input_shape=(n_features,)))
        else:
            model.add(Dense(units, activation='relu'))
        if dropout_rate is not None:
            model.add(Dropout(dropout_rate))
    model.add(Dense(2, activation='softmax'))

    # compile the model
    if optimizer_name == 'Adam':
        adam_params = optimizer_cfg['adam_params']
        if adam_params is None:
            opt = Adam()
        else:
            opt = Adam(learning_rate=adam_params['learning_rate_adam'],
                       beta_1=adam_params['beta_1'],
                       beta_2=adam_params['beta_2'],
                       amsgrad=adam_params['amsgrad'])
    else:  # 'SGD' -- guaranteed by the validation above
        opt = SGD(learning_rate=optimizer_cfg['learning_rate_SGD'])

    if metric_name == 'accuracy':
        metrics = ['accuracy']
    elif metric_name == 'sparse':
        metrics = [tensorflow.metrics.SparseCategoricalAccuracy()]
    else:  # 'tn'
        # NOTE(review): these confusion-matrix metrics are paired with a
        # 2-unit softmax output and a sparse loss -- confirm that the label
        # and prediction shapes are compatible for them.
        metrics = [tensorflow.metrics.TrueNegatives(), tensorflow.metrics.TruePositives()]

    if params.get('loss') == 'unbalanced':
        # `unbalanced_loss` is not defined in the part of the notebook visible
        # here; it must exist in the namespace when this branch is taken.
        lossf = unbalanced_loss
    else:
        lossf = 'sparse_categorical_crossentropy'
    model.compile(optimizer=opt, loss=lossf, metrics=metrics)
    return model

In [27]:
def load_dataset(trfiles, featuredrop=[], debug=True, returnid=False):
    """Read one or more dataset CSV files and split them into features, target and groups.

    Parameters
    ----------
    trfiles : str or list of str
        Path of a single CSV file, or a list of CSV paths whose rows are
        concatenated into one frame.
    featuredrop : sequence of str (or a single str), optional
        Substrings; every feature column whose name contains one of them is
        removed from ``X``. The default list is never mutated.
    debug : bool, optional
        When true, print a progress message for each file of a list input.
    returnid : bool, optional
        When true, also return the ``'id'`` column of the raw CSV data.

    Returns
    -------
    (X, y, groupspd) or (X, y, groupspd, idpd)
        ``X``, ``y`` and ``groupspd`` are whatever ``prepare_dataset`` returns
        for the selected feature columns, the ``'fire'`` column and the
        fire-date column; ``idpd`` is added only when ``returnid`` is true.

    Raises
    ------
    ValueError
        If ``trfiles`` is an empty list.
    IndexError
        If no column name contains ``'firedate'`` (case-insensitive).
    KeyError
        If ``returnid`` is true and the data has no ``'id'`` column.
    """
    firedatecheck = 'firedate'
    X_columns = ['max_temp', 'min_temp', 'mean_temp', 'res_max', 'dir_max', 'dom_vel', 'dom_dir',
                 'rain_7days', 'Corine', 'Slope', 'DEM', 'Curvature', 'Aspect', 'ndvi', 'evi', 'lst_day',
                 'lst_night', 'month', 'wkd',
                 'mean_dew_temp', 'max_dew_temp', 'min_dew_temp', 'frequency', 'f81', 'x', 'y']
    y_columns = ['fire']

    if isinstance(trfiles, list):
        if not trfiles:
            raise ValueError("trfiles is an empty list: no dataset file to load")
        if debug:
            print("Loading full dataset ...")
        dflist = []
        for dsfile in trfiles:
            if debug:
                print("Loading dataset file %s" % dsfile)
            dflist.append(pd.read_csv(dsfile))
        # Keep the concatenated frame. It used to be overwritten by a re-read
        # of the last file only, because the single-file read_csv sat outside
        # the else branch.
        df = pd.concat(dflist, ignore_index=True)
    else:
        df = pd.read_csv(trfiles)

    # A CSV column is a feature when its upper-cased name contains any of the
    # upper-cased base names (an exact match is just a special case of that).
    # NOTE(review): the one-letter names 'x' and 'y' therefore match every
    # column containing that letter -- confirm this is acceptable for the CSVs in use.
    X_columns_upper = [c.upper() for c in X_columns]
    feature_cols = [c for c in df.columns
                    if any(cX in c.upper() for cX in X_columns_upper)]

    firedate_col = [c for c in df.columns if firedatecheck.upper() in c.upper()][0]
    X, y, groupspd = prepare_dataset(df, feature_cols, y_columns, firedate_col)
    print("Ignored columns from csv %s"%([c for c in df.columns if c not in X.columns]))

    # Touch the 'id' column only when the caller asked for it, so that CSVs
    # without one still load.
    idpd = df['id'] if returnid else None
    if returnid and len(idpd) != len(X):
        # The ids come from the raw frame while X/y had rows removed, so they
        # no longer correspond row-by-row.
        print("WARNING: %d rows were removed during cleaning; returned ids are not aligned with X/y"
              % (len(idpd) - len(X)))
    df = None  # release the raw frame

    all_feature_cols = list(X.columns)
    if featuredrop:
        # A bare string would otherwise be matched character by character.
        patterns = [featuredrop] if isinstance(featuredrop, str) else featuredrop
        X = X.drop(columns=[c for c in X.columns if any(fd in c for fd in patterns)])
    print("Dropped columns %s"%([c for c in all_feature_cols if c not in X.columns]))

    if returnid:
        return X, y, groupspd, idpd
    return X, y, groupspd

In [28]:
def prepare_dataset(df, X_columns, y_columns, firedate_col):
    """Clean the selected columns and split them into features, target and groups.

    Only the feature, target and fire-date columns are kept. Rows containing
    NaN are removed, then exact duplicate rows (first occurrence kept), and
    the index is renumbered from 0. The row count is printed after each step.
    In the returned feature frame, columns named 'x' and 'y' are renamed to
    'xpos' and 'ypos'.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; it is not modified.
    X_columns, y_columns : list of str
        Names of the feature and target columns.
    firedate_col : str
        Name of the column returned as the grouping series.

    Returns
    -------
    (X, y, groupspd)
        Feature DataFrame, target DataFrame and the fire-date Series, all
        sharing the renumbered index.
    """
    selected = df[X_columns + y_columns + [firedate_col]]
    print('before nan drop: %d' % len(selected.index))

    without_nan = selected.dropna()
    print('after nan drop: %d' % len(without_nan.index))

    cleaned = without_nan.drop_duplicates(keep='first').reset_index(drop=True)
    print('after dup. drop: %d' % len(cleaned.index))

    print('renaming "x": "xpos", "y": "ypos"')
    # Normalisation is intentionally not applied here; features are returned as-is.
    X = cleaned[X_columns].rename(columns={'x': 'xpos', 'y': 'ypos'})
    y = cleaned[y_columns]
    groupspd = cleaned[firedate_col]
    return X, y, groupspd

In [29]:
# Load features (X), target (y) and fire-date groups (g) from the training CSV.
# NOTE(review): this is an absolute, machine-specific path.
X, y, g = load_dataset(
    '/home/aapostolakis/Documents/ffpdata/newcrossval/datasets/randomnofire/old_random_new_features_norm.csv'
)

before nan drop: 26504
after nan drop: 26504
after dup. drop: 26504
renaming "x": "xpos", "y": "ypos"
Ignored columns from csv ['id', 'firedate', 'fire', 'x', 'y']
Dropped columns []


In [30]:
# Hyper-parameter set handed to create_NN_model.
# NOTE(review): none of the code visible in this part of the notebook reads the
# 'ES_*', 'batch_size', 'class_weights', 'max_epochs' or 'feature_drop' entries.
params = dict([
    ('ES_mindelta', 0.002),
    ('ES_monitor', 'loss'),
    ('ES_patience', 10),
    ('batch_size', 512),
    ('class_weights', {0: 1, 1: 10}),
    ('dropout', 0.1),
    ('feature_drop', ('dir_max', 'dom_dir', 'month', 'wkd')),
    ('max_epochs', 2000),
    ('metric', 'accuracy'),
    # (number of extra hidden layers, {layer key: node count})
    ('n_internal_layers', (0, {'layer_1_0_nodes': 70.0})),
    ('optimizer', {'adam_params': None, 'name': 'Adam'}),
])

In [31]:
# Build and compile the network described by `params`, sized to the columns of X.
nnmodel = create_NN_model(params=params, X=X)