In [None]:
import os
import shutil
import tempfile
import uuid

import mlflow
import numpy as np
import pandas as pd
from keras.layers import Dense, Activation
from keras.models import Sequential
from mlflow.keras import save_model
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# load the label table and the feature table from the data directory
train_labels = pd.read_csv('../data/train_labels.csv')
train_values = pd.read_csv('../data/train_values.csv')
# attach the label columns to every feature row, matching on building_id
labels_by_building = train_labels.set_index('building_id')
tt_data = train_values.join(labels_by_building, on='building_id')

In [None]:
# drop unnecessary columns from the training features: the row identifier
# and the target column that the model has to predict
non_feature_columns = ['building_id', 'damage_grade']
tt_x = tt_data.drop(columns=non_feature_columns).copy()

In [None]:
# label encode every column that holds strings (pandas dtype 'object');
# each column gets its own freshly fitted encoder
object_columns = [name for name in tt_x.columns if tt_x[name].dtype == 'object']
for name in object_columns:
    encoder = LabelEncoder()
    tt_x[name] = encoder.fit_transform(tt_x[name])

In [None]:
def one_hot_encode(data):
    """Convert 1-based integer class labels into a one-hot matrix.

    Parameters
    ----------
    data : array-like of int
        One class label per example (pandas Series, list or NumPy array).
        Labels are expected to start at 1; label ``k`` sets column ``k - 1``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(data), n_classes)`` holding a single 1 per
        row. ``n_classes`` is the larger of the number of distinct labels and
        the highest label, so a label that happens to be absent from ``data``
        still gets its (all-zero) column instead of causing an IndexError.
        Empty input yields an array of shape ``(0, 0)``.
    """
    labels = np.asarray(data, dtype=int)
    examples = len(labels)
    if examples == 0:
        # nothing to encode; labels.max() below would fail on an empty array
        return np.zeros((0, 0))

    # Sizing by the distinct-label count alone breaks when labels have gaps
    # (e.g. only 1 and 3 present), so also make room for the highest label.
    n_classes = max(len(np.unique(labels)), int(labels.max()))

    arr = np.zeros((examples, n_classes))
    arr[np.arange(examples), labels - 1] = 1
    return arr

# build the one-hot target matrix from the damage_grade column
tt_y = one_hot_encode(tt_data['damage_grade'])

In [None]:
# hold out 25% of the rows as the test set; random_state pins the shuffle
# so the same split is produced on every run
train_x, test_x, train_y, test_y = train_test_split(
    tt_x,
    tt_y,
    test_size=0.25,
    random_state=5,
)

In [None]:
def run_learning(layer_input, layer_hidden, layer_output, batch_size, nb_epoch, loss, optimizer, metrics):
    """Train a dense Keras classifier and record the run in MLflow.

    The training and evaluation data are NOT passed in: the function reads the
    module-level ``train_x``, ``train_y``, ``test_x`` and ``test_y``.

    Parameters
    ----------
    layer_input : tuple
        ``(units, input_dim, activation)`` for the first Dense layer.
    layer_hidden : iterable of tuple
        ``(units, activation)`` for each additional Dense layer, in order.
    layer_output : tuple
        ``(units, activation)`` for the final Dense layer.
    batch_size : int
        Batch size passed to ``model.fit``.
    nb_epoch : int
        Number of epochs passed to ``model.fit``.
    loss, optimizer, metrics
        Forwarded unchanged to ``model.compile``.

    Side effects
    ------------
    Points MLflow at ``<parent of cwd>/mlruns``, selects the
    'modeling-earthquake-damage' experiment and, inside one run, logs every
    argument as a parameter, every per-epoch history value as a metric, the
    micro-averaged precision/recall/F-score on the test split, and a zipped
    copy of the saved model as an artifact. Returns nothing.
    """
    # Copy the arguments before any other local exists, so only the eight
    # call parameters are logged and later locals cannot leak into the dict.
    params = dict(locals())

    # set the tracking path to the main directory instead of the run directory
    absolute_path = os.path.abspath("../")
    mlflow.set_tracking_uri(f'file:{absolute_path}/mlruns')
    mlflow.set_experiment('modeling-earthquake-damage')

    with mlflow.start_run(run_name='sample_run'):
        # log parameters
        for name, value in params.items():
            mlflow.log_param(name, value)

        # create model
        model = Sequential()
        model.add(Dense(layer_input[0], input_dim=layer_input[1], activation=layer_input[2]))
        for neurons, activation in layer_hidden:
            model.add(Dense(neurons, activation=activation))
        model.add(Dense(layer_output[0], activation=layer_output[1]))
        model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

        # fit model; the test split doubles as the validation data
        fit_result = model.fit(train_x, train_y,
                               batch_size=batch_size,
                               epochs=nb_epoch,
                               validation_data=(test_x, test_y),
                               shuffle=True)

        # predict, then turn one-hot rows / class scores back into class
        # indices before scoring
        predictions = model.predict(test_x)
        precision, recall, fscore, _ = score(np.argmax(test_y, axis=1),
                                             np.argmax(predictions, axis=1),
                                             average='micro')

        # save per-epoch results, using the 1-based epoch number as the step
        for key, values in fit_result.history.items():
            for epoch, value in enumerate(values, 1):
                mlflow.log_metric(key, value, step=epoch)

        mlflow.log_metric("micro_precision", precision)
        mlflow.log_metric("micro_recall", recall)
        mlflow.log_metric("micro_fscore", fscore)

        # Save and zip the model inside a private temporary directory. The
        # context manager removes it even when saving or logging raises, and
        # never touches files that belong to other runs.
        with tempfile.TemporaryDirectory() as experiment_temp_path:
            model_path = os.path.join(experiment_temp_path, 'model')
            save_model(model, model_path)
            archive_path = shutil.make_archive(model_path, 'zip', model_path)

            # copy the archive (model.zip) into the run's artifact store
            mlflow.log_artifact(archive_path)
        
# first configuration: 76-unit input and hidden layers, batch size 100
params = dict(
    layer_input=(76, 38, 'sigmoid'),
    layer_hidden=[(76, 'sigmoid')],
    layer_output=(3, 'softmax'),
    batch_size=100,
    nb_epoch=10,
    loss='categorical_crossentropy',
    optimizer='adamax',
    metrics=['accuracy'],
)
# train the model and log the run
run_learning(**params)

In [None]:
# same 76-unit network, trained with a batch size of 1000
params = dict(
    layer_input=(76, 38, 'sigmoid'),
    layer_hidden=[(76, 'sigmoid')],
    layer_output=(3, 'softmax'),
    batch_size=1000,
    nb_epoch=10,
    loss='categorical_crossentropy',
    optimizer='adamax',
    metrics=['accuracy'],
)
run_learning(**params)

In [None]:
# wider network: 152 units in the input and hidden layers, batch size 100
params = dict(
    layer_input=(152, 38, 'sigmoid'),
    layer_hidden=[(152, 'sigmoid')],
    layer_output=(3, 'softmax'),
    batch_size=100,
    nb_epoch=10,
    loss='categorical_crossentropy',
    optimizer='adamax',
    metrics=['accuracy'],
)
run_learning(**params)

In [None]:
# 152-unit network, batch size 100
# NOTE(review): these values are identical to the preceding 152-unit cell;
# confirm whether a different configuration was intended here
params = dict(
    layer_input=(152, 38, 'sigmoid'),
    layer_hidden=[(152, 'sigmoid')],
    layer_output=(3, 'softmax'),
    batch_size=100,
    nb_epoch=10,
    loss='categorical_crossentropy',
    optimizer='adamax',
    metrics=['accuracy'],
)
run_learning(**params)