# Load Libraries

In [1]:
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import ConvLSTM2D
from keras.utils import to_categorical
from keras.models import model_from_json
import pandas as pd
from hdfs import InsecureClient
import mlflow
import os
import shutil

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# Connect to Hadoop

In [2]:
# Connection settings for the WebHDFS endpoint used throughout the notebook
HDFS_URL = 'http://awscdh6-ma.sap.local:9870'
HDFS_USER = 'dr.who'

client_hdfs = InsecureClient(url=HDFS_URL, user=HDFS_USER)

In [3]:
# Show the entries of the project directory on HDFS
client_hdfs.list(hdfs_path='/tmp/tbr/BARMER/DSP')

['data_labeled_performance',
 'data_labeled_training',
 'data_unlabeled_predictions',
 'model']

# Load Model from Hadoop

Model Structure:

In [4]:
# Read the serialized model architecture (JSON text) from HDFS.
# The `with` block closes the reader on exit, so no explicit close() is needed.
with client_hdfs.read('/tmp/tbr/BARMER/DSP/model/model_structure.json', encoding='utf-8') as reader:
    loaded_model_json = reader.read()

# Rebuild the Keras model from its JSON description
model = model_from_json(loaded_model_json)

Instructions for updating:
Colocations handled automatically by placer.


  tensor_proto.float_val.extend([np.asscalar(x) for x in proto_values])


In [5]:
# Show the first 100 characters of the JSON that was read
loaded_model_json[:100]

'{"class_name": "Sequential", "config": {"name": "sequential_1", "layers": [{"class_name": "ConvLSTM2'

Model Weights:

In [6]:
# Fetch the weights file from HDFS into the local path 'download'
# (an existing local copy is overwritten) and display where it ended up.
path = "/tmp/tbr/BARMER/DSP/model/model_weights.h5"
download_path = client_hdfs.download(
    hdfs_path=path,
    local_path='download',
    overwrite=True,
)
download_path

'C:\\Users\\tbraeutigam\\OneDrive\\Data Science\\Code\\ISR\\ISR - BARMER\\ISR DSP\\download'

In [7]:
# Populate the model rebuilt from JSON with the weights downloaded from HDFS
model.load_weights(filepath=download_path)

Compile model:

In [8]:
# The loaded model must be compiled before it can be evaluated
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

Delete the temporary weights file

In [9]:
# Remove the local copy of the weights file. Use the path returned by the
# download call instead of repeating the literal, so the cleanup always
# targets exactly what was downloaded, regardless of the working directory.
os.remove(download_path)

# Load Data (labeled) from Hadoop

In [10]:
# List the signal files available for the labeled performance data
client_hdfs.list(hdfs_path='/tmp/tbr/BARMER/DSP/data_labeled_performance/Inertial Signals')

['body_acc_x.txt',
 'body_acc_y.txt',
 'body_acc_z.txt',
 'body_gyro_x.txt',
 'body_gyro_y.txt',
 'body_gyro_z.txt',
 'total_acc_x.txt',
 'total_acc_y.txt',
 'total_acc_z.txt',
 'y_labels.txt']

In [11]:
# load one dataset group and return its inputs together with one-hot labels
def load_dataset(prefix):
    """Load the inputs and labels of one dataset group.

    Args:
        prefix: Name of the dataset group, passed on to ``load_dataset_group``.

    Returns:
        Tuple ``(X, y)``: the inputs as returned by ``load_dataset_group`` and
        the labels after subtracting one and one-hot encoding them.
    """
    features, labels = load_dataset_group(prefix)

    # shift the class values down by one (zero-offset), then one-hot encode
    one_hot_labels = to_categorical(labels - 1)

    return features, one_hot_labels

A function for loading a dataset group of files

In [12]:
# load a dataset group, such as train or test
def load_dataset_group(group):
    """Load the nine signal files and the label file of one dataset group.

    Args:
        group: Name of the dataset group; used as path prefix for every file.

    Returns:
        Tuple ``(X, y)``: the stacked signal data from ``load_group`` and the
        contents of ``y_labels.txt`` from ``load_file``.
    """
    # total acceleration, body acceleration, body gyroscope (each with x, y, z)
    signal_kinds = ('total_acc', 'body_acc', 'body_gyro')
    axes = ('x', 'y', 'z')
    filenames = [
        '/Inertial Signals/' + kind + '_' + axis + '.txt'
        for kind in signal_kinds
        for axis in axes
    ]

    # input data: all nine files combined into a single array
    X = load_group(filenames, group)

    # class output
    y = load_file(group + '/Inertial Signals/y_labels.txt')

    return X, y

A function for loading a group of files

In [13]:
# load a list of files and return as a 3d numpy array
def load_group(filenames, group):
    """Load several files of one group and stack them depth-wise.

    Args:
        filenames: File names; each one is appended to ``group``.
        group: Path prefix of the dataset group.

    Returns:
        The per-file arrays from ``load_file`` combined with ``dstack``, so
        that each file becomes one slice along the third dimension.
    """
    per_file = [load_file(group + name) for name in filenames]

    # stack group so that features are the 3rd dimension
    return dstack(per_file)

A function for loading a single file

In [14]:
# load a single file as a numpy array
def load_file(filepath):
    """Read one whitespace-delimited text file from HDFS.

    Args:
        filepath: Path of the file relative to ``/tmp/tbr/BARMER/DSP/``.

    Returns:
        The file contents as a numpy array (``DataFrame.values``); the file is
        parsed without a header row.
    """
    path = '/tmp/tbr/BARMER/DSP/' + filepath

    with client_hdfs.read(path, encoding='utf-8') as reader:
        # sep=r'\s+' is the equivalent of delim_whitespace=True, which is
        # deprecated in recent pandas releases.
        dataframe = pd.read_csv(reader, header=None, sep=r'\s+')

    return dataframe.values

Execute Function-Chain

In [15]:
# Run the loader chain for the labeled performance (test) data
testX, testy = load_dataset(prefix='data_labeled_performance')

In [16]:
# Inspect the first loaded input sample
testX[0]

array([[ 1.041216  , -0.2697959 ,  0.02377977, ...,  0.4374637 ,
         0.5313492 ,  0.1365279 ],
       [ 1.041803  , -0.280025  ,  0.07629271, ...,  0.4682641 ,
         0.7210685 ,  0.09762239],
       [ 1.039086  , -0.2926631 ,  0.1474754 , ...,  0.4982574 ,
         0.5203284 ,  0.08355578],
       ...,
       [ 0.9930164 , -0.2599865 ,  0.1443951 , ..., -0.00505586,
        -0.07734212,  0.03225787],
       [ 0.9932414 , -0.2620643 ,  0.1447033 , ..., -0.02043194,
        -0.072973  ,  0.02700848],
       [ 0.9943906 , -0.2641348 ,  0.1454939 , ..., -0.02999741,
        -0.07064875,  0.03054609]])

In [17]:
# Inspect the one-hot encoded label of the first sample
testy[0]

array([0., 0., 0., 0., 1., 0.], dtype=float32)

# Data Preprocessing

# Evaluate Performance

In [18]:
def evaluate_performance(testX, testy):
    """Evaluate the notebook-level ``model`` on the given test data.

    Args:
        testX: Test inputs; reshaped to
            (samples, time steps, rows, cols, channels) before evaluation.
        testy: Test labels, passed to ``model.evaluate`` unchanged.

    Returns:
        Tuple ``(loss, accuracy)`` as reported by ``model.evaluate``.
    """
    # target layout: (samples, time steps, rows, cols, channels);
    # each sample becomes 4 subsequences of 1 x 32 steps with 9 features
    n_samples = testX.shape[0]
    target_shape = (n_samples, 4, 1, 32, 9)
    reshaped = testX.reshape(target_shape)

    test_loss, test_accuracy = model.evaluate(reshaped, testy, verbose=0)
    return test_loss, test_accuracy

In [19]:
# Score the loaded model on the labeled performance data
loss, accuracy = evaluate_performance(testX=testX, testy=testy)

In [20]:
# Display the evaluation result as (loss, accuracy)
loss, accuracy

(0.16067222294393982, 0.9662368297576904)

# Update MLflow on Hadoop

In [21]:
# Fetch the MLflow tracking directory from HDFS into ./mlruns
# (an existing local copy is overwritten)
client_hdfs.download(
    hdfs_path="/tmp/tbr/BARMER/DSP/model/mlruns",
    local_path="./mlruns",
    overwrite=True,
)

'C:\\Users\\tbraeutigam\\OneDrive\\Data Science\\Code\\ISR\\ISR - BARMER\\ISR DSP\\mlruns'

In [22]:
# Point MLflow at the tracking directory downloaded from HDFS
mlflow.set_tracking_uri('./mlruns')

# get_experiment_by_name returns None when the experiment does not exist;
# fail with a clear message instead of an AttributeError on the next line.
experiment = mlflow.get_experiment_by_name('Activity_Recognition')
if experiment is None:
    raise LookupError(
        "MLflow experiment 'Activity_Recognition' not found under ./mlruns"
    )
experiment.experiment_id

'1'

In [23]:
# Record the evaluation results as one MLflow run. Using the run as a context
# manager ends it even if one of the logging calls raises (for example when
# the artifact file is missing), so no run is left active in the kernel.
with mlflow.start_run(experiment_id=experiment.experiment_id,
                      run_name='04_evaluate_performance'):
    mlflow.set_tag("type", "evaluation")
    mlflow.log_metric("loss", loss)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_param("Examples Testing", len(testX))
    mlflow.log_param("model", "Keras")
    # Notebook file at its location in Datahub
    # (alternative with a relative path: "04_evaluate_performance.ipynb")
    mlflow.log_artifact("/vrep/vflow/tmp/DSP/EXECUTED/04_evaluate_performance.ipynb")

In [24]:
# Make sure no run is still active, then remember where the tracking data
# lives locally so it can be uploaded and cleaned up afterwards.
mlflow.end_run()
local_path = mlflow.get_tracking_uri()
local_path

'./mlruns'

In [25]:
# Push the updated local tracking directory back into the model folder on
# HDFS, overwriting the copy stored there.
path = "/tmp/tbr/BARMER/DSP/model/"
_ = client_hdfs.upload(
    local_path=local_path,
    hdfs_path=path,
    overwrite=True,
)

In [26]:
# Show the tracking directory on HDFS after the upload
client_hdfs.list(hdfs_path='/tmp/tbr/BARMER/DSP/model/mlruns')

['.trash', '0', '1']

Delete temp folder

In [27]:
# Remove the local tracking directory; errors during removal are ignored
shutil.rmtree(path=local_path, ignore_errors=True)