# Load Libraries

In [43]:
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import ConvLSTM2D
from keras.utils import to_categorical
from keras.models import model_from_json
import pandas as pd
from hdfs import InsecureClient
import mlflow
import os
import shutil

# Connect to Hadoop

In [31]:
# Create the HDFS client used by every read/write/list call in this notebook.
# NOTE(review): the endpoint URL and user name are hard-coded — consider moving them to a config cell.
client_hdfs = InsecureClient('http://awscdh6-ma.sap.local:9870', user='dr.who')

In [30]:
# List the project directory on HDFS to confirm the connection works and see which sub-folders exist.
client_hdfs.list('/tmp/tbr/BARMER/DSP')

['data_labeled_performance',
 'data_labeled_training',
 'data_unlabeled_predictions',
 'model']

# Load Model from Hadoop

Model Structure:

In [9]:
# Read the serialized model architecture (JSON text) from HDFS.
# The `with` statement releases the reader when the block exits, so no
# explicit close() call is needed inside it.
with client_hdfs.read('/tmp/tbr/BARMER/DSP/model/model_structure.json', encoding='utf-8') as reader:
    loaded_model_json = reader.read()

# Rebuild the model from its JSON description; weights are loaded in a later cell.
model = model_from_json(loaded_model_json)

In [10]:
# Peek at the first 100 characters of the JSON as a quick sanity check.
loaded_model_json[:100]

'{"class_name": "Sequential", "config": {"name": "sequential_1", "layers": [{"class_name": "ConvLSTM2'

Model Weights:

In [11]:
# Copy the weights file from HDFS to a local file named 'download';
# the next cell loads the weights from this local copy.
path = "/tmp/tbr/BARMER/DSP/model/model_weights.h5"
download_path = client_hdfs.download(path, 'download', overwrite=True)
# Display the local path returned by the download call.
download_path

'C:\\Users\\tbraeutigam\\OneDrive\\Data Science\\Code\\ISR\\ISR - BARMER\\ISR DSP\\download'

In [12]:
# Load the downloaded weights into the model that was rebuilt from JSON.
model.load_weights(download_path)

Compile model:

In [13]:
# Compile the loaded model.
# NOTE(review): loss, optimizer and metrics are hard-coded here — confirm they match the training notebook.
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

Delete the temporary local weights file

In [14]:
# Remove the local copy of the weights file. Use the path returned by the
# download call instead of repeating the literal name, so exactly the file
# that was written is deleted, independent of the current working directory.
os.remove(download_path)

# Load Data (unlabeled) from Hadoop

In [15]:
# Check which signal files are available in the unlabeled prediction data set
client_hdfs.list('/tmp/tbr/BARMER/DSP/data_unlabeled_predictions/Inertial Signals')

['body_acc_x.txt',
 'body_acc_y.txt',
 'body_acc_z.txt',
 'body_gyro_x.txt',
 'body_gyro_y.txt',
 'body_gyro_z.txt',
 'total_acc_x.txt',
 'total_acc_y.txt',
 'total_acc_z.txt']

In [16]:
def load_dataset(prefix):
    """Return the input array for the data set stored under ``prefix``.

    Thin wrapper around ``load_dataset_group``; only inputs are loaded
    here, no labels.
    """
    return load_dataset_group(prefix)

In [17]:
# load a dataset group
def load_dataset_group(group):
    """Load all nine inertial-signal files of ``group`` as a single array.

    The files are requested in the order total acceleration, body
    acceleration, body gyroscope, each with its x, y and z axis, and are
    handed to ``load_group`` together with ``group``.
    """
    signal_kinds = ('total_acc', 'body_acc', 'body_gyro')
    axes = ('x', 'y', 'z')

    # one relative path per (signal kind, axis) pair, kinds outermost
    filenames = ['/Inertial Signals/' + kind + '_' + axis + '.txt'
                 for kind in signal_kinds
                 for axis in axes]

    return load_group(filenames, group)

In [18]:
# load a list of files and return as a 3d numpy array
def load_group(filenames, group):
    """Load every file in ``filenames`` (relative to ``group``) and stack them.

    Each file is read with ``load_file(group + name)``; the resulting
    arrays are stacked with ``dstack`` so that the files form the third
    dimension of the returned array.
    """
    per_file_arrays = [load_file(group + name) for name in filenames]
    return dstack(per_file_arrays)

In [19]:
# load a single file as a numpy array
def load_file(filepath):
    """Read one whitespace-delimited text file from HDFS into a numpy array.

    Parameters
    ----------
    filepath : str
        Path relative to the project root ``/tmp/tbr/BARMER/DSP/``.

    Returns
    -------
    numpy.ndarray
        The parsed file contents (``DataFrame.values``); the file is read
        without a header row.
    """
    path = '/tmp/tbr/BARMER/DSP/' + filepath

    with client_hdfs.read(path, encoding='utf-8') as reader:
        # sep=r'\s+' replaces the deprecated delim_whitespace=True and
        # splits on any run of whitespace in the same way.
        dataframe = pd.read_csv(reader, header=None, sep=r'\s+')

    return dataframe.values

In [20]:
# Load the nine inertial-signal files of the unlabeled prediction set into one array
X = load_dataset('data_unlabeled_predictions')

In [21]:
# Inspect the first sample.
X[0]

array([[ 1.041216  , -0.2697959 ,  0.02377977, ...,  0.4374637 ,
         0.5313492 ,  0.1365279 ],
       [ 1.041803  , -0.280025  ,  0.07629271, ...,  0.4682641 ,
         0.7210685 ,  0.09762239],
       [ 1.039086  , -0.2926631 ,  0.1474754 , ...,  0.4982574 ,
         0.5203284 ,  0.08355578],
       ...,
       [ 0.9930164 , -0.2599865 ,  0.1443951 , ..., -0.00505586,
        -0.07734212,  0.03225787],
       [ 0.9932414 , -0.2620643 ,  0.1447033 , ..., -0.02043194,
        -0.072973  ,  0.02700848],
       [ 0.9943906 , -0.2641348 ,  0.1454939 , ..., -0.02999741,
        -0.07064875,  0.03054609]])

In [22]:
# Shape of a single sample.
X[0].shape

(128, 9)

# Data Preprocessing

# Make Predictions

In [23]:
def make_prediction(X):
    """Predict an activity class code for every sample in ``X``.

    Parameters
    ----------
    X : numpy.ndarray
        Input array whose first axis indexes the samples. Each sample must
        hold ``time_steps * rows * columns * channels`` (4 * 1 * 32 * 9)
        values; the loader in this notebook yields samples of shape (128, 9).

    Returns
    -------
    numpy.ndarray
        The predicted class code for each sample.

    Notes
    -----
    Uses the notebook-level ``model``; it must be loaded before calling.
    """
    # define parameters
    verbose = 1
    samples = X.shape[0]
    time_steps = 4
    rows = 1
    columns = 32
    channels = 9  # number of features

    # reshape data into subsequences (samples, time steps, rows, cols, channels)
    X = X.reshape((samples, time_steps, rows, columns, channels))

    # Run inference (no fitting happens here). `predict_classes` is not
    # available in newer Keras releases, so its logic is spelled out:
    # argmax for multi-unit outputs, 0.5 threshold for a single-unit output.
    probabilities = model.predict(X, verbose=verbose)
    if probabilities.shape[-1] > 1:
        class_predictions = probabilities.argmax(axis=-1)
    else:
        class_predictions = (probabilities > 0.5).astype('int32')

    return class_predictions

In [24]:
# Run the model on all samples and wrap the class codes in a one-column DataFrame.
predictions = pd.DataFrame(make_prediction(X), columns=['Predicted Activity Code'])



- 0 WALKING
- 1 WALKING_UPSTAIRS
- 2 WALKING_DOWNSTAIRS
- 3 SITTING
- 4 STANDING
- 5 LAYING

In [25]:
# Class code -> activity name; the position in the list is the class code.
mapping = dict(enumerate([
    "WALKING",
    "WALKING_UPSTAIRS",
    "WALKING_DOWNSTAIRS",
    "SITTING",
    "STANDING",
    "LAYING",
]))

In [26]:
# Translate every predicted class code into its activity name
# (an unknown code raises KeyError).
pred_activity_text = list(map(mapping.__getitem__, predictions['Predicted Activity Code']))

In [27]:
# Add the readable label next to the numeric code (adds a column to `predictions` in place).
predictions['Predicted Activity Text'] = pred_activity_text

In [28]:
# Show 10 random rows, ordered by class code.
# NOTE(review): no random_state is passed, so the sampled rows differ between runs.
predictions.sample(10).sort_values("Predicted Activity Code")

Unnamed: 0,Predicted Activity Code,Predicted Activity Text
2883,0,WALKING
2509,0,WALKING
2149,0,WALKING
756,0,WALKING
398,0,WALKING
2372,1,WALKING_UPSTAIRS
2001,2,WALKING_DOWNSTAIRS
2577,4,STANDING
20,4,STANDING
1386,4,STANDING


# Write Predictions to Hadoop

In [32]:
# Target file on HDFS, next to the unlabeled input data.
path = "/tmp/tbr/BARMER/DSP/data_unlabeled_predictions/predictions.csv"

# Stream the DataFrame as CSV straight to HDFS, replacing any existing file.
with client_hdfs.write(path, encoding = 'utf-8', overwrite=True) as writer:
    predictions.to_csv(writer)

In [33]:
# Verify that predictions.csv is now present in the target directory.
client_hdfs.list("/tmp/tbr/BARMER/DSP/data_unlabeled_predictions/")

['Inertial Signals', 'predictions.csv']

# Update MLflow on Hadoop

In [34]:
# Fetch the MLflow tracking directory from HDFS into ./mlruns so this run can be logged locally.
client_hdfs.download("/tmp/tbr/BARMER/DSP/model/mlruns", "./mlruns", overwrite=True)

'C:\\Users\\tbraeutigam\\OneDrive\\Data Science\\Code\\ISR\\ISR - BARMER\\ISR DSP\\mlruns'

In [35]:
# Point MLflow at the local copy of the tracking directory and look up the experiment by name.
mlflow.set_tracking_uri('./mlruns')
experiment = mlflow.get_experiment_by_name('Activity_Recognition')
# NOTE(review): presumably `experiment` is None when the name is unknown, which would make the next line fail — confirm.
experiment.experiment_id

'1'

In [36]:
# Record this prediction run in the looked-up experiment.
# The context manager ends the run even if one of the logging calls raises,
# so a failed cell cannot leave a dangling active run behind in the kernel.
with mlflow.start_run(experiment_id=experiment.experiment_id,
                      run_name='02_make_prediction'):
    # Placeholder metric, named "not available" by the original author.
    mlflow.log_metric("not available", 0)
    mlflow.log_param("model", "Keras")
    # Attach the notebook itself; the path is relative to the working directory.
    mlflow.log_artifact("02_make_prediction.ipynb")

In [37]:
# Remember where the tracking data lives locally; it is uploaded and then deleted in the cells below.
local_path = mlflow.get_tracking_uri()
# NOTE(review): the run was already ended in the previous cell, so this call looks redundant — confirm before removing.
mlflow.end_run()
local_path

'./mlruns'

In [38]:
# Upload the local tracking directory back into the HDFS model folder, overwriting the remote copy.
path = "/tmp/tbr/BARMER/DSP/model/"
# Assigning to `_` keeps the returned value out of the cell output.
_ = client_hdfs.upload(hdfs_path=path, local_path=local_path, overwrite=True)

In [39]:
# Inspect the tracking directory on HDFS after the upload.
client_hdfs.list('/tmp/tbr/BARMER/DSP/model/mlruns')

['.trash', '0', '1']

Delete temp folder

In [44]:
# Remove the local tracking directory; ignore_errors=True keeps the cell from raising if removal fails.
shutil.rmtree(local_path, ignore_errors=True)