# Load Libraries

In [1]:
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import ConvLSTM2D
from keras.utils import to_categorical
from keras.models import model_from_json
import pandas as pd
from hdfs import InsecureClient

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# Connect to Hadoop

In [2]:
# Create the HDFS client that every later cell uses for reading and writing.
# NOTE(review): the endpoint URL and user name are hard-coded here — consider
# moving them to a configuration cell or environment variables.
client_hdfs = InsecureClient('http://awscdh6-ma.sap.local:9870', user='dr.who')

In [3]:
# List the entries of the project directory on HDFS as a quick connectivity check.
client_hdfs.list('/tmp/tbr/BARMER/DSP')

['data_labeled_performance',
 'data_labeled_training',
 'data_unlabeled_predictions',
 'model']

# Load Model from Hadoop

Model Structure:

In [4]:
# Read the serialized model architecture (JSON text) from HDFS.
# The `with` block closes the reader on exit, so no explicit close() is needed.
with client_hdfs.read('/tmp/tbr/BARMER/DSP/model/model_structure.json', encoding='utf-8') as reader:
    loaded_model_json = reader.read()

# Rebuild the Keras model from its JSON description; the trained weights are
# loaded separately in a later cell.
model = model_from_json(loaded_model_json)

Instructions for updating:
Colocations handled automatically by placer.


In [26]:
# Show only the first 100 characters of the JSON to confirm what was read.
loaded_model_json[:100]

'{"class_name": "Sequential", "config": {"name": "sequential_2", "layers": [{"class_name": "ConvLSTM2'

Model Weights:

In [27]:
# Copy the weights file from HDFS to the local target 'download', overwriting
# any previous copy. The returned local path is what load_weights() receives
# in a later cell.
path = "/tmp/tbr/BARMER/DSP/model/model_weights.h5"
download_path = client_hdfs.download(path, 'download', overwrite=True)
download_path

'C:\\Users\\tbraeutigam\\OneDrive\\Data Science\\Code\\ISR\\ISR - BARMER\\ISR DSP\\download'

In [8]:
# Load the downloaded weights into the model rebuilt from JSON.
model.load_weights(download_path)

Compile model:

In [9]:
# Compile the loaded model.
# NOTE(review): this notebook maps six activity classes, so 'binary_crossentropy'
# looks unusual for the loss — confirm it matches the training configuration.
# The notebook only calls prediction on the model afterwards.
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# Load Data (unlabeled) from Hadoop

In [10]:
# List the inertial-signal files available for the unlabeled prediction data.
client_hdfs.list('/tmp/tbr/BARMER/DSP/data_unlabeled_predictions/Inertial Signals')

['body_acc_x.txt',
 'body_acc_y.txt',
 'body_acc_z.txt',
 'body_gyro_x.txt',
 'body_gyro_y.txt',
 'body_gyro_z.txt',
 'total_acc_x.txt',
 'total_acc_y.txt',
 'total_acc_z.txt']

In [28]:
def load_dataset(prefix):
    """Load the input array for the dataset group named ``prefix``.

    Thin wrapper around ``load_dataset_group``: only input data is loaded
    (no labels), and the group's array is returned unchanged.
    """
    return load_dataset_group(prefix)

In [12]:
# load a dataset group
def load_dataset_group(group):
    """Load the nine inertial-signal files of ``group`` as a single array.

    The files are requested in a fixed order — total acceleration, body
    acceleration, body gyroscope, each for the x, y and z axes — and handed to
    ``load_group`` together with ``group``; its result is returned unchanged.
    """
    signal_kinds = ('total_acc', 'body_acc', 'body_gyro')
    axes = ('x', 'y', 'z')

    # Relative path of every signal file, grouped by sensor kind.
    filenames = [
        '/Inertial Signals/' + kind + '_' + axis + '.txt'
        for kind in signal_kinds
        for axis in axes
    ]

    return load_group(filenames, group)

In [13]:
# load a list of files and return as a 3d numpy array
def load_group(filenames, group):
    """Load each file of a group and stack the results along a third axis.

    Every entry of ``filenames`` is appended to ``group`` to form the path
    given to ``load_file``; the per-file arrays are combined with ``dstack``
    so that each file becomes one slice of the last dimension.
    """
    per_file_arrays = [load_file(group + filename) for filename in filenames]
    return dstack(per_file_arrays)

In [14]:
# load a single file as a numpy array
def load_file(filepath):
    """Read one whitespace-delimited text file from HDFS into a numpy array.

    Parameters
    ----------
    filepath : str
        Path of the file relative to the project root ``/tmp/tbr/BARMER/DSP/``.

    Returns
    -------
    numpy.ndarray
        The parsed contents (``DataFrame.values``): one row per line of the
        file, one column per whitespace-separated field. No header row is
        expected.
    """
    path = '/tmp/tbr/BARMER/DSP/' + filepath

    # Uses the notebook-level `client_hdfs` connection; the `with` block
    # releases the stream once the file has been parsed.
    with client_hdfs.read(path, encoding='utf-8') as reader:
        # sep=r'\s+' is the documented equivalent of delim_whitespace=True,
        # which recent pandas releases deprecate.
        dataframe = pd.read_csv(reader, header=None, sep=r'\s+')

    return dataframe.values

In [15]:
# Load the unlabeled inertial-signal data of the 'data_unlabeled_predictions' group.
X = load_dataset('data_unlabeled_predictions')

In [16]:
# Peek at the first sample to sanity-check the loaded values.
X[0]

array([[ 1.041216  , -0.2697959 ,  0.02377977, ...,  0.4374637 ,
         0.5313492 ,  0.1365279 ],
       [ 1.041803  , -0.280025  ,  0.07629271, ...,  0.4682641 ,
         0.7210685 ,  0.09762239],
       [ 1.039086  , -0.2926631 ,  0.1474754 , ...,  0.4982574 ,
         0.5203284 ,  0.08355578],
       ...,
       [ 0.9930164 , -0.2599865 ,  0.1443951 , ..., -0.00505586,
        -0.07734212,  0.03225787],
       [ 0.9932414 , -0.2620643 ,  0.1447033 , ..., -0.02043194,
        -0.072973  ,  0.02700848],
       [ 0.9943906 , -0.2641348 ,  0.1454939 , ..., -0.02999741,
        -0.07064875,  0.03054609]])

In [17]:
# Shape of a single sample; make_prediction() reshapes each sample based on it.
X[0].shape

(128, 9)

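# Data Preprocessing

No separate preprocessing step is applied in this notebook; the only transformation is the reshape performed inside `make_prediction` below.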

# Make Predictions

In [18]:
def make_prediction(X):
    """Predict the activity class code for every sample in ``X``.

    Each sample is split into ``time_steps`` subsequences of ``columns``
    readings so that it matches the 5-D layout
    ``(samples, time steps, rows, cols, channels)`` used for the model input.

    Parameters
    ----------
    X : numpy.ndarray
        Input of shape ``(samples, 128, 9)`` — 4 * 32 readings with 9 signal
        channels per sample. Any other size makes ``reshape`` raise
        ``ValueError``.

    Returns
    -------
    numpy.ndarray
        One predicted class code per sample.
    """
    # define parameters
    verbose = 0
    samples = X.shape[0]
    time_steps = 4
    rows = 1
    columns = 32
    channels = 9  # number of features

    # reshape data into subsequences (samples, time steps, rows, cols, channels)
    X = X.reshape((samples, time_steps, rows, columns, channels))

    # Run inference. `predict` + argmax is used instead of `predict_classes`,
    # which newer Keras/TensorFlow releases no longer provide; the logic below
    # mirrors what `predict_classes` computed.
    probabilities = model.predict(X, verbose=verbose)

    if probabilities.shape[-1] > 1:
        # multi-class output: pick the most probable class per sample
        class_predictions = probabilities.argmax(axis=-1)
    else:
        # single-unit output: threshold at 0.5
        class_predictions = (probabilities > 0.5).astype('int32')

    return class_predictions

In [19]:
# Run the model on all samples and keep the class codes in a one-column DataFrame.
predictions = pd.DataFrame(make_prediction(X), columns=['Predicted Activity Code'])

- 0 WALKING
- 1 WALKING_UPSTAIRS
- 2 WALKING_DOWNSTAIRS
- 3 SITTING
- 4 STANDING
- 5 LAYING

In [20]:
# Activity name for each predicted class code (position in the list == code).
mapping = dict(enumerate([
    "WALKING",
    "WALKING_UPSTAIRS",
    "WALKING_DOWNSTAIRS",
    "SITTING",
    "STANDING",
    "LAYING",
]))

In [21]:
# Translate every predicted code into its activity name via `mapping`.
pred_activity_text = list(map(mapping.__getitem__, predictions['Predicted Activity Code']))

In [22]:
# Attach the activity names as a second column next to the numeric codes.
predictions['Predicted Activity Text'] = pred_activity_text

In [23]:
# Show a small random sample of the predictions, ordered by class code.
# A fixed random_state keeps the displayed rows stable across re-runs, and the
# sample size is capped so the cell also works with fewer than 10 predictions.
predictions.sample(min(10, len(predictions)), random_state=42).sort_values("Predicted Activity Code")

Unnamed: 0,Predicted Activity Code,Predicted Activity Text
2494,0,WALKING
1617,0,WALKING
1001,0,WALKING
2918,1,WALKING_UPSTAIRS
1482,2,WALKING_DOWNSTAIRS
1018,2,WALKING_DOWNSTAIRS
635,3,SITTING
2085,3,SITTING
1439,5,LAYING
2469,5,LAYING


# Write Predictions to Hadoop

In [24]:
# Target file on HDFS for the prediction table.
path = "/tmp/tbr/BARMER/DSP/data_unlabeled_predictions/predictions.csv"

# Stream the DataFrame as CSV straight to HDFS, replacing any existing file.
# to_csv is called with its defaults.
with client_hdfs.write(path, encoding = 'utf-8', overwrite=True) as writer:
    predictions.to_csv(writer)

In [25]:
# Confirm that predictions.csv now exists next to the input signals.
client_hdfs.list("/tmp/tbr/BARMER/DSP/data_unlabeled_predictions/")

['Inertial Signals', 'predictions.csv']