------------------------------------------------------------------
# IMPORT LIBRARIES
------------------------------------------------------------------

In [0]:
# import standard libraries
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization, Activation
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import svm
import keras
# tf.enable_eager_execution()

#import plotting libraries
import warnings # current version of seaborn generates a bunch of warnings that we'll ignore
warnings.filterwarnings("ignore")
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# import file handler libraries
!pip install -U -q PyDrive
import sys, os, shutil, zipfile,glob
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
from sklearn.metrics import confusion_matrix, accuracy_score,roc_curve, auc,roc_auc_score,classification_report



In [0]:
#clear files
!rm -rf /content/training
!rm -rf /content/training_data

In [0]:
from google.colab import drive
drive.mount('/content/gdrive',force_remount=True)
!ls '/content/gdrive/My Drive/Models'

ValueError: ignored

------------------------------------------------------------------
# LINK DATA TO INSTANCE
------------------------------------------------------------------

In [0]:
# zip_id = '1LvGrbtylxRGegXc4W9ZOnosqmV1J855V' #FILE ID CREATED FROM SHARING URL (ID=....)
zip_id = '1jH_aAu-rNBDGUdoag8duHjN2xYHvbraE' #FILE ID CREATED FROM SHARING URL (ID=....)

auth.authenticate_user() # 1. Authenticate and create the PyDrive client.
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

if not os.path.exists('training'): #create directory 
    os.makedirs('training')
    print('made directory')

# DOWNLOAD ZIP
print ("Downloading zip file")
myzip = drive.CreateFile({'id': zip_id})
myzip.GetContentFile('training 2.zip')

# UNZIP ZIP
print ("Uncompressing zip file")
zip_ref = zipfile.ZipFile('training 2.zip', 'r') #file to be extracted
zip_ref.extractall('./training') #where the files are extracted
zip_ref.close()

training_path = '/content/training/training/'
print('finished unzipping, contents:\n',os.listdir(training_path))

made directory
Downloading zip file
Uncompressing zip file
finished unzipping, contents:
 ['heatpad_I_84.csv', 'hairdryer-high_ricky_81.csv', 'inductioncooktop_ricky_17.csv', 'kettle_ricky_58.csv', 'laptop_will_179.csv', 'hairdryer-low_ricky_15.csv', 'hairdryer-high_ricky_78.csv', 'inductioncooktop_ricky_74.csv', 'laptop_ricky_13.csv', 'hairdryer-high_ricky_30.csv', 'kettle_ricky_23.csv', 'inductioncooktop_ricky_3.csv', 'kettle_ricky_61.csv', 'hairdryer-high_ricky_29.csv', 'laptop_will_112.csv', 'heatpad_I_9.csv', 'laptop_will_90.csv', 'hairdryer-high_ricky_37.csv', 'heatpad_I_49.csv', 'inductioncooktop_ricky_12.csv', 'hairdryer-high_ricky_41.csv', 'heatpad_I_51.csv', 'laptop_will_3.csv', 'laptop_ricky_2.csv', 'laptop_will_95.csv', 'kettle_ricky_97.csv', 'inductioncooktop_ricky_53.csv', 'laptop_will_174.csv', 'inductioncooktop_ricky_62.csv', 'kettle_ricky_35.csv', 'inductioncooktop_ricky_14.csv', 'hairdryer-low_ricky_13.csv', 'heatpad_II_7.csv', 'heatpad_I_13.csv', 'laptop_will_83.csv'

In [0]:
model_dir_path = '/content/gdrive/My Drive/Models'
# Check that this directory exists.
os.path.isdir(model_dir_path)

# Make a new directory for our MNIST models
example_path = os.path.join(model_dir_path, 'emonpi')
print('example_path',example_path)

if not os.path.isdir(example_path):
 os.makedirs(example_path)
 print('example path created')

checkpoint_path = os.path.join(example_path, 'lq_model_1.h5')
print('checkpoint_path',checkpoint_path)
# Create a keras callback that saves our model during training.
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path)

In [0]:

def convert_str_to_int(str_list,keys): #converts labels from string to integer 
  list_int = []
  for i in str_list:
    list_int.append(appliance_dict[i])
  return list_int

def convert_int_to_str(integer,keys):
  for appliance,index in keys.items():
    if index == integer:
        #print(integer,appliance)
        return appliance
      
def make3D(array,n): #adds channels to array to simulate an image
  return (np.stack((array,)*n, axis=-1))

------------------------------------------------------------------
# NOISE AND DATA WARPING FUNCTIONS
------------------------------------------------------------------

In [0]:
import numpy as np
from random import randint
import tensorflow as tf


def warp_trim_pad(feature, length):
    """
    Helper function that trims or pads arrays if they have length > length
    :param feature- a feature to be trimmed or padded with zeros, length - a number specifying the length to be trimmed to
    :return: the same feature array, trimmed or padded with zeros
    """
    padded_feature = []
    if (len(feature) > length):
        padded_feature = feature[0:length]
    elif (len(feature) < length):
        padding = np.zeros(length-len(feature))
        padded_feature = np.concatenate([feature, padding])
    return np.array(padded_feature).astype('float32')

def warp_shrink(feat, percentage):
    """
    Takes in a feature in the form of a numpy array, takes a random sample from the array and shrinks it by half.
    The size of the random sample is equal to 1/10 of the size of the source array.  Replaces sample with pair-wise average of sample indices.
    Approach inspired from this paper: https://aaltd16.irisa.fr/files/2016/08/AALTD16_paper_9.pdf
    :param feat - feature to be warped in form of numpy array, percentage - proportion of sample to be warped
    :return: Feature array with sample shrank
    """
    #Get length of array
    sample_range = feat.shape[0]

    #Calculate length of sample
    sample_tenth = int(sample_range*percentage)

    #Shift sample length to be even to allow for pair-wise averages
    if sample_tenth % 2 == 1:
        sample_tenth += 1

    # Generate random index for start of sample
    sample_index = randint(0, sample_range - (sample_tenth))

    #Shift sample index to be even to allow for pair-wise averages
    if sample_index % 2 == 1:
        sample_index += 1

    # If sample will overlap with end of sample, shift sample back to fit in array
    if (sample_index+sample_tenth) > len(feat):
        sample_index = len(feat)-sample_tenth
        sample = feat[sample_index:int(sample_index + sample_tenth)]
    else:
        # Subset feature array into sample
        sample = feat[sample_index:int(sample_index + sample_tenth)]

    #Subset feature array into sample
    sample = feat[sample_index:int(sample_index+sample_tenth)]
    # print('sample index', sample_index)
    # print('sample_tenth', sample_tenth)
    # print('sample length', len(sample))

    avg_arr = []
    for i in range(0, sample_tenth, 2): #iterate thru pairs of elements in sample, calculating average
        avg = (sample[i]+sample[i+1])/2
        avg_arr.append(avg)

    avg_arr = np.array(avg_arr)

    # Replace sample with shrunk average of samples
    feat_before = feat[:sample_index]
    feat_after = feat[int(sample_index+sample_tenth):]
    feat_transformed = np.concatenate([feat_before,avg_arr,feat_after])

    return warp_trim_pad(feat_transformed,50)

def warp_stretch(feat, percentage):
    """
    Takes in a feature in the form of a numpy array, takes a random sample from the array and shrinks it by half.
    The size of the random sample is equal to 1/10 of the size of the source array.  Appends pair-wise average of every other sample index.
    Approach inspired from this paper: https://aaltd16.irisa.fr/files/2016/08/AALTD16_paper_9.pdf
    :param: feat - feature to be warped in form of numpy array, percentage - proportion of sample to be warped
    :return: Feature array with sample stretched
    """

    # Get length of array
    sample_range = feat.shape[0]

    # Calculate length of sample
    sample_tenth = int(sample_range * percentage)

    # Shift sample length to be even to allow for pair-wise averages
    if sample_tenth % 2 == 1:
        sample_tenth += 1

    # Generate random index for start of sample
    sample_index = randint(0, sample_range - (sample_tenth))

    # Shift sample index to be even to allow for pair-wise averages
    if sample_index % 2 == 1:
        sample_index += 1

    # If sample will overlap with end of sample, shift sample back to fit in array
    if (sample_index+sample_tenth) > len(feat):
        sample_index = len(feat)-sample_tenth
        sample = feat[sample_index:int(sample_index + sample_tenth)]
    else:
        # Subset feature array into sample
        sample = feat[sample_index:int(sample_index + sample_tenth)]
    # print('sample index', sample_index)
    # print('sample_tenth', sample_tenth)
    # print('sample length', len(sample))

    stretch_arr = []
    for i in range(0, sample_tenth,2): #Iterate thru each pair of elements in the sample
        if (i > 0 & i <= sample_tenth-1):  #Get average of previous index and current index
            avg_prev = (sample[i-1]+sample[i])/2
            stretch_arr.append(avg_prev)
        # print(len(sample), 'current index', i)
        avg = (sample[i]+sample[i+1])/2 #Get average of current index
        stretch_arr.append(sample[i])
        stretch_arr.append(avg)
        stretch_arr.append(sample[i+1])

    stretch_arr = np.array(stretch_arr)

    #Replace sample with stretched sample
    feat_before = feat[:sample_index]
    feat_after = feat[int(sample_index+sample_tenth):]
    feat_transformed = np.concatenate([feat_before,stretch_arr,feat_after])
    return warp_trim_pad(feat_transformed,50)

def warp_features(observation, percentage):
    """"
    Applies warp shrink/stretch at random to numpy array of feature arrays.
    Parameters: observation - numpy array of arrays, percentage - proportion of sample to be warped
    Returns: warped numpy array of arrays
    """
    new_obs = []
    for feature in range(0,observation.shape[1]): #Iterates thru each feature array
        if (randint(0,1) > 0):
            if (len(observation[:,feature])>50):
              print()
            warped_feature = warp_stretch(observation[:,feature],percentage) # applies stretch randomly
        else:
            warped_feature = warp_shrink(observation[:,feature],percentage) # else applies shrink
        new_obs.append(warped_feature)

    return np.asarray(new_obs)

def pad_features(observation, length):
    """"
    Applies trim/pad to numpy array of feature arrays.
    Parameters: observation - numpy array of arrays, length - length to trim arrays to
    Returns: padded numpy array of arrays
    """
    new_obs = []
    for feature in range(0,observation.shape[1]):
        warped_feature = warp_trim_pad(observation[:,feature],length) # applies trim/pad to feature array
        new_obs.append(warped_feature)

    return np.asarray(new_obs)



------------------------------------------------------------------
# LOAD AND PREPROCESS DATA
------------------------------------------------------------------

In [0]:
data_path = training_path #"/Users/willbuchanan/Google Drive/GIX/ricky-will-louis/data/training/"


def process_data(filepath,labels=[],data=[]):
#     min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))  # define range for scale operation
    vars = ['time', 'power_factor', 'phase_angle', 'power_real', 'power_reactive', 'power_apparent', 'vrms', 'irms']
    cwd = os.chdir(filepath)
    for appliance_type in os.listdir(cwd):
        #print("\n",appliance_type)
        if appliance_type.endswith('.csv'):
            #split_file = str(filename).split("_")
            #print(appliance_type)
            app_df = pd.read_csv(filepath + appliance_type)
            app_df.columns = app_df.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '')
            app_arr = []
            for i in range(0, len(app_df['time'])):
                data_row = []
                for column in vars:
                    data_row.append(app_df[column][i])
                app_arr.append(data_row)
            # app_arr_normalized = min_max_scaler.fit_transform(app_arr) #must scale when in 2D array form
            data.append(np.array(app_arr))
            label = str(app_df['app_name'][0]).split('-')[0]
            if label == 'ricky':
              print(appliance_type)
#             print('label:',label)
            labels.append(label)#str(app_df['app_name'][0]).split('-')[0])
    return labels,data

dataset_labels_str,dataset_data = process_data(data_path) #extracts CSV into labels & data
dataset_data_np = tf.keras.preprocessing.sequence.pad_sequences(np.asarray(dataset_data), value=0, maxlen=50) #pads length to 50
print(set(dataset_labels_str))

#--------------------extracts labels & assigns to appliance dictionary--------------------


labelset = list(set(dataset_labels_str))

appliance_dict = {}
for i,appliance in enumerate(labelset):
  appliance_dict[appliance] = i
print('appliance_dict',appliance_dict)

dataset_labels_int = convert_str_to_int(dataset_labels_str, appliance_dict) #converts from strings to integer
print('label sample:',dataset_labels_int[:5],dataset_labels_str[:5])

min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))  # define range for scale operation
scaled_dataset = []
for i,sample in enumerate(dataset_data_np):
    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))  # define range for scale operation
    app_arr_normalized = min_max_scaler.fit_transform(sample) #must scale when in 2D array form
    scaled_dataset.append(app_arr_normalized)

scaled_dataset = np.array(scaled_dataset)
scaled_dataset_3D = make3D(scaled_dataset,3)

# -- Split data into training and test subsets
data_train, data_test, labels_train, labels_test = train_test_split(scaled_dataset, dataset_labels_int, test_size=0.20)#, random_state=42) #initialize random_state to get same result every time
print('train data shape:',data_train.shape,'# of train labels:',len(labels_train),'\nunique train labels:',set(labels_train))
print('test data shape:',data_test.shape,'# of test labels:',len(labels_test),'\nunique test labels:',set(labels_test))


num_labels = len(set(labels_train))
print(num_labels,'labels created')

# Let's first make sure the shape and type of our data is correct.
# Convert data to float32 datatype and labels to int64 datatype.
train_data = tf.cast(data_train, tf.float32)
train_labels = tf.cast(labels_train, tf.int64)
test_data = tf.cast(data_test, tf.float32)
test_labels = tf.cast(labels_test, tf.int64)

# When working with images, TensorFlow needs them to be shape [H, W, C], but
# # our data is just [H, W] right now since its black and white. Let's add a extra channel axis.
train_data = train_data[..., tf.newaxis]
test_data = test_data[..., tf.newaxis]

# # Now were ready to create Tensorflow Datasets!
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_data, test_labels))


# Finally, let's shuffle our training data and batch it so its more efficient.
train_dataset = train_dataset.shuffle(20).batch(1000)
test_dataset = test_dataset.shuffle(20).batch(1000)


# -- Transform data labels from string to int for use in model
le = preprocessing.LabelEncoder()
le.fit(list(set(labels_train)))
train_labels = le.transform(labels_train)
test_labels = le.transform(labels_test)


# # -- Reshape train data for use in model
nsamples, nx, ny = data_train.shape
train_data_2d = data_train.reshape((nsamples,nx*ny))

# # -- Reshape test data for use in model
nsamples, nx, ny  = data_test.shape
test_data_2d = data_test.reshape((nsamples,nx*ny))

nsamples, nx, ny  = scaled_dataset.shape
dataset_data_np_2d = scaled_dataset.reshape((nsamples,nx*ny))

{'cell', 'kettle', 'inductioncooktop', 'laptop', 'hairdryer', 'LEDpicture', 'heatpad'}
appliance_dict {'cell': 0, 'kettle': 1, 'inductioncooktop': 2, 'laptop': 3, 'hairdryer': 4, 'LEDpicture': 5, 'heatpad': 6}
label sample: [6, 4, 2, 1, 3] ['heatpad', 'hairdryer', 'inductioncooktop', 'kettle', 'laptop']
train data shape: (576, 50, 8) # of train labels: 576 
unique train labels: {0, 1, 2, 3, 4, 5, 6}
test data shape: (144, 50, 8) # of test labels: 144 
unique test labels: {0, 1, 2, 3, 4, 5, 6}
7 labels created


In [0]:
print(data_train.shape)
print(list(set(labels_train)))
print(train_labels)
print(scaled_dataset.shape)
print(dataset_data_np_2d.shape)

(576, 50, 8)
[0, 1, 2, 3, 4, 5, 6]
[3 5 1 3 2 6 1 5 4 1 3 6 3 6 1 3 6 4 0 6 2 6 0 6 6 3 3 6 6 2 3 4 3 4 4 4 5
 4 3 1 1 6 2 6 4 3 3 6 0 6 6 1 4 1 3 4 5 6 6 2 3 2 4 6 3 2 6 4 6 6 0 4 6 6
 6 4 6 4 4 6 6 3 2 1 0 1 5 3 0 3 1 2 2 3 6 4 2 1 3 4 4 3 2 3 4 3 6 3 4 3 6
 3 1 2 3 1 6 6 3 3 1 0 3 1 1 5 4 2 0 4 6 4 3 3 2 6 4 3 3 3 2 4 3 1 3 0 6 3
 4 3 3 6 1 6 3 4 1 6 1 2 2 3 3 1 2 4 3 4 4 6 3 4 2 3 4 3 3 4 3 1 3 6 3 4 2
 3 3 3 6 2 6 3 4 4 2 3 3 3 6 6 4 1 6 1 2 6 3 2 3 3 1 2 6 3 3 6 4 1 2 1 2 3
 3 1 3 6 4 2 4 6 4 2 1 6 4 3 6 1 3 4 6 1 2 2 2 3 1 4 6 2 3 1 6 1 2 4 3 5 3
 0 3 2 4 5 3 2 1 1 2 4 3 6 3 1 4 4 3 4 1 3 3 1 0 1 4 1 6 6 3 1 6 5 4 6 1 2
 2 6 1 1 6 2 3 1 3 4 2 2 6 1 4 1 1 2 6 3 2 6 1 1 4 4 5 2 1 1 3 1 6 0 3 1 6
 3 3 4 6 2 4 2 4 1 3 3 3 3 2 3 2 2 6 2 6 3 3 4 2 3 3 4 1 3 3 3 3 4 3 4 5 1
 1 3 6 3 5 2 3 3 4 2 3 3 6 2 1 2 3 3 6 2 6 3 4 3 3 3 6 3 2 4 3 1 1 2 3 1 2
 3 3 1 3 3 4 1 4 4 6 3 4 2 1 1 6 4 4 1 2 3 3 2 2 3 3 5 6 2 6 6 3 6 6 4 4 6
 4 1 3 6 3 2 3 3 4 3 1 6 3 4 6 4 3 6 6 2 4 6 1 3 2 3 6 6 5 3 3 2 

------------------------------------------------------------------
# SUPERVISED LEARNING MODELS 
------------------------------------------------------------------

In [0]:
inv_map

In [0]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn import svm, ensemble

def svc_param_selection(X, y, nfolds):
    Cs = [0.001, 0.01, 0.1, 1, 10, 100]
    gammas = [0.001, 0.01, 0.1, 1]
    param_grid = {'C': Cs, 'gamma' : gammas}
    grid_search = GridSearchCV(svm.SVC(kernel='rbf',decision_function_shape='ovo'), param_grid, cv=nfolds)
    grid_search.fit(X, y)
    grid_search.best_params_
    return grid_search.best_params_


def gbm_param_selection(x,y,nfolds):
    parameters = {
      "loss":["deviance"],
      "learning_rate": [0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2],
      "min_samples_split": np.linspace(0.1, 0.5, 12),
      "min_samples_leaf": np.linspace(0.1, 0.5, 12),
      "max_depth":[3,5,8],
      "max_features":["log2","sqrt"],
      "criterion": ["friedman_mse",  "mae"],
      "subsample":[0.5, 0.618, 0.8, 0.85, 0.9, 0.95, 1.0],
      "n_estimators":[10,20]
    }
    clf = GridSearchCV(ensemble.GradientBoostingClassifier(verbose=1), parameters, cv=nfolds, n_jobs=-1)
    clf.fit(x, y)
    return clf.best_params_



In [0]:
np.linspace(0.1, 0.5, 5)

In [0]:
# print(svc_param_selection(dataset_data_np_2d, dataset_labels_int, 10))
print(gbm_param_selection(dataset_data_np_2d, dataset_labels_int, 10))

#gbm_params = gbm_param_selection(dataset_data_np_2d, dataset_labels_int, 10)

In [0]:
gbm_parameters = {
        "loss":"deviance",
        "learning_rate": 0.2,
        "min_samples_split":0.2,
        "min_samples_leaf":0.1,
        "max_depth":5,
        "max_features":"sqrt",
        "criterion": "friedman_mse",
        "subsample":1.0,
        "n_estimators":10
        }

In [0]:
from sklearn.naive_bayes import GaussianNB,MultinomialNB
models = [
    RandomForestClassifier(n_estimators=250, max_depth=4, random_state=0),
    LinearSVC(),
    SVC(C = 10, gamma=0.01, decision_function_shape='ovo'),
    ensemble.GradientBoostingClassifier(**gbm_parameters, verbose=1),
    MultinomialNB(),
    GaussianNB(),
    LogisticRegression(random_state=0),
]
CV = 5
cv_df = pd.DataFrame(index=range(CV * len(models)))
entries = []
for model in models:
  model_name = model.__class__.__name__
  accuracies = cross_val_score(model, dataset_data_np_2d, dataset_labels_int, scoring='accuracy', cv=CV)
  for fold_idx, accuracy in enumerate(accuracies):
    entries.append((model_name, fold_idx, accuracy))
cv_df = pd.DataFrame(entries, columns=['model_name', 'fold_idx', 'accuracy'])

In [0]:
import seaborn as sns

sns.boxplot(x='model_name', y='accuracy', data=cv_df)
sns.stripplot(x='model_name', y='accuracy', data=cv_df, 
              size=8, jitter=True, edgecolor="gray", linewidth=2)
plt.show()

cv_df.groupby('model_name').accuracy.mean()

In [0]:
cv_df.sort_values(by='accuracy', ascending=False)

Unnamed: 0,model_name,fold_idx,accuracy
17,GradientBoostingClassifier,2,0.951389
2,RandomForestClassifier,2,0.951389
3,RandomForestClassifier,3,0.951389
18,GradientBoostingClassifier,3,0.951389
12,SVC,2,0.944444
4,RandomForestClassifier,4,0.943662
10,SVC,0,0.937931
32,LogisticRegression,2,0.9375
30,LogisticRegression,0,0.931034
11,SVC,1,0.931034


In [0]:
import pickle
filename = model_dir_path + 'lq_rf_model_1.sav'
rf = RandomForestClassifier(n_estimators=250, max_depth=4, random_state=0)

rf.fit(train_data_2d, labels_train)
rf_pred = svc.predict(test_data_2d)

pickle.dump(rf, open(filename, 'wb'))

In [0]:
label_df = pd.DataFrame.from_dict(appliance_dict, orient='index')
label_list = list(label_df.index)
n_classes

In [0]:
svc = SVC(C = 10, gamma=0.01, decision_function_shape='ovo')

svc.fit(train_data_2d, labels_train)
svc_pred = svc.predict(test_data_2d)

In [0]:
sns.set(font_scale=1.2)
conf_mat = confusion_matrix(labels_test, rf_pred)
fig, ax = plt.subplots(figsize=(12,10))
sns.heatmap(conf_mat, annot=True, fmt='d',
            xticklabels=label_list, yticklabels=label_list)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()


print("Test Accuracy: %.3f%%" % (accuracy_score(labels_test, rf)*100))
print(classification_report(labels_test, rf, target_names=label_list))


------------------------------------------------------------------
# NEURAL NETWORK MODELS 
------------------------------------------------------------------

In [0]:
model = tf.keras.Sequential() # Use keras sequential layers to build up a model.
# model.add(tf.keras.layers.Flatten(input_shape=[50,8))
model.add(tf.keras.layers.Flatten(input_shape=[50,8,3]))
model.add(tf.keras.layers.Dense(num_labels*8, activation='relu'))
model.add(tf.keras.layers.Dense(num_labels*8, activation='relu'))
model.add(tf.keras.layers.Dense(num_labels*4, activation='relu'))
model.add(tf.keras.layers.Dense(num_labels, activation='softmax'))

model.compile(optimizer='adam',loss='sparse_categorical_crossentropy', metrics=['accuracy','sparse_categorical_crossentropy'])

model.summary()
print("\n\n")
# Train the model, keras does not need datasets to function, can just take raw numpy inputs!
history = model.fit(data_train, 
                    labels_train, 
                    epochs=20,
                    batch_size=10,
                    validation_data=(data_test,labels_test),
                    callbacks=[cp_callback])

In [0]:
# from random import randint

# def warp_shrink(feat):
#     """
#     Takes in a feature in the form of a numpy array, takes a random sample from the array and shrinks it by half.
#     The size of the random sample is equal to 1/10 of the size of the source array.  Replaces sample with pair-wise average of sample indices.
#     Approach inspired from this paper: https://aaltd16.irisa.fr/files/2016/08/AALTD16_paper_9.pdf
#     Parameters: feat - feature to be warped in form of numpy array
#     Returns: Feature array with sample shrank
#     """
#     print('shape:',feat.shape)
#     #Get length of array
#     sample_range = feat.shape[0]
# #     print('sample_range',sample_range)

#     #Calculate length of sample
#     sample_tenth = int(int(sample_range)*.1)

#     #Shift sample length to be even to allow for pair-wise averages
#     if sample_tenth % 2 == 1:
#         sample_tenth += 1

#     # Generate random index for start of sample
#     sample_index = randint(0, sample_range - (sample_tenth))

#     #Shift sample index to be even to allow for pair-wise averages
#     if sample_index % 2 == 1:
#         sample_index += 1

#     #Subset feature array into sample
#     sample = feat[sample_index:int(sample_index+sample_tenth)]

#     avg_arr = []
#     for i in range(0, sample_tenth, 2): #iterate thru pairs of elements in sample, calculating average
#         avg = (sample[i]+sample[i+1])/2
#         avg_arr.append(avg)

#     avg_arr = np.array(avg_arr)

#     # Replace sample with shrunk average of samples
#     feat_before = feat[:sample_index]
#     feat_after = feat[int(sample_index+sample_tenth):]
#     feat_transformed = np.concatenate([feat_before,avg_arr,feat_after])

#     return feat_transformed

# def warp_stretch(feat):
#     """
#     Takes in a feature in the form of a numpy array, takes a random sample from the array and shrinks it by half.
#     The size of the random sample is equal to 1/10 of the size of the source array.  Appends pair-wise average of every other sample index.
#     Approach inspired from this paper: https://aaltd16.irisa.fr/files/2016/08/AALTD16_paper_9.pdf
#     Parameters: feat - feature to be warped in form of numpy array
#     Returns: Feature array with sample stretched
#     """

#     # Get length of array
#     print('shape:',feat.shape)
#     sample_range = feat.shape[0]
# #     print('sample_range',sample_range)

#     # Calculate length of sample
#     sample_tenth = int(int(sample_range)*.1)

#     # Shift sample length to be even to allow for pair-wise averages
#     if sample_tenth % 2 == 1:
#         sample_tenth += 1

#     # Generate random index for start of sample
#     sample_index = randint(0, sample_range - (sample_tenth))

#     # Shift sample index to be even to allow for pair-wise averages
#     if sample_index % 2 == 1:
#         sample_index += 1

#     # Subset feature array into sample
#     sample = feat[sample_index:int(sample_index + sample_tenth)]

#     stretch_arr = []
#     for i in range(0, sample_tenth,2): #Iterate thru each pair of elements in the sample
#         if (i > 0 & i <= sample_tenth-1):  #Get average of previous index and current index
#             avg_prev = (sample[i-1]+sample[i])/2
#             stretch_arr.append(avg_prev)
#         avg = (sample[i]+sample[i+1])/2 #Get average of current index
#         stretch_arr.append(sample[i])
#         stretch_arr.append(avg)
#         stretch_arr.append(sample[i+1])

#     stretch_arr = np.array(stretch_arr)

#     #Replace sample with stretched sample
#     feat_before = feat[:sample_index]
#     feat_after = feat[int(sample_index+sample_tenth):]
#     feat_transformed = np.concatenate([feat_before,stretch_arr,feat_after])

#     return feat_transformed


# def warp_features(observation):
#     """"
#     Applies warp shrink/stretch at random to numpy array of feature arrays.
#     Parameters: observation - numpy array of arrays
#     Returns: warped numpy array of arrays
#     """
#     new_obs = []
#     for feature in range(0,observation.shape[1]): #Iterates thru each feature array
#         if (randint(0,1) > 0):
#             warped_feature = warp_stretch(observation[:,feature]) # applies stretch randomly
#         else:
#             warped_feature = warp_shrink(observation[:,feature]) # else applies shrink
#         new_obs.append(warped_feature)
#     np_array = np.asarray(new_obs)
#     return tf.convert_to_tensor(np_array)


# warped_data = warp_features(train_data)
# print(warped_data.shape)


In [0]:
#DATA AUGMENTATION

# preprocessing_function: function that will be implied on each input. 
#   The function will run after the image is resized and augmented. 
#   The function should take one argument: one image (Numpy tensor with rank 3), 
#     and should output a Numpy tensor with the same shape.
def apply_noise(array):#preprocessing_function...apply gaussian noise
#   return array
#   tensor = tf.convert_to_tensor(array)
#   shape = array.tf.get_shape()#.as_list()
#   print(type(tensor),tensor.get_shape().as_list())#,'shape',shape)
#   noise = np.random.normal(0,0.001,[50,8,3])#shape)#[144,50,8,1]) #or tensor.shape?
  return array #+ noise
  
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=apply_noise)
# train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=warp_features)
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator()

labels_train_np_1d = make3D(np.array(labels_train),1) #must have an additional channel for generator function
labels_test_np_1d = make3D(np.array(labels_test),1) #must have an additional channel for generator function

print('train:',data_train.shape,type(data_train),'labels:',labels_train_np_1d.shape,type(labels_train_np_1d))
print('test:',data_test.shape,type(data_test),'labels:',labels_test_np_1d.shape,type(labels_test_np_1d))

train_generator = train_datagen.flow(
#     train_data, #tensor w/ added axis #TypeError: unsupported operand type(s) for /: 'Dimension' and 'int'
#     train_dataset, #(Tensor dataset input): TypeError: object of type 'DatasetV1Adapter' has no len()
#     data_train, #(NP input) ValueError: ('Input data in `NumpyArrayIterator` should have rank 4. You passed an array with shape', (144, 50, 8))
    data_train, #(NP input) ValueError: ('Input data in `NumpyArrayIterator` should have rank 4. You passed an array with shape', (144, 50, 8))
    labels_train_np_1d,
#     train_labels,
# dataset_data_np, #ValueError: ('Input data in `NumpyArrayIterator` should have rank 4. You passed an array with shape', (161, 50, 8))
#       dataset_labels_int,
    batch_size=10)

test_generator = test_datagen.flow(
    data_test, #(NP input required)
    labels_test_np_1d,
    batch_size=10)

In [0]:
aug_model = tf.keras.Sequential() # Use keras sequential layers to build up a model.
# aug_model.add(tf.keras.layers.Flatten(input_shape=[50,8]))
aug_model.add(tf.keras.layers.Flatten(input_shape=[50,8,3]))
aug_model.add(tf.keras.layers.Dense(num_labels*8, activation='relu'))
aug_model.add(tf.keras.layers.Dense(num_labels*8, activation='relu'))
aug_model.add(tf.keras.layers.Dense(num_labels*4, activation='relu'))
aug_model.add(tf.keras.layers.Dense(num_labels, activation='softmax'))

aug_model.compile(optimizer='adam',loss='sparse_categorical_crossentropy', metrics=['accuracy','sparse_categorical_crossentropy'])
aug_model.summary()

print("\n\n")
print('train:',data_train.shape,type(data_train),'labels:',labels_train_np_1d.shape,type(labels_train_np_1d))
print('test:',data_test.shape,type(data_test),'labels:',labels_test_np_1d.shape,type(labels_test_np_1d))
print("\n\n")

aug_history = aug_model.fit_generator(train_generator, validation_data=test_generator, epochs=20)

In [0]:
louis_model = tf.keras.Sequential()
louis_model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=[50,8,3]))
louis_model.add(tf.keras.layers.BatchNormalization())
louis_model.add(tf.keras.layers.MaxPooling2D(pool_size=2))
louis_model.add(tf.keras.layers.Dropout(0.3))
louis_model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'))
louis_model.add(tf.keras.layers.BatchNormalization())
louis_model.add(tf.keras.layers.MaxPooling2D(pool_size=2))
louis_model.add(tf.keras.layers.Dropout(0.3))
louis_model.add(tf.keras.layers.Flatten())
louis_model.add(tf.keras.layers.Dense(256, activation='relu'))
louis_model.add(tf.keras.layers.BatchNormalization())
louis_model.add(tf.keras.layers.Dropout(0.5))
louis_model.add(tf.keras.layers.Dense(7, activation='softmax'))
louis_model.add(tf.keras.layers.BatchNormalization())
louis_model.add(tf.keras.layers.Activation('softmax'))

louis_model.summary()
print("\n\n")
,
louis_model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy','sparse_categorical_crossentropy'])

louis_history = model.fit(data_train, labels_train, epochs=20,batch_size=10,validation_data=(data_test,labels_test))


------------------------------------------------------------------
# TESTING MODELS
------------------------------------------------------------------

In [0]:
# def plot_history(histories, key='binary_crossentropy'):
def plot_history(histories, key='sparse_categorical_crossentropy'):
  
  plt.figure(figsize=(16,10))
    
  for name, history in histories:
    val = plt.plot(history.epoch, history.history['val_'+key],'--', label=name.title()+' Val')
    plt.plot(history.epoch, history.history[key], color=val[0].get_color(),label=name.title()+' Train')

  plt.xlabel('Epochs')
  plt.ylabel(key.replace('_',' ').title())
  plt.legend()

  plt.xlim([0,max(history.epoch)])

In [0]:
plot_history([('baseline', history), ('augmented', aug_history),('louis',louis_history)], key='acc')
plot_history([('baseline', history), ('augmented', aug_history),('louis',louis_history)], key='sparse_categorical_crossentropy')
plot_history([('baseline', history), ('augmented', aug_history),('louis',louis_history)], key='loss')

In [0]:
#{'hairdryer':0,'laptop':1,'inductioncooktop':2,'cell':3,'heatpad':4,'LEDpicture':5,'kettle':6}

# # Evaluate on test data after training.
# test_loss, test_acc = model.evaluate(data_test, labels_test)
# # test_loss, test_acc = model.evaluate(test_data, test_labels)
# print("Test Accuracy:", test_acc,'\n')

# Can also make predictions on individual samples.

inv_map = {v: k for k, v in appliance_dict.items()}

for i,data in enumerate(data_test):
  data = data[tf.newaxis, ...]
  print('data.shape',data.shape)
  guess_list = model.predict(data)
  print(guess_list)
#   guess = tf.argmax(guess_list, axis=-1).numpy()
#   topk = tf.nn.top_k(model.predict(data),k=3)#.numpy()
#   print(topk)
#   print('guess:',guess,convert_int_to_str(guess,appliance_dict))
  print('actual:',labels_test[i],convert_int_to_str(labels_test[i],appliance_dict))
#   print(guesses)
  for j in range (0,7):
    print(('guesses: %.2f%% %s'%(100*guess_list[0,j],inv_map[j])) )
  print('')

  guess_list.argmax

In [0]:
guess_list.

In [0]:
print(inv_map)
print(appliance_dict)

print(os.listdir(example_path))

In [0]:
# Let’s go ahead and load the model, this would be what we do when starting a new notebook.
model = tf.keras.models.load_model(checkpoint_path)
# Evaluate to make sure the accuracy is preserved.
model.evaluate(data_test, labels_test)