<a href="https://colab.research.google.com/github/SamuelXJames/Signal-Denoising-Autoencoder/blob/master/CNN1D_Optimize%20v0.3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#Additional Packages to Install
# !pip install pydrive
# !pip install git+https://github.com/hyperopt/hyperopt.git
# !pip install kopt
# !pip install oauth2client
# !pip install tensorboardcolab
# !pip install pymongo
# !pip install git+https://github.com/rthalley/dnspython.git
# !pip install dnspython

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import pydrive
import pymongo
import dns
from tensorboardcolab import *
from random import randint
from keras.layers import Dense,Conv1D,Input
from keras.layers import MaxPooling1D,UpSampling1D
from keras.layers import Activation,BatchNormalization
from keras.models import Model
from keras.optimizers import RMSprop
from sklearn.preprocessing import MinMaxScaler
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
from kopt import CompileFN, KMongoTrials, test_fn
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.mongoexp import MongoTrials
from google.colab import drive




#MongoTrials
# SRV_string  = ''
# client = pymongo.MongoClient(SRV_string)
# client.test_database
# print(client)
# hostname = ''
# port = 27017



#Mounting Google Drive
# drive.mount('/content/gdrive')
# #Authenticate/Create PyDrive client 
# auth.authenticate_user()
# gauth = GoogleAuth()
# gauth.credentials = GoogleCredentials.get_application_default()
# drive = GoogleDrive(gauth)


  
#tbc=TensorBoardColab()
batch_size = 1000
validation_split = 0.15
timesteps = 2000
features = 1
global timesteps
global features


#Generate Test/Train Data
def create_data(timesteps = 2000):
  length = 1
  dt = 0.0005
  noise_factor = 0.35
  data_1 = []
  data_noisy_1 = []
  for i in range(batch_size):
      freq = 30*np.random.random() + 1
      t = np.arange(0,length,dt)
      #signal = np.arange(-0.5*length,0.5*length,dt)
      signal = np.sin(2*np.pi*freq*t)
      noisy_signal = signal+noise_factor*np.random.rand(len(t))
      data_1.append(signal)
      data_noisy_1.append(noisy_signal)
  data_1 = np.array(data_1)
  data_noisy_1 = np.array(data_noisy_1)
  
  #Scale Data - Need a better scaler than MinMaxScaler
  scaler = MinMaxScaler(feature_range = (0,1))
  data_1  = np.vstack((data_1,data_noisy_1))
  data_1 = scaler.fit_transform(data_1)
  signal = data_1[0:batch_size][:]
  signal_noisy = data_1[batch_size:2*batch_size][:]
  
  #Input Shaping
  features = 1
  timesteps = len(signal[0])
  shape = (batch_size,timesteps,features)
  #Input shape should equal [samples, timesteps, features]
  x = signal.reshape(shape)
  y = signal_noisy.reshape(shape)
  
  split_point = int((1-validation_split)*batch_size)
  
  x_train = x[0:split_point,:,:]
  y_train = y[0:split_point,:,:]
  x_test = x[split_point:batch_size,:,:]
  y_test = y[split_point:batch_size][:]

  return (x_train,y_train,features),(x_test,y_test)



#Gnerate Model For Optimization
def create_autoencoder(train_data,max_filters = 32, min_filters = 8, layers = 3,
                      kernel_size = 3,filter_spacing = 'lin'):
  
  timesteps = 2000
  features = 1
  inputs = Input(shape = (timesteps,features),name = 'Input')
  encoded = inputs
  
  #Linear Filter Range
  if filter_spacing == 'lin':
    filter_range = np.linspace(max_filters,min_filters,layers,
                               dtype = 'int64')
  #Half Filter Range
  if filter_spacing == 'half':
    if max_filters%2 != 0:
      max_filters+=1
    if min_filters%2 != 0:
      min_filters+=1
    filter_range = [max_filters]
    x = max_filters
    while x> min_filters:
      x = x/2
      filter_range.append(x)
    if filter_range[-1] < min_filters:
      del filter_range[-1]
    filter_range = np.asarray(filter_range,dtype = 'int64')
  
  #Random Filter Range
  if filter_spacing == 'random':
    filter_range = [randint(min_filters, max_filters) for z in range(layers)]
    filter_range.sort(reverse= True)
    filter_range = np.asarray(filter_range)
    
  conv_name = ''
  count = 1
  for num_filters in filter_range: 
    conv_name = 'Conv_Layer_No._{0}'.format(count)
    encoded = Conv1D(num_filters,kernel_size,padding = 'same',name = conv_name)(encoded)
    count+=1
    if(num_filters==max_filters):
      encoded = BatchNormalization()(encoded)
  
    encoded = Activation('relu')(encoded)
    encoded = MaxPooling1D(2,padding = 'same')(encoded)
  
  filter_range = np.sort(filter_range)
  for num_filters in filter_range: 
    conv_name = 'Conv_Layer_No._{0}'.format(count)
    encoded = Conv1D(num_filters,kernel_size,padding = 'same',name = conv_name)(encoded)
    count+=1
    encoded = Activation('relu')(encoded)
    encoded = UpSampling1D(2)(encoded)
    
  decoded = Conv1D(1,kernel_size,activation = 'sigmoid',padding = 'same')(encoded)
  
  autoencoder = Model(inputs,decoded)
  autoencoder.summary()
  optimizer = RMSprop(lr=1e-3)
  autoencoder.compile(optimizer = optimizer, loss = 'mse')
  
  return autoencoder





#HyperParameter Range
hyper_params = {
    "data":{
        'timesteps': 2000
        
        
    },
    "model":{
        "max_filters":hp.choice('mx_filters',np.arange(32,320,dtype = 'int32')),
        "min_filters":hp.choice('mn_filters',np.arange(4,32,dtype = 'int32')),
        'layers':hp.choice('ly',np.arange(1,3,dtype = 'int32')),
        'filter_spacing':hp.choice('fs',['lin','half','random'])
        
    },
    "fit":{
        'epochs': 1,
        'patience': 3
    }
}



#NN Evaluation
db_name = 'Autoencoder'
exp_name ='exp_1'
objective = CompileFN(db_name,exp_name,
                      data_fn = create_data,
                      model_fn = create_autoencoder,
                      loss_metric = 'loss',
                      loss_metric_mode = 'min',
                      valid_split = None,
                      save_model = 'best',
                      save_results = True)
                      
                      



trials = Trials()
best = fmin(objective, hyper_params, trials=trials, algo=tpe.suggest, max_evals=2)

trial_data = trials.results

#Organize trials into table
val_loss = []
loss = []
filter_spacing = []
max_filters = []
min_filters = []
layers = []
for i in range(len(trial_data)):
    trial = trial_data[i]
    val_loss.append('%.6f'%(trial['history']['loss']['val_loss'][0])) 
    loss.append('%.6f'%(trial['history']['loss']['loss'][0]))
    filter_spacing.append(trial['param']['model']['filter_spacing'])
    max_filters.append(trial['param']['model']['max_filters'])
    min_filters.append(trial['param']['model']['min_filters'])
    layers.append(trial['param']['model']['layers'])


new_dict = {'val_loss':val_loss,'loss':loss,
            'f_spacing':filter_spacing,'max_f':max_filters,'min_f':min_filters}

data_frame = pd.DataFrame.from_dict(new_dict, orient='columns')
print(data_frame)
    

#Mongo Trials - Cont'd
#trials = MongoTrials(SRV_string,exp_key = 'Optimization_1')
#trials = MongoTrials('mongo://{0}:{1}/{2}/jobs'.format(hostname,port,db_name), exp_key='exp1')
#trials = KMongoTrials(db_name, exp_name,
#                      ip=hostname,
#                      port=port)





 
#Save Model to Google Drive
# model_file = drive.CreateFile({'title' : 'CNN1D_v1.0.h5'})
# model_file.SetContentFile('model.h5')
# model_file.Upload()
# drive.CreateFile({'id': model_file.get('id')})
  
  
  
  
  
  

2018-12-30 05:37:42,783 [INFO] tpe_transform took 0.022910 seconds
2018-12-30 05:37:42,785 [INFO] TPE using 0 trials
2018-12-30 05:37:42,799 [INFO] Load data...


MongoClient(host=['autoencoderoptimization-shard-00-02-08kaz.mongodb.net:27017', 'autoencoderoptimization-shard-00-00-08kaz.mongodb.net:27017', 'autoencoderoptimization-shard-00-01-08kaz.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, authsource='admin', replicaset='AutoEncoderOptimization-shard-0', ssl=True, retrywrites=True)


2018-12-30 05:37:43,636 [INFO] Fit...


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           (None, 2000, 1)           0         
_________________________________________________________________
Conv_Layer_No._1 (Conv1D)    (None, 2000, 44)          176       
_________________________________________________________________
batch_normalization_5 (Batch (None, 2000, 44)          176       
_________________________________________________________________
activation_17 (Activation)   (None, 2000, 44)          0         
_________________________________________________________________
max_pooling1d_9 (MaxPooling1 (None, 1000, 44)          0         
_________________________________________________________________
Conv_Layer_No._2 (Conv1D)    (None, 1000, 22)          2926      
_________________________________________________________________
activation_18 (Activation)   (None, 1000, 22)          0         
__________

2018-12-30 05:38:11,890 [INFO] Evaluate...
2018-12-30 05:38:15,225 [INFO] Done!
2018-12-30 05:38:15,253 [INFO] tpe_transform took 0.026401 seconds
2018-12-30 05:38:15,254 [INFO] TPE using 1/1 trials with best loss 0.016185
2018-12-30 05:38:15,265 [INFO] Load data...
2018-12-30 05:38:16,061 [INFO] Fit...


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           (None, 2000, 1)           0         
_________________________________________________________________
Conv_Layer_No._1 (Conv1D)    (None, 2000, 290)         1160      
_________________________________________________________________
batch_normalization_6 (Batch (None, 2000, 290)         1160      
_________________________________________________________________
activation_25 (Activation)   (None, 2000, 290)         0         
_________________________________________________________________
max_pooling1d_13 (MaxPooling (None, 1000, 290)         0         
_________________________________________________________________
Conv_Layer_No._2 (Conv1D)    (None, 1000, 145)         126295    
_________________________________________________________________
activation_26 (Activation)   (None, 1000, 145)         0         
__________

2018-12-30 05:38:52,246 [INFO] Evaluate...
2018-12-30 05:38:57,060 [INFO] Done!


[{'loss': 0.01618463173508644, 'status': 'ok', 'eval': {'loss': 0.01618463173508644}, 'param': {'data': {'timesteps': 2000}, 'fit': {'epochs': 1, 'patience': 3, 'batch_size': 32, 'early_stop_monitor': 'val_loss'}, 'model': {'filter_spacing': 'half', 'layers': 1, 'max_filters': 43, 'min_filters': 4}}, 'path': {'model': '/root/.kopt/data//Autoencoder/exp_1//train_models/c71a4995-ffda-442f-bc8c-a1561ba3e901.h5', 'results': '/root/.kopt/data//Autoencoder/exp_1//train_models/c71a4995-ffda-442f-bc8c-a1561ba3e901.json'}, 'name': {'data': 'create_data', 'model': 'create_autoencoder', 'optim_metric': 'loss', 'optim_metric_mode': 'loss'}, 'history': {'params': {'batch_size': 32, 'epochs': 1, 'steps': None, 'samples': 850, 'verbose': 2, 'do_validation': True, 'metrics': ['loss', 'val_loss']}, 'loss': {'epoch': [0], 'val_loss': [0.016184631971021494], 'loss': [0.04198666546274634]}}, 'time': {'start': '2018-12-30 05:37:42.798942', 'end': '2018-12-30 05:38:15.224955', 'duration': {'total': 32.42601