# Ray Tune Environment Starter

In [None]:
## Dependencies for Google Colab environment
# Run this cell once on a fresh runtime. It removes pyarrow, installs a pinned
# Ray 0.8.0.dev5 wheel (built for CPython 3.6 / manylinux1) plus the ray[debug]
# extras, and then terminates the kernel process so the packages are reloaded.

print("Setting up colab environment")
!pip uninstall -y -q pyarrow
!pip install -q https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.8.0.dev5-cp36-cp36m-manylinux1_x86_64.whl
!pip install -q ray[debug]

# A hack to force the runtime to restart, needed to include the above dependencies.
# os._exit(0) ends the process immediately, so nothing placed after it in this
# cell would ever run.
print("Done installing! Restarting via forced crash (this is not an issue).")
import os
os._exit(0)

## TensorFlow for Colab

In [None]:
## TensorFlow 2.0 for Colab

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  # Best effort: any exception raised by the magic is swallowed so the cell
  # still completes.
  pass

## Ray Tune Library Import

In [None]:
import tensorflow as tf
try:
    tf.get_logger().setLevel('INFO')
except Exception as exc:
    print(exc)
import warnings
warnings.simplefilter("ignore")

import os
import numpy as np
import torch
import torch.optim as optim
from torchvision import datasets
from ray.tune.examples.mnist_pytorch import get_data_loaders, ConvNet 

import ray
from ray import tune
from ray.tune import track
from ray.tune.schedulers import PopulationBasedTraining
from ray.tune.util import validate_save_restore
from ray.tune.schedulers import AsyncHyperBandScheduler

%matplotlib inline
import matplotlib.style as style
import matplotlib.pyplot as plt
style.use("ggplot")


# Function codes

## Test Function

In [None]:
import torch
import numpy as np
import pandas as pd


def test(test_inputs, test_labels, net):
    test_X = torch.Tensor(test_inputs).view(-1, 5)
    test_y = torch.Tensor(test_labels)

    predictionNumpy = []
    with torch.no_grad():
        for i in range(0, len(test_X)):
            net_out = net(test_X[i].view(-1, 5))
            predictionNumpy.append(net_out[0].numpy())              # The output from the net is a tensor which contains only one element which is a list. The list contains the 3 output values. We only want the list, not the tensoor containing one element which is a list.

    experimental = []
    for data in test_y:
        experimental.append(data.numpy())

    squared_error_X = []
    squared_error_N = []
    squared_error_L = [] 
    squared_error_C = [] 

    for i in range(0, len(experimental)):
            X_error = experimental[i][0] - predictionNumpy[i][0]
            N_error = experimental[i][1] - predictionNumpy[i][1]
            L_error = experimental[i][2] - predictionNumpy[i][2]
            C_error = experimental[i][3] - predictionNumpy[i][3]
            squared_error_X.append(X_error**2)
            squared_error_N.append(N_error**2)
            squared_error_L.append(L_error**2)
            squared_error_C.append(C_error**2)

    MSE_X1 = sum(squared_error_X[0:14])/14
    MSE_N1 = sum(squared_error_N[0:14])/14
    MSE_L1 = sum(squared_error_L[0:14])/14
    MSE_C1 = sum(squared_error_L[0:14])/14
    
    MSE_X2 = sum(squared_error_X[14:28])/14         # To accomodate more than 1 test sets 
    MSE_N2 = sum(squared_error_N[14:28])/14
    MSE_L2 = sum(squared_error_L[14:28])/14
    MSE_C2 = sum(squared_error_C[14:28])/14
    MSE_list = [MSE_X1, MSE_N1, MSE_L1, MSE_C1, MSE_X2, MSE_N2, MSE_L2, MSE_C2]
    AVG_MSE = sum(MSE_list)/8

    return AVG_MSE


## Train Function

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
					  
def train(net, inputs, labels, EPOCHS, l_rate, BATCH_SIZE):
    """Fit ``net`` in place with Adam on a mean-squared-error loss.

    Args:
        net: ``nn.Module`` to optimise; it is switched to training mode.
        inputs: Array-like that can be reshaped to ``(-1, 5)``.
        labels: Array-like of targets, one row per input row.
        EPOCHS: Number of full passes over the data.
        l_rate: Learning rate handed to ``optim.Adam``.
        BATCH_SIZE: Number of consecutive rows per optimisation step.

    Returns:
        None. The network's parameters are updated in place.
    """
    net.train()
    adam = optim.Adam(net.parameters(), lr=l_rate)
    criterion = nn.MSELoss(reduction='mean')

    features = torch.Tensor(inputs).view(-1, 5)
    targets = torch.Tensor(labels)
    n_rows = len(features)

    for _ in range(EPOCHS):
        # Batches are consecutive slices; a tqdm progress bar is drawn per epoch.
        for start in tqdm(range(0, n_rows, BATCH_SIZE)):
            stop = start + BATCH_SIZE
            feature_batch = features[start:stop].view(-1, 5)
            target_batch = targets[start:stop]

            adam.zero_grad()
            batch_loss = criterion(net(feature_batch), target_batch)
            batch_loss.backward()
            adam.step()


## Replication Function

In [None]:
import numpy as np
import pandas as pd

## Create a function which takes in a dataset and replicates it
def replicate_data(data, replications, noise):
    """Create noisy copies of a dataset.

    The first four columns are resampled uniformly within ``+/- noise``
    (a fraction of each value) around the original values; the fifth column
    is copied unchanged into every replica.

    Args:
        data: DataFrame with at least five columns.
        replications: Number of noisy copies to generate.
        noise: Relative half-width of the uniform interval, e.g. ``0.03``.

    Returns:
        A new DataFrame with ``replications * len(data)`` rows, a fresh
        0..n-1 index and the same columns as ``data``. The original rows are
        not included. With ``replications == 0`` an empty frame with the
        same columns is returned.
    """
    cols = list(data.columns)
    noisy_cols = cols[0:4]
    passthrough_col = cols[4]

    # Work on plain arrays so the pass-through column is attached by position
    # and does not depend on the index labels of ``data``.
    base = data[noisy_cols].to_numpy()
    low = base - base * noise
    high = base + base * noise
    passthrough = data[passthrough_col].to_numpy()

    replicas = []
    for _ in range(replications):
        replica = pd.DataFrame(np.random.uniform(low, high), columns=noisy_cols)
        replica[passthrough_col] = passthrough
        replicas.append(replica)

    if not replicas:
        return pd.DataFrame(columns=data.columns)

    # Concatenate once; DataFrame.append was removed in pandas 2.0 and
    # re-copies the accumulated frame on every call.
    combined = pd.concat(replicas, ignore_index=True, sort=False)
    return combined.reindex(columns=cols)



## FNN Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

torch.manual_seed(777)

class Net(nn.Module):
    """Fully connected network: 5 inputs -> HN1 -> HN2 -> 4 outputs.

    Both hidden layers use a sigmoid activation; the output layer is linear.

    Args:
        HN1: Number of nodes in the first hidden layer.
        HN2: Number of nodes in the second hidden layer.
    """

    def __init__(self, HN1, HN2):
        super().__init__()
        self.HN1, self.HN2 = HN1, HN2
        # Layers are created in input-to-output order, which fixes the order
        # in which their weights are drawn from the random number generator.
        self.fc1 = nn.Linear(5, HN1)
        self.fc2 = nn.Linear(HN1, HN2)
        self.fc3 = nn.Linear(HN2, 4)

    def forward(self, x):
        """Propagate ``x`` (last dimension 5) through the network."""
        hidden = torch.sigmoid(self.fc1(x))
        hidden = torch.sigmoid(self.fc2(hidden))
        return self.fc3(hidden)
 


## Data Sets Import

In [None]:
# Clone the entire repo.
!git clone -l -s git://github.com/Arymega/FAME_Bioprocess_Simulation_with_RNN_and_FNN.git cloned-repo
%cd cloned-repo
!ls

## FNN Configuration

In [None]:
import pandas as pd
import numpy as np 
import copy
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
import csv
import time
# Wall-clock reference for this cell.
start_time = time.time()

# Load training data as pd dataframe and convert pd dataframe into numpy array.
# The absolute path assumes the repository was cloned to /content/cloned-repo.
training_data = pd.read_excel('/content/cloned-repo/Common Files/Datasets.xlsx', sheet_name='6Sets')
training_data_array = np.array(training_data)

# Standardise Training Data
# NOTE(review): the scaler is fitted on the complete dataset before the folds
# are split below, so the held-out rows of every fold also contribute to the
# mean/std that is applied to its training rows.
scaler_train = StandardScaler()
scaler_train.fit(training_data)

# Split data into k=6 folds.
kf = KFold(n_splits=6)
# The return value is discarded; this call has no effect on kf.
kf.get_n_splits(training_data)

# Split training data set into 6 subsets containing k-1 folds before optimisation.
class wrapper(object):
    """Mutable holder whose ``value`` attribute starts out as an empty list."""

    def __init__(self):
        self.value = []


# One independent holder per fold for the training rows and the test rows.
(subset_train1, subset_train2, subset_train3,
 subset_train4, subset_train5, subset_train6) = (wrapper() for _ in range(6))
(subset_test1, subset_test2, subset_test3,
 subset_test4, subset_test5, subset_test6) = (wrapper() for _ in range(6))

subset_train_list = [
    subset_train1, subset_train2, subset_train3,
    subset_train4, subset_train5, subset_train6,
]
subset_test_list = [
    subset_test1, subset_test2, subset_test3,
    subset_test4, subset_test5, subset_test6,
]

# Fill the holders: for fold number `index`, the rows selected by
# train_index go into subset_train_list[index] and the rows selected by
# test_index go into subset_test_list[index]. Rows are taken from the raw
# (unscaled) numpy copy of the data.
index = 0
for train_index, test_index in kf.split(training_data):

    for row in train_index:
        subset_train_list[index].value.append(training_data_array[row])
    
    for row in test_index:
        subset_test_list[index].value.append(training_data_array[row])
    
    index +=1


# Standardise Test Data
# Each test holder's list of rows is replaced by the array returned from the
# scaler fitted above.
for subset in subset_test_list:
    subset.value = scaler_train.transform(subset.value)

# Replicate and Standardise the training data in each subset.

columns = "B N F NIC LI".split()
for index, subset in enumerate(subset_train_list):
    # Unscaled rows of this fold; every noisy replica is generated from them.
    ref = pd.DataFrame(data=subset.value, index=None, columns=columns)
    df = scaler_train.transform(ref)

    # Append 50 replicas at 3 % noise, then 50 replicas at 5 % noise, each
    # standardised with the same scaler as the original rows.
    for noise_level in (0.03, 0.05):
        replicated = scaler_train.transform(replicate_data(ref, 50, noise_level))
        df = np.append(df, replicated, axis=0)

    subset.value = df


# Calculate training and test labels
def _with_rate_labels(values):
    """Return ``values`` with four label columns appended on the right.

    Each label is the value of the next row minus the value of the current
    row for columns 0-3. The last row has no successor, so its labels are 0.
    """
    values = np.asarray(values)
    deltas = np.diff(values[:, 0:4], axis=0)
    last_row = np.zeros((1, 4), dtype=deltas.dtype)
    return np.append(values, np.vstack([deltas, last_row]), axis=1)


for subset in subset_train_list:
    subset.value = _with_rate_labels(subset.value)

for subset in subset_test_list:
    subset.value = _with_rate_labels(subset.value)


# Remove all 15th datapoints from corresponding training and testing sets
for subset in subset_train_list:
    # Rows 15, 30, 45, ... (1-based) are dropped in one call.
    every_15th = np.arange(14, len(subset.value), 15)
    subset.value = np.delete(subset.value, every_15th, 0)

for subset in subset_test_list:
    # Only the final row of each test subset is dropped.
    subset.value = np.delete(subset.value, len(subset.value) - 1, 0)

# From here on the two lists are object arrays holding the same wrappers.
subset_train_list = np.array(subset_train_list)
subset_test_list = np.array(subset_test_list)

# Shuffle Training Data
# Rows are permuted in place, so re-running this cell reshuffles them again.
for subset in subset_train_list:
    np.random.shuffle(subset.value)


## Train_tune definition


In [None]:
# k-fold cross validation training loop
# Module-level defaults. They are read as globals by the training code below.
HL = 2   # Initial configurations; HL only appears in the MODELS key built by train_tune
HN1 = 5 
HN2 = 6 
EPOCHS = 3 
BATCH_SIZE = 100   # mini-batch size handed to train()
LR = 0.001         # default learning rate
MODELS = {}        # results collected by train_tune, keyed by a configuration string
h1 = HN1           # default node count of hidden layer 1
h2 = HN2           # default node count of hidden layer 2
e = EPOCHS         # default number of epochs
momentum = 0.9     # module-level value; the Trainable below reads momentum from its config instead

def train_tune(config):
  """Function-style Tune trainable: k-fold cross-validate one configuration.

  For every fold a network with ``h1``/``h2`` hidden nodes is reset to the
  same initial weights, trained on the fold's training subset and scored on
  its test subset. After each pass over all folds the running mean of the
  fold scores is logged to Tune as ``mean_accuracy`` (it is a mean squared
  error, so lower is better) and stored in ``MODELS``.

  Args:
    config: Mapping with the keys ``"lr"`` (learning rate), ``"h1"`` and
      ``"h2"`` (hidden layer sizes) and ``"e"`` (epochs). An optional
      ``"iteration"`` key sets the number of passes over the folds
      (default 1).
  """
  lr = config["lr"]
  h1 = int(config["h1"])
  h2 = int(config["h2"])
  e = int(config["e"])
  # `iteration` used to be an undefined name here; it is now read from the
  # config with a default of a single pass.
  iteration = int(config.get("iteration", 1))

  net = Net(h1, h2)
  init_state = copy.deepcopy(net.state_dict())

  MSEs = []

  for count in range(iteration):

    for index, subset in enumerate(subset_train_list):
      train_rows = np.asarray(subset.value)
      test_rows = np.asarray(subset_test_list[index].value)

      # Every fold starts from the same initial weights.
      net.load_state_dict(init_state)

      # Columns 0-4 are the inputs, the remaining columns are the labels.
      training_inputs = train_rows[:, 0:5]
      training_labels = train_rows[:, 5:]
      test_inputs = test_rows[:, 0:5]
      test_labels = test_rows[:, 5:]

      # Train with the sampled learning rate rather than the global default.
      train(net, training_inputs, training_labels, e, lr, BATCH_SIZE)

      avg_mse = test(test_inputs, test_labels, net)
      MSEs.append(avg_mse)

    acc = sum(MSEs)/len(MSEs)
    tune.track.log(mean_accuracy=float(acc))
    MODELS['F{a}-{b}_{x}-{y}_{z}'.format(a=index+1, b=HL, x=h1, y=h2, z=e)] = acc


## Ray Tune Configuration 2

In [None]:
class PytorchTrainble(tune.Trainable):
    """Class-style Tune trainable that cross-validates the FNN.

    Each training iteration builds a fresh ``Net`` from the trial's config,
    trains and scores it on every fold, and reports the mean of the fold
    scores as ``mean_accuracy`` (a mean squared error, so lower is better).

    NOTE(review): ``_setup``, ``_save``, ``_restore`` and ``reset_config``
    still manage the ``ConvNet``/SGD pair taken from Ray's MNIST example.
    That model is what gets checkpointed; it is not the network trained in
    ``_train``. Confirm whether checkpoints should hold the FNN instead.
    """

    def _setup(self, config):
        self.device = torch.device("cpu")
        self.train_loader, self.test_loader = get_data_loaders()
        self.model = ConvNet().to(self.device)
        self.optimizer = optim.SGD(
            self.model.parameters(),
            lr=config.get("lr", 0.01),
            momentum=config.get("momentum", 0.9))

    def _train(self):
        """Run one k-fold cross-validation and return its mean test MSE."""
        # Take the hyperparameters from this trial's config so that trials
        # actually differ; the module-level values are only fallbacks.
        cfg = self.config
        hidden1 = int(cfg.get("h1", h1))
        hidden2 = int(cfg.get("h2", h2))
        epochs = int(cfg.get("e", e))
        learning_rate = cfg.get("lr", LR)

        net = Net(hidden1, hidden2)
        init_state = copy.deepcopy(net.state_dict())

        MSEs = []
        for index, subset in enumerate(subset_train_list):
            train_rows = np.asarray(subset.value)
            test_rows = np.asarray(subset_test_list[index].value)

            # Every fold starts from the same initial weights.
            net.load_state_dict(init_state)

            # Columns 0-4 are the inputs, the remaining columns the labels.
            training_inputs = train_rows[:, 0:5]
            training_labels = train_rows[:, 5:]
            test_inputs = test_rows[:, 0:5]
            test_labels = test_rows[:, 5:]

            train(net, training_inputs, training_labels, epochs, learning_rate, BATCH_SIZE)

            avg_mse = test(test_inputs, test_labels, net)
            MSEs.append(avg_mse)

        acc = sum(MSEs)/len(MSEs)
        return {"mean_accuracy": float(acc)}

    def _save(self, checkpoint_dir):
        checkpoint_path = os.path.join(checkpoint_dir, "model.pth")
        torch.save(self.model.state_dict(), checkpoint_path)
        return checkpoint_path

    def _restore(self, checkpoint_path):
        self.model.load_state_dict(torch.load(checkpoint_path))

    def reset_config(self, new_config):
        """Adopt ``new_config`` in place when the actor is reused."""
        del self.optimizer
        self.optimizer = optim.SGD(
            self.model.parameters(),
            lr=new_config.get("lr", 0.01),
            momentum=new_config.get("momentum", 0.9))
        # Keep self.config current so the next _train call sees the new values.
        self.config = new_config
        return True


ray.shutdown()  # Restart Ray defensively in case the ray connection is lost. 
ray.init(log_to_driver=False)

# Smoke-test the Trainable's save/restore methods before launching the search:
# once with the default settings and once with use_object_store=True.
validate_save_restore(PytorchTrainble)
validate_save_restore(PytorchTrainble, use_object_store=True)
print("Success!")

## PBT Configuration

In [None]:
# Search space used when a trial is perturbed.
pbt_mutations = {
    # distribution for resampling
    "lr": lambda: np.random.uniform(0.0001, 0.0005),
    # allow perturbations within this set of categorical values
    "momentum": [0.8, 0.9, 0.99],
}

# "mean_accuracy" is minimised; trials are perturbed every 5 training iterations.
scheduler = PopulationBasedTraining(
    hyperparam_mutations=pbt_mutations,
    perturbation_interval=5,
    mode="min",
    metric="mean_accuracy",
    time_attr="training_iteration",
)

In [None]:
ray.shutdown()  # Restart Ray defensively in case the ray connection is lost. 
ray.init(log_to_driver=False)

import time
start_time = time.time()

analysis = tune.run(
    PytorchTrainble,
    name="pbt_test",
    scheduler=scheduler,
    reuse_actors=True,
    verbose=1,
    stop={
        "training_iteration": 4,
    },
    num_samples=4,
    
    # PBT starts by training many neural networks in parallel with random hyperparameters. 
    config={
        "lr": tune.choice([0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007, 0.0008, 0.0009, 0.001, 0.002, 0.003, 0.004, 0.005,]),
        "h1" : tune.randint(5, 20),
        "h2" : tune.randint(5, 20),
        "e" : tune.randint(15, 500),
        "momentum": tune.choice([0.9]),
    })

print(f'\nDuration: {time.time() - start_time:.0f} seconds')

print('best config:', analysis.get_best_config("mean_accuracy"))

# Visualize all mutations of Population-based Training.
! cat ~/ray_results/pbt_test/pbt_global.txt

# Plot by wall-clock time

dfs = analysis.fetch_trial_dataframes()
# This plots everything on the same plot
ax = None
for d in dfs.values():
    ax = d.plot("training_iteration", "mean_accuracy", figsize=(25,5),ax=ax, legend=False)
plt.xlabel("epoch"); plt.ylabel("Test Accuracy"); 

sav_dir = '/content/drive/My Drive/Colab Notebooks/GitHub/MSc/FNN/Results/2HL/' #create a new folder for optimisation results
zfile = 'ray_results_2HL.zip'       #Save optimisation results as zip file
try:
  os.mkdir(sav_dir)
except:
  pass
!zip -r '{sav_dir}{zfile}' /root/ray_results

%load_ext tensorboard
%tensorboard --logdir ~/ray_results/pbt_test