In [None]:
#from ann2 import Net
import torch
import torch.nn as nn
import torch.optim as optim

torch.manual_seed(777)

class Net(nn.Module):
    """Fully connected feed-forward network with two sigmoid hidden layers.

    Layout: 5 inputs -> HN1 hidden nodes -> HN2 hidden nodes -> 4 outputs.
    The output layer is linear (no activation), so the network can emit
    unbounded positive or negative values.

    Args:
        HN1: number of nodes in the first hidden layer.
        HN2: number of nodes in the second hidden layer.
    """

    def __init__(self, HN1, HN2):
        # Initialise nn.Module first so parameter/sub-module registration is
        # set up before any attribute is assigned on the instance.
        super().__init__()
        self.HN1 = HN1
        self.HN2 = HN2
        self.fc1 = nn.Linear(5, self.HN1)         # input layer  -> hidden layer 1
        self.fc2 = nn.Linear(self.HN1, self.HN2)  # hidden layer 1 -> hidden layer 2
        self.fc3 = nn.Linear(self.HN2, 4)         # hidden layer 2 -> output layer

    def forward(self, x):
        """Propagate `x` (last dimension of size 5) through the network.

        Returns a tensor whose last dimension has size 4.
        """
        # Both hidden layers squash their weighted sums with a sigmoid.
        for hidden_layer in (self.fc1, self.fc2):
            x = torch.sigmoid(hidden_layer(x))
        # The output layer is returned without an activation.
        return self.fc3(x)


In [None]:
#from replicate import replicate_data 

import numpy as np
import pandas as pd

## Create a function which takes in a dataset and replicates it
def replicate_data(data, replications, noise):
    """Build noisy copies of a dataset (data augmentation).

    The first four columns of `data` are perturbed: every value ``v`` is
    replaced by a draw from a uniform distribution on
    ``[v - |v|*noise, v + |v|*noise]``. The fifth column is copied through
    unchanged.

    Args:
        data: pandas DataFrame with at least five columns.
        replications: number of noisy copies to generate.
        noise: relative half-width of the perturbation (0.03 means +/- 3 %).

    Returns:
        A new DataFrame with ``replications * len(data)`` rows, a fresh
        0..n-1 index and the same columns as `data`. Only the noisy copies
        are returned; the original rows are not included. Columns beyond the
        fifth, if any, are present but filled with NaN.

    Raises:
        IndexError: if `data` has fewer than five columns.
    """
    cols = list(data.columns)
    value_cols, passthrough_col = cols[0:4], cols[4]

    base = data[value_cols].to_numpy(dtype=float)
    # abs() keeps low <= high for negative values; numpy documents the result
    # of uniform(low, high) with high < low as undefined.
    spread = np.abs(base * noise)
    # Copy by position rather than by index label, so a non-default index on
    # `data` cannot turn the pass-through column into NaN.
    passthrough = data[passthrough_col].to_numpy()

    copies = []
    for _ in range(replications):
        noisy = pd.DataFrame(np.random.uniform(base - spread, base + spread), columns=value_cols)
        noisy[passthrough_col] = passthrough
        copies.append(noisy)

    if not copies:
        return pd.DataFrame(columns=data.columns)

    # Concatenate once at the end: DataFrame.append was removed in pandas 2.0
    # and growing a frame inside the loop is quadratic.
    return pd.concat(copies, ignore_index=True, sort=False).reindex(columns=data.columns)



In [None]:
#from train import train
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

def train(net, inputs, labels, EPOCHS, l_rate, BATCH_SIZE):
    """Train `net` in place with Adam on a mean-squared-error loss.

    Args:
        net: torch module mapping 5 input features to the label dimension.
        inputs: array-like of training inputs; reshaped to (-1, 5).
        labels: array-like of training targets, one row per input row.
        EPOCHS: number of full passes over the data.
        l_rate: Adam learning rate.
        BATCH_SIZE: number of rows per optimiser step (the last batch of an
            epoch may be smaller).

    Returns:
        A list with one entry per epoch: the mean of the batch losses seen
        in that epoch (NaN for an epoch with no batches). Earlier versions
        returned nothing, so existing callers that ignore the result are
        unaffected.
    """
    net.train()
    optimiser = optim.Adam(net.parameters(), lr=l_rate)
    loss_function = nn.MSELoss(reduction='mean')

    # Explicit float32 conversion instead of the legacy torch.Tensor(...)
    # constructor; reshape (not view) also accepts non-contiguous input.
    X = torch.as_tensor(inputs, dtype=torch.float32).reshape(-1, 5)
    y = torch.as_tensor(labels, dtype=torch.float32)

    epoch_losses = []
    for _ in range(EPOCHS):
        running_loss, n_batches = 0.0, 0
        # The original wrapped this range in tqdm(..., disable=True), which
        # shows nothing; a plain range behaves the same without needing tqdm.
        for start in range(0, len(X), BATCH_SIZE):
            batch_X = X[start:start + BATCH_SIZE]
            batch_y = y[start:start + BATCH_SIZE]

            optimiser.zero_grad()
            loss = loss_function(net(batch_X), batch_y)
            loss.backward()
            optimiser.step()

            running_loss += loss.item()
            n_batches += 1
        epoch_losses.append(running_loss / n_batches if n_batches else float('nan'))

    return epoch_losses


In [None]:
import torch
import numpy as np
import pandas as pd


def test(test_inputs, test_labels, net):
    net.eval()
    test_X = torch.Tensor(test_inputs).view(-1, 5)
    test_y = torch.Tensor(test_labels)

    predictionNumpy = []
    with torch.no_grad():
        for i in range(0, len(test_X)):
            net_out = net(test_X[i].view(-1, 5))
            predictionNumpy.append(net_out[0].numpy())              # The output from the net is a tensor which contains only one element which is a list. The list contains the 3 output values. We only want the list, not the tensoor containing one element which is a list.

    experimental = []
    for data in test_y:
        experimental.append(data.numpy())

    squared_error_X = []
    squared_error_N = []
    squared_error_L = [] 
    squared_error_C = [] 

    for i in range(0, len(experimental)):
            X_error = experimental[i][0] - predictionNumpy[i][0]
            N_error = experimental[i][1] - predictionNumpy[i][1]
            L_error = experimental[i][2] - predictionNumpy[i][2]
            C_error = experimental[i][3] - predictionNumpy[i][3]
            squared_error_X.append(X_error**2)
            squared_error_N.append(N_error**2)
            squared_error_L.append(L_error**2)
            squared_error_C.append(C_error**2)

    MSE_X1 = sum(squared_error_X[0:14])/14
    MSE_N1 = sum(squared_error_N[0:14])/14
    MSE_L1 = sum(squared_error_L[0:14])/14
    MSE_C1 = sum(squared_error_L[0:14])/14
    
    MSE_X2 = sum(squared_error_X[14:28])/14
    MSE_N2 = sum(squared_error_N[14:28])/14
    MSE_L2 = sum(squared_error_L[14:28])/14
    MSE_C2 = sum(squared_error_C[14:28])/14
    MSE_list = [MSE_X1, MSE_N1, MSE_L1, MSE_C1, MSE_X2, MSE_N2, MSE_L2, MSE_C2]
    AVG_MSE = sum(MSE_list)/8


    LI1, LI2 = test_inputs[0][4], test_inputs[14][4]
   # NIC1, NIC2 = test_inputs[0][4], test_inputs[12][4]
    predictions_online = []
    for index, value in enumerate(test_inputs):
        BC = value[0] + predictionNumpy[index][0]
        NC = value[1] + predictionNumpy[index][1]
        LP = value[2] + predictionNumpy[index][2]
        NIC = value[3] + predictionNumpy[index][3]

        if index < 14:
            predictions_online.append([BC, NC, LP, NIC, LI1])

        if index >= 14:
            predictions_online.append([BC, NC, LP, NIC, LI2])

    predictions_offline = []
    BC1, BC2 = test_inputs[0][0], test_inputs[14][0]
    NC1, NC2 = test_inputs[0][1], test_inputs[14][1]
    LP1, LP2 = test_inputs[0][2], test_inputs[14][2]
    NIC1, NIC2 = test_inputs[0][3], test_inputs[14][3]
    for index, value in enumerate(test_inputs):
        if index < 14:
            net_out = net(torch.Tensor([BC1, NC1, LP1, NIC1, LI1]))
            BC = BC1 + net_out[0]   
            NC = NC1 + net_out[1]
            LP = LP1 + net_out[2]
            NIC = NIC1 + net_out[3]
            predictions_offline.append([float(BC), float(NC), float(LP), float(NIC), float(LI1)])
            BC1 = BC
            NC1 = NC
            LP1 = LP
            NIC1 = NIC
        
        if index >= 14:
            net_out = net(torch.Tensor([BC2, NC2, LP2, NIC2, LI2]))
            BC = BC2 + net_out[0] 
            NC = NC2 + net_out[1] 
            LP = LP2 + net_out[2] 
            NIC = NIC2 + net_out[3] 
            predictions_offline.append([float(BC), float(NC), float(LP), float(NIC), float(LI2)])
            BC2 = BC
            NC2 = NC
            LP2 = LP
            NIC2 = NIC
            
    return AVG_MSE, predictions_online, predictions_offline



In [None]:
# Clone the entire repo.
!git clone -l -s git://github.com/Arymega/RNN2.git cloned-repo
%cd cloned-repo
!ls

Cloning into 'cloned-repo'...
remote: Enumerating objects: 34, done.[K
remote: Counting objects:   2% (1/34)[Kremote: Counting objects:   5% (2/34)[Kremote: Counting objects:   8% (3/34)[Kremote: Counting objects:  11% (4/34)[Kremote: Counting objects:  14% (5/34)[Kremote: Counting objects:  17% (6/34)[Kremote: Counting objects:  20% (7/34)[Kremote: Counting objects:  23% (8/34)[Kremote: Counting objects:  26% (9/34)[Kremote: Counting objects:  29% (10/34)[Kremote: Counting objects:  32% (11/34)[Kremote: Counting objects:  35% (12/34)[Kremote: Counting objects:  38% (13/34)[Kremote: Counting objects:  41% (14/34)[Kremote: Counting objects:  44% (15/34)[Kremote: Counting objects:  47% (16/34)[Kremote: Counting objects:  50% (17/34)[Kremote: Counting objects:  52% (18/34)[Kremote: Counting objects:  55% (19/34)[Kremote: Counting objects:  58% (20/34)[Kremote: Counting objects:  61% (21/34)[Kremote: Counting objects:  64% (22/34)[Kremote: Count

In [None]:
import torch
import pandas as pd
import numpy as np 
import os
import time
start_time = time.time()
#from ann2 import Net
#from replicate import replicate_data 
from sklearn.preprocessing import StandardScaler
#from train import train
#from test2 import test

# Seed NumPy's global RNG so the noisy replication (and any later shuffle) is
# reproducible; the value mirrors the torch.manual_seed(777) call in the model cell.
NUMPY_SEED = 777
np.random.seed(NUMPY_SEED)

# Load training and testing data as pd dataframe
training_data = pd.read_excel('/content/cloned-repo/NewDatasets2.xlsx', sheet_name='IOrtrain (1)')
testing_data = pd.read_excel('/content/cloned-repo/NewDatasets2.xlsx', sheet_name='IOrtest (1)')

# Standardise training and testing data.
# NOTE(review): the test set is standardised with its own scaler (fitted on the
# test data) rather than with scaler_train, so the network sees test inputs on
# a different scale than it was trained on. Left unchanged because
# scaler_test is also used to inverse-transform the predictions further down;
# confirm this is intended.
scaler_train = StandardScaler()
scaler_test = StandardScaler()

scaler_train.fit(training_data)
scaler_test.fit(testing_data)

testing_data = scaler_test.transform(testing_data)

# Keep the five model columns of the training frame. Because the source is
# already a DataFrame, passing `columns=` selects columns by label (it does
# not rename them), so the sheet headers are expected to match these names.
columns = "BC NC LP NIC LI".split()
training_data = pd.DataFrame(data=training_data, index=None, columns=columns)

# Replicate the training data: 50 noisy copies at +/-3 % and 50 at +/-5 %,
# stacked under the original rows. pd.concat replaces DataFrame.append, which
# was removed in pandas 2.0.
replicated_data1 = replicate_data(training_data, 50, 0.03)
replicated_data2 = replicate_data(training_data, 50, 0.05)
training_data = pd.concat(
    [training_data, replicated_data1, replicated_data2],
    ignore_index=True,
    sort=False,
)

# The training scaler was fitted on the un-replicated data above.
training_data = scaler_train.transform(training_data)
training_data = np.array(training_data)


def _append_rate_labels(data):
    """Return `data` with four label columns appended.

    The label of row i is the change of the first four columns from row i to
    row i + 1; the final row has no successor and gets zeros.
    """
    rates = np.zeros((len(data), 4))
    rates[:-1] = np.diff(data[:, :4], axis=0)
    return np.append(data, rates, axis=1)


# Calculate training and testing labels
training_data = _append_rate_labels(training_data)
testing_data = _append_rate_labels(testing_data)

# Remove all datapoints corresponding to 144 h from the training and testing sets.
# These are the 15th, 30th, 45th, ... rows (positions 14, 29, 44, ...). Their
# labels were computed against the following row, which is the first row of the
# next block of 15, so they are dropped rather than used as targets.
def _drop_every_15th_row(data):
    """Return a copy of `data` without the rows at positions 14, 29, 44, ..."""
    boundary_rows = np.arange(14, len(data), 15)
    return np.delete(data, boundary_rows, 0)


training_data = _drop_every_15th_row(training_data)
testing_data = _drop_every_15th_row(testing_data)

# Shuffle training data (rows are permuted in place)
np.random.shuffle(training_data)

# Define structure of optimal network
HL = 2                 # number of hidden layers (only used in the output file names)
HN1, HN2 = 14, 10      # nodes in hidden layer 1 / hidden layer 2
EPOCHS = 476
BATCH_SIZE = 100
LR = 0.004
N_RESTARTS = 50        # number of independently initialised networks to train

xcl_dir = '/content/drive/My Drive/Colab Notebooks/GitHub/FNN/ANN/Run 5/Results/2HL/' #create a new folder for prediction rsults
# Create the folder (and any missing parents). An already existing folder is
# fine; any other failure is raised here instead of being swallowed and
# resurfacing later when the first result file is written.
os.makedirs(xcl_dir, exist_ok=True)

# Split features (first five columns) from labels (remaining columns) once;
# the arrays do not change between restarts.
training_inputs = training_data[:, 0:5]
training_labels = training_data[:, 5:]
test_inputs = testing_data[:, 0:5]
test_labels = testing_data[:, 5:]

min_mse = float('inf')   # best (lowest) average test MSE seen so far
count_min = None         # restart number that produced min_mse

for count in range(1, N_RESTARTS + 1):
    # Instantiate a fresh network, then train and test it
    net = Net(HN1, HN2)
    train(net, training_inputs, training_labels, EPOCHS, LR, BATCH_SIZE)
    avg_mse, predictions_online, predictions_offline = test(test_inputs, test_labels, net)

    is_best = avg_mse <= min_mse
    is_last = count == N_RESTARTS
    if is_best:
        # Only a genuinely better run updates the running minimum; the forced
        # save of the final run below no longer overwrites it.
        min_mse = avg_mse
        count_min = count

    # Save every new minimum, and always save the final restart.
    if is_best or is_last:
        # Predictions are in standardised units; map them back to the
        # original scale with the scaler that was fitted on the test data.
        predictions_online_inverse_transform = scaler_test.inverse_transform(predictions_online)
        predictions_offline_inverse_transform = scaler_test.inverse_transform(predictions_offline)

        online = pd.DataFrame(predictions_online_inverse_transform)
        offline = pd.DataFrame(predictions_offline_inverse_transform)
        # Separate name: avg_mse must stay a scalar for the progress print below.
        avg_mse_sheet = pd.DataFrame([avg_mse, 0])
        f = round(float(avg_mse), 5)   # MSE of the model being saved

        run_tag = f'{count}_{f}_{HL}_{HN1}-{HN2}_{EPOCHS}_{LR}_{BATCH_SIZE}'
        with pd.ExcelWriter(f'{xcl_dir}Predictions V1.1 run5 {run_tag}.xlsx') as writer:
            offline.to_excel(writer, sheet_name='Offline', startrow=1, startcol=1)
            online.to_excel(writer, sheet_name='Online', startrow=1, startcol=1)
            avg_mse_sheet.to_excel(writer, sheet_name='Avg_MSE', startrow=1, startcol=1)
        torch.save(net.state_dict(), f'{xcl_dir}Model V1.1 run5 {run_tag}.pt')

    # Progress line: this run's MSE, best MSE so far, run number, best run number.
    print(avg_mse, min_mse, count, count_min)

print(f'\nDuration: {time.time() - start_time:.0f} seconds')


         0
0  0.10844
1  0.00000 0.10844043219045377 1 1
0.3030166205635789 0.10844043219045377 2 1
0.16143285957938586 0.10844043219045377 3 1
0.17669881700112794 0.10844043219045377 4 1
0.16345333985848265 0.10844043219045377 5 1
0.28901129833145256 0.10844043219045377 6 1
0.1714227305881409 0.10844043219045377 7 1
0.13485995216589433 0.10844043219045377 8 1
0.23119452748747157 0.10844043219045377 9 1
0.15284249824916255 0.10844043219045377 10 1
0.15967231996704115 0.10844043219045377 11 1
0.11369334928563041 0.10844043219045377 12 1
0.1861806620191431 0.10844043219045377 13 1
0.14012609846675717 0.10844043219045377 14 1
0.24239881768371008 0.10844043219045377 15 1
0.13265184321452828 0.10844043219045377 16 1
0.14638967787471113 0.10844043219045377 17 1
0.3695253785366585 0.10844043219045377 18 1
0.14336760127758597 0.10844043219045377 19 1
0.21740594721505777 0.10844043219045377 20 1
0.11260607675460937 0.10844043219045377 21 1
0.196631071557913 0.10844043219045377 22 1
0.1579186412