In [None]:
import numpy as np
import sklearn as sk
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, BayesianRidge
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import r2_score
import pandas as pd
import csv

In [None]:
'''Data processing and preprocessing functions and routines.'''
# Collect the initial observations for functions 1..8.
# data[k] holds the pair [inputs, outputs] loaded from ./function_<k+1>/.
data = []
for i in range(1, 9):
    folder = './function_{}'.format(i)
    X = np.load(folder + '/initial_inputs.npy')
    y = np.load(folder + '/initial_outputs.npy')
    data.append([X, y])

# Load the query data.
# The `with` statement closes the file on exit, so no explicit close() is needed.
with open('./039_data.csv', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='\"')
    # Discard the first row (the header). The None default keeps an empty
    # file from raising instead of failing on a missing first row.
    next(reader, None)
    csv_data = list(reader)

# Split every remaining row by column position: fields 3..10 go to X and
# everything from field 11 onwards goes to y. The values are left as the raw
# strings produced by csv.reader; no numeric conversion happens here.
X = [row[3:(8+3)] for row in csv_data]
y = [row[(8+3):] for row in csv_data]

# Due to format issues I could not convert the data into a usable format similar to X, y above.
# E.g. ['0.2 0.2'] does not convert to floats. This means the given data has to be relied upon and
# is more important than the query data.

def prepare_data(d, random_state=None):
    """Prepare one dataset: random 50/50 train/test split plus input scaling.

    Parameters
    ----------
    d : sequence
        ``d[0]`` is the 2-D input array (one row per sample) and ``d[1]`` the
        matching outputs; ``d[1]`` must support indexing with a list of
        integer positions (e.g. a numpy array).
    random_state : int or None, optional
        Seed for the shuffle. With the default ``None`` the global
        ``np.random`` state is used, exactly as before; pass an integer to
        make the split reproducible.

    Returns
    -------
    X_train, X_test, Y_train, Y_test
        The first half of the shuffled samples (rounded down) forms the
        training set and the rest the test set. Rows inside each set are kept
        in their original order. Inputs are standardised; outputs are not.
    """
    X = np.asarray(d[0])
    Y = d[1]

    num_of_points = len(Y)
    split_at = int(num_of_points * 0.5)

    idx = list(range(num_of_points))
    if random_state is None:
        np.random.shuffle(idx)
    else:
        np.random.RandomState(random_state).shuffle(idx)
    idx_train = sorted(idx[:split_at])
    idx_test = sorted(idx[split_at:])

    # Fit the scaler on the training rows only and reuse its statistics for
    # the test rows. Fitting on the full dataset would leak the test set's
    # mean/variance into the training features and bias the evaluation.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X[idx_train, :])
    X_test = scaler.transform(X[idx_test, :])

    Y_train = Y[idx_train]
    Y_test = Y[idx_test]

    return X_train, X_test, Y_train, Y_test

In [None]:
# Run the comparison 10 times and select the model that wins most often.
# There is too little data to use cross-validation, so each run draws a fresh
# random train/test split through prepare_data().

# Candidate regressors. The position in this list is the index reported in
# `mselect` and the position of the model's score in `test_eval`:
# 0 LinearRegression, 1 Ridge, 2 BayesianRidge, 3 GaussianProcessRegressor,
# 4 DecisionTreeRegressor, 5 RandomForestRegressor, 6 SVR.
candidate_models = [
    LinearRegression,
    Ridge,
    BayesianRidge,
    GaussianProcessRegressor,
    DecisionTreeRegressor,
    RandomForestRegressor,
    SVR,
]

for i in range(10):
    # Index of the winning candidate for each dataset in this run.
    mselect = []
    for d in data:
        X_train, X_test, y_train, y_test = prepare_data(d)

        # R^2 on the held-out half for every candidate, in list order.
        test_eval = []
        for make_model in candidate_models:
            model = make_model()
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            test_eval.append(r2_score(y_test, y_pred))

        # R^2 is a "higher is better" score (1.0 is a perfect fit, and it can
        # be negative), so the winner is the maximum. Taking the minimum of
        # the absolute values would pick the score closest to zero instead.
        mselect.append(np.argmax(test_eval))

    print(mselect)

In [None]:
# The following code is for generating random inputs to get input-output pairs to further train the ML to
# approximate the blackbox functions.

import random
def generate(n):
    """Return a list of ``n`` floats, each drawn with ``random.uniform(0, 1)``."""
    return [random.uniform(0, 1) for _ in range(n)]

In [None]:
# Model tuning was done by selecting the best parameters for the model setup. A grid search and 
# automated tuning of hyperparameters was done, despite very limited amounts of data, 
# meaning that the statistical value of the selected hyperparameters is low and may change drastically 
# from sample to sample or dataset to dataset. Initial values were deemed suitable for use. Read test_eval 
# from top to bottom to select.

# Regressor chosen for each dataset, in the same order as `data`. The number
# in each trailing comment is that model's index in the selection loop's
# `test_eval` list.
selected_models = [
    LinearRegression,          # 0 lr
    BayesianRidge,             # 2 br
    GaussianProcessRegressor,  # 3 gp
    SVR,                       # 6 SVR
    SVR,                       # 6 SVR
    RandomForestRegressor,     # 5 RF
    BayesianRidge,             # 2 br
    DecisionTreeRegressor,     # 4 DT
]

# Fit one model per dataset and collect them in order.
models = []
for d, make_model in zip(data, selected_models):
    X_train, X_test, y_train, y_test = prepare_data(d)
    model = make_model()
    model.fit(X_train, y_train)

    # Smoke-test the fitted model on a single random point. The point's
    # dimension is read from the training data rather than hard-coded, so it
    # always matches the number of features the model was fitted on.
    # NOTE(review): prepare_data() standardises the inputs, while this point is
    # raw uniform(0, 1) noise that is not passed through the same scaler, so
    # the prediction is only a shape/format check. Confirm before relying on y.
    x = [generate(X_train.shape[1])]
    y = model.predict(x)

    models.append(model)

# Keep the individual names bound for any cell that still refers to them.
model1, model2, model3, model4, model5, model6, model7, model8 = models

In [None]:
# code for testing the ML for outputting the correct values in the correct format.

import random
def rnd_gen(n):
    """Build a list holding ``n`` independent ``random.uniform(0, 1)`` draws."""
    return [random.uniform(0, 1) for _ in range(n)]

# One random input vector per model. Each length must equal the number of
# features the corresponding entry of `models` was fitted on.
l_numbers = [rnd_gen(dim) for dim in (2, 2, 3, 4, 4, 5, 6, 8)]

print(models)
print(l_numbers)

# zip() pairs every model with its own input vector. Iterating over the bare
# tuple `models, l_numbers` would yield the two lists themselves, not pairs.
for model, l_num in zip(models, l_numbers):
    # predict() expects a 2-D array of shape (n_samples, n_features), so the
    # single sample is wrapped in a list. ravel() flattens the result so each
    # predicted value is formatted on its own.
    inp = np.ravel(model.predict([l_num]))
    # Join the predicted values with '-' (no trailing separator).
    in_p = '-'.join(str(value) for value in inp)
    print(in_p)

In [None]:
# The following code is for generating random inputs to get input-output pairs to further train the ML to
# approximate the blackbox functions.

import random
def generate(n):
    """Return ``n`` random numbers as one dash-separated string.

    Each number is drawn with ``random.uniform(0, 1)`` and written in
    fixed-point form with exactly six decimal places (eight characters, e.g.
    ``0.123456``), truncated rather than rounded. For ``n <= 0`` the result is
    the empty string.
    """
    numbers = [random.uniform(0, 1) for _ in range(n)]
    # Truncate to six decimals numerically and format in fixed-point notation.
    # Slicing str(value) breaks for small values, which str() renders in
    # scientific notation (e.g. '5.6e-05'), and leaves short values unpadded.
    return "-".join(
        "{:.6f}".format(int(value * 1e6) / 1e6) for value in numbers
    )

# Print one random dash-separated query string per requested length, in order.
for n_dims in (2, 2, 3, 4, 4, 5, 6, 8):
    print(generate(n_dims))