# Function 1: Searching for Contamination Sources

This may sound simple because you only have a two-dimensional input, however it is a very difficult problem. It corresponds to trying to find the source of radiation in some square area. However, you can only detect the radiation once you are very close to it, meaning most of the readings will be zero. There are two sources, one is not too dangerous, so make sure you try to find both modes of the function.

#### Student ID: 574

In [3]:
# Import packages
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor

In [4]:
#Set Random seed to the ulitmate answer!
np.random.seed(42)

## Guide and Ideas for Capstone Project

We begin this guide by downloading the data:

In [7]:
X = np.load('initial_inputs.npy')
Y = np.load('initial_outputs.npy')

In [8]:
X

array([[0.31940389, 0.76295937],
       [0.57432921, 0.8798981 ],
       [0.73102363, 0.73299988],
       [0.84035342, 0.26473161],
       [0.65011406, 0.68152635],
       [0.41043714, 0.1475543 ],
       [0.31269116, 0.07872278],
       [0.68341817, 0.86105746],
       [0.08250725, 0.40348751],
       [0.88388983, 0.58225397]])

In [9]:
Y

array([ 1.32267704e-079,  1.03307824e-046,  7.71087511e-016,
        3.34177101e-124, -3.60606264e-003, -2.15924904e-054,
       -2.08909327e-091,  2.53500115e-040,  3.60677119e-081,
        6.22985647e-048])

## Week 1: Random Search

The simplest solution would be a simple random search, that is, we can randomly choose our next query point:

In [12]:
#next_query = np.random.uniform(size = 2)
#print(next_query)
def faraway_point(data, num_guess,new_pts):
    '''Generates new random points based on size of original data set that are far away from the data set
    The function generates num_guess random data points and selects the furthest point from the dataset based on euclidian distance
    If more than one data point is requested then the new data point is appended to the data set.
    :: data : Origional data set
    :: num_guesses : Number of random data points to compare with
    :: new_points : Number of new points to generate
    :: RETURN : Returns data set with new points appended'''
    np.random.seed(42)
    dim = data.shape[1]
    for j in range(new_pts):
        max_sum_dist = 0
        best_point = []
        for i in range(num_guess):
            new_point = np.random.rand(1,dim)
            euclid_dist = np.sqrt(np.sum(np.square(data - new_point), axis=1))
            sum_dist = np.sum(euclid_dist)
            if sum_dist > max_sum_dist:
                max_sum_dist = sum_dist
                best_point = new_point
        print(np.array2string(best_point, precision=6, separator='-', floatmode='fixed',formatter={'float': '{:0.6f}'.format}))
        #data = np.vstack((data,best_point))
    #return data

In [13]:
faraway_point(X, 100000,3)

[[0.001256-0.001021]]
[[0.004560-0.002142]]
[[0.001083-0.000079]]


# Week 2

### Append data

In [16]:
# Add the new data points to the old ones
X = np.load('initial_inputs.npy')
X = np.vstack((X,[0.001256, 0.001021],
[3.84000e-04, 9.99836e-01],
[0.997409, 0.002207]))

X

array([[3.19403889e-01, 7.62959374e-01],
       [5.74329215e-01, 8.79898105e-01],
       [7.31023631e-01, 7.32999876e-01],
       [8.40353417e-01, 2.64731614e-01],
       [6.50114060e-01, 6.81526352e-01],
       [4.10437137e-01, 1.47554299e-01],
       [3.12691157e-01, 7.87227779e-02],
       [6.83418169e-01, 8.61057464e-01],
       [8.25072518e-02, 4.03487506e-01],
       [8.83889829e-01, 5.82253974e-01],
       [1.25600000e-03, 1.02100000e-03],
       [3.84000000e-04, 9.99836000e-01],
       [9.97409000e-01, 2.20700000e-03]])

In [17]:
Y = np.load('initial_outputs.npy').T
Y = np.concatenate((Y,[6.006625024649171e-247],
[0],
[0]
),axis=0)
Y

array([ 1.32267704e-079,  1.03307824e-046,  7.71087511e-016,
        3.34177101e-124, -3.60606264e-003, -2.15924904e-054,
       -2.08909327e-091,  2.53500115e-040,  3.60677119e-081,
        6.22985647e-048,  6.00662502e-247,  0.00000000e+000,
        0.00000000e+000])

### UCB

In [19]:
# create a fitted function for maximisation techniques
gpr_max = GaussianProcessRegressor().fit(X, Y)

In [20]:
# to optimize the acquisition function, we will simply use gridsearch over a space of 10.000 gridpoints
x1 = np.linspace(0, 1, 101)
x2 = x1

X_grid = []
for i in range(len(x1)):
    for j in range(len(x2)):
        X_grid.append([x1[i], x2[j]])

X_grid = np.array(X_grid)
mean, std = gpr_max.predict(X_grid, return_std = True)
ucb = mean + 1.96 * std

idx_max = np.argmax(ucb)
next_query = X_grid[idx_max]
print(next_query)

[0.69 0.  ]


### Probability Improvement

In [22]:
from scipy.stats import norm

max_obs = max(Y)
eta = 0.2

PI_acq = norm.cdf((mean - max_obs - eta) / std)

best_x = X_grid[np.argmax(PI_acq)]
best_x

array([0.67, 0.  ])

### Some more random guesses

In [24]:
faraway_point(X, 100000,3)

[[0.996744-0.999561]]
[[0.999529-0.998264]]
[[0.999951-0.998335]]


# Week 3

### Using skopt

In [27]:
# Import packages for optimisation
from skopt.space import Real
from skopt.utils import use_named_args
from skopt import gp_minimize

# create fitted function for minimization techniques.
# NOTE: Y has been flipped with minus value.
gpr_min = GaussianProcessRegressor().fit(X, -Y)

# Define search space.
dim1 = Real(name='x1', low=0.0, high=1.0)
dim2 = Real(name='x2', low=0.0, high=1.0)

dimensions = [dim1, dim2]

In [28]:
# Create black box function
@use_named_args(dimensions=dimensions)
def black_box_function_min(x1,x2):
    print('x1',x1,'x2',x2,'y',gpr_min.predict(np.column_stack((x1,x2))))
    return np.ndarray.item(gpr_min.predict(np.column_stack((x1,x2))))

In [29]:
# Use GP-minimise to search for best solution
result_min = gp_minimize(func=black_box_function_min,
                         dimensions=dimensions,
                         random_state=42)

x1 0.7965429868602331 x2 0.18343478986616382 y [-0.0075162]
x1 0.7796910002727695 x2 0.5968501579464871 y [0.00211299]
x1 0.44583275285359125 x2 0.09997491581800291 y [-0.0061473]
x1 0.45924889196586727 x2 0.3337086111390219 y [0.01008613]
x1 0.1428668179219408 x2 0.650888472948853 y [-0.00100332]
x1 0.05641157902710027 x2 0.7219987722668249 y [-0.00667437]
x1 0.9385527090157504 x2 0.0007787658410143285 y [-0.01225605]
x1 0.9922115592912177 x2 0.6174815096277166 y [-0.0025943]
x1 0.611653160488281 x2 0.007066305219717408 y [-0.02751223]
x1 0.02306242504141576 x2 0.5247746602583893 y [-0.00652665]
x1 0.4700959584515162 x2 0.9282310681057081 y [-0.00192741]
x1 0.6340467674692489 x2 0.818322143609067 y [0.000445]
x1 0.2599718641195569 x2 0.9998615362082383 y [-0.00686621]
x1 0.14133048287880892 x2 0.04501528534787325 y [0.00332963]
x1 0.7038268062662599 x2 0.0 y [-0.03123825]
x1 0.9786015013478431 x2 0.992936005659333 y [0.00652019]
x1 0.0 x2 1.0 y [4.06319775e-05]
x1 0.000234097158738333

In [30]:
# Results of minimisation
result_min.func_vals

array([-7.51620224e-03,  2.11298834e-03, -6.14730467e-03,  1.00861294e-02,
       -1.00331636e-03, -6.67436799e-03, -1.22560535e-02, -2.59429525e-03,
       -2.75122337e-02, -6.52665381e-03, -1.92740589e-03,  4.44999497e-04,
       -6.86620709e-03,  3.32962698e-03, -3.12382532e-02,  6.52019098e-03,
        4.06319775e-05, -7.91917948e-03,  1.16741625e-02, -1.85312826e-02,
       -3.09841041e-02, -3.12517153e-02, -3.12597891e-02,  6.76811495e-03,
       -2.07363593e-04, -3.12611491e-02,  7.85328223e-03, -3.12620221e-02,
       -8.42094846e-03, -3.12621747e-02,  8.24632680e-03, -3.12623585e-02,
       -8.41656847e-03, -3.12624306e-02, -3.12623057e-02, -5.19055317e-03,
       -3.12624309e-02, -3.12623214e-02, -4.01716062e-03,  4.29436165e-03,
       -3.12624073e-02, -3.12624272e-02, -3.12623651e-02,  8.86614890e-03,
       -3.12611405e-02, -6.49919138e-03, -7.46207527e-03, -5.87458793e-03,
        4.00876291e-03, -3.12624169e-02, -3.12621907e-02,  5.41952072e-03,
       -3.12624277e-02, -

In [31]:
# Corresponsing X guesses
result_min.x_iters

[[0.7965429868602331, 0.18343478986616382],
 [0.7796910002727695, 0.5968501579464871],
 [0.44583275285359125, 0.09997491581800291],
 [0.45924889196586727, 0.3337086111390219],
 [0.1428668179219408, 0.650888472948853],
 [0.05641157902710027, 0.7219987722668249],
 [0.9385527090157504, 0.0007787658410143285],
 [0.9922115592912177, 0.6174815096277166],
 [0.611653160488281, 0.007066305219717408],
 [0.02306242504141576, 0.5247746602583893],
 [0.4700959584515162, 0.9282310681057081],
 [0.6340467674692489, 0.818322143609067],
 [0.2599718641195569, 0.9998615362082383],
 [0.14133048287880892, 0.04501528534787325],
 [0.7038268062662599, 0.0],
 [0.9786015013478431, 0.992936005659333],
 [0.0, 1.0],
 [0.00023409715873833303, 0.27614937717485943],
 [0.9975269325337545, 0.34640713000785944],
 [0.6621814445074825, 0.09032017478956224],
 [0.7422693674125657, 0.0],
 [0.7185770612844319, 0.0],
 [0.7156268383924105, 0.0],
 [0.693065092233857, 0.998717114671638],
 [0.0005861975467132964, 0.00368494441138977

In [32]:
# Print next guess in format for Google form
print(np.array2string(np.array(result_min.x), precision=6, separator='-', floatmode='fixed',formatter={'float': '{:0.6f}'.format}))

[0.712711-0.000000]


In [33]:
def black_box_function_max(x1,x2):
    #print('x1',x1,'x2',x2,'y',gpr_max.predict(np.column_stack((x1,x2))))
    return np.ndarray.item(gpr_max.predict(np.column_stack((x1,x2))))

# Week 4

In [35]:
# Update X values
X = np.load('initial_inputs.npy')
X = np.vstack((X,[0.001256, 0.001021],
[3.84000e-04, 9.99836e-01],
[0.997409, 0.002207],
[7.09823e-01, 1.00000e-06],
[0.997527, 0.346407],
[0.459249, 0.333709],
[0.9975,   0.124873],
[0.19632,  0.349227]              
              ))

X

array([[3.19403889e-01, 7.62959374e-01],
       [5.74329215e-01, 8.79898105e-01],
       [7.31023631e-01, 7.32999876e-01],
       [8.40353417e-01, 2.64731614e-01],
       [6.50114060e-01, 6.81526352e-01],
       [4.10437137e-01, 1.47554299e-01],
       [3.12691157e-01, 7.87227779e-02],
       [6.83418169e-01, 8.61057464e-01],
       [8.25072518e-02, 4.03487506e-01],
       [8.83889829e-01, 5.82253974e-01],
       [1.25600000e-03, 1.02100000e-03],
       [3.84000000e-04, 9.99836000e-01],
       [9.97409000e-01, 2.20700000e-03],
       [7.09823000e-01, 1.00000000e-06],
       [9.97527000e-01, 3.46407000e-01],
       [4.59249000e-01, 3.33709000e-01],
       [9.97500000e-01, 1.24873000e-01],
       [1.96320000e-01, 3.49227000e-01]])

In [36]:
# Update Y values
Y = np.load('initial_outputs.npy').T
Y = np.concatenate((Y,[6.006625024649171e-247],
[0],
[0],
[-1.29E-182],
[-4.02E-151],
[-2.51E-08],
[-4.25E-272],
[1.2572517673559457e-39]
),axis=0)
Y

array([ 1.32267704e-079,  1.03307824e-046,  7.71087511e-016,
        3.34177101e-124, -3.60606264e-003, -2.15924904e-054,
       -2.08909327e-091,  2.53500115e-040,  3.60677119e-081,
        6.22985647e-048,  6.00662502e-247,  0.00000000e+000,
        0.00000000e+000, -1.29000000e-182, -4.02000000e-151,
       -2.51000000e-008, -4.25000000e-272,  1.25725177e-039])

In [37]:
# create fitted function for minimization techniques.
# NOTE: Y has been flipped with minus value.
gpr_min = GaussianProcessRegressor().fit(X, -Y)


In [38]:
from skopt.space import Real
from skopt.utils import use_named_args
from skopt import gp_minimize
dim1 = Real(name='x1', low=0.0, high=1.0)
dim2 = Real(name='x2', low=0.0, high=1.0)

dimensions = [dim1, dim2]

@use_named_args(dimensions=dimensions)
def black_box_function_min(x1,x2):
    print('x1',x1,'x2',x2,'y',gpr_min.predict(np.column_stack((x1,x2))))
    return np.ndarray.item(gpr_min.predict(np.column_stack((x1,x2))))

result_min = gp_minimize(func=black_box_function_min,
                         dimensions=dimensions,
                         random_state=42)



x1 0.7965429868602331 x2 0.18343478986616382 y [0.00032307]
x1 0.7796910002727695 x2 0.5968501579464871 y [0.00179388]
x1 0.44583275285359125 x2 0.09997491581800291 y [-0.00045142]
x1 0.45924889196586727 x2 0.3337086111390219 y [3.14314093e-06]
x1 0.1428668179219408 x2 0.650888472948853 y [-0.00269701]
x1 0.05641157902710027 x2 0.7219987722668249 y [-0.00412145]
x1 0.9385527090157504 x2 0.0007787658410143285 y [0.00131349]
x1 0.9922115592912177 x2 0.6174815096277166 y [-0.00079294]
x1 0.611653160488281 x2 0.007066305219717408 y [-0.00161726]
x1 0.02306242504141576 x2 0.5247746602583893 y [-0.00226159]
x1 0.3360070589901088 x2 0.9978363962915617 y [-0.00837407]
x1 0.1613186514443243 x2 1.0 y [-0.01339844]
x1 0.9983329122213579 x2 0.9971292460562787 y [0.05998046]
x1 0.0 x2 0.0 y [-0.00018718]
x1 1.0 x2 0.40462638683259156 y [0.00037847]
x1 0.0 x2 0.25420321058258377 y [0.01203255]
x1 0.0 x2 1.0 y [6.9490794e-05]
x1 0.23500064168575766 x2 0.8756460348084286 y [-0.0082386]
x1 0.2564646672

In [39]:
# Print next guess in format for Google form
print(np.array2string(np.array(result_min.x), precision=6, separator='-', floatmode='fixed',formatter={'float': '{:0.6f}'.format}))

[0.184522-1.000000]


## Week 5


In [41]:
X = np.load('initial_inputs.npy')
X = np.vstack((X,[0.001256, 0.001021],
[3.84000e-04, 9.99836e-01],
[0.997409, 0.002207],
[7.09823e-01, 1.00000e-06],
[0.997527, 0.346407],
[0.459249, 0.333709],
[0.9975,   0.124873],
[0.19632,  0.349227],
[0.184525, 0.999999],
[0.998333, 0.997129]

              ))

print('X data:',X)

Y = np.load('initial_outputs.npy').T
Y = np.concatenate((Y,[6.006625024649171e-247],
[0],
[0],
[-1.29E-182],
[-4.02E-151],
[-2.51E-08],
[-4.25E-272],
[1.2572517673559457e-39],
[-8.20E-234],
[2.606008660915559e-190]
                    
),axis=0)
print('Y data:',Y)

# create fitted function for -y minimization techniques
gpr_min = GaussianProcessRegressor().fit(X, -Y)


from skopt.space import Real
from skopt.utils import use_named_args
from skopt import gp_minimize
dim1 = Real(name='x1', low=0.0, high=1.0)
dim2 = Real(name='x2', low=0.0, high=1.0)

dimensions = [dim1, dim2]

@use_named_args(dimensions=dimensions)
def black_box_function_min(x1,x2):
    print('x1',x1,'x2',x2,'y',gpr_min.predict(np.column_stack((x1,x2))))
    return np.ndarray.item(gpr_min.predict(np.column_stack((x1,x2))))

print('STARTING OPTIMISATION:')

result_min = gp_minimize(func=black_box_function_min,
                         dimensions=dimensions,
                         random_state=42)



# Print next guess in format for Google form
print('NEXT GUESS:',np.array2string(np.array(result_min.x), precision=6, separator='-', floatmode='fixed',formatter={'float': '{:0.6f}'.format}))

X data: [[3.19403889e-01 7.62959374e-01]
 [5.74329215e-01 8.79898105e-01]
 [7.31023631e-01 7.32999876e-01]
 [8.40353417e-01 2.64731614e-01]
 [6.50114060e-01 6.81526352e-01]
 [4.10437137e-01 1.47554299e-01]
 [3.12691157e-01 7.87227779e-02]
 [6.83418169e-01 8.61057464e-01]
 [8.25072518e-02 4.03487506e-01]
 [8.83889829e-01 5.82253974e-01]
 [1.25600000e-03 1.02100000e-03]
 [3.84000000e-04 9.99836000e-01]
 [9.97409000e-01 2.20700000e-03]
 [7.09823000e-01 1.00000000e-06]
 [9.97527000e-01 3.46407000e-01]
 [4.59249000e-01 3.33709000e-01]
 [9.97500000e-01 1.24873000e-01]
 [1.96320000e-01 3.49227000e-01]
 [1.84525000e-01 9.99999000e-01]
 [9.98333000e-01 9.97129000e-01]]
Y data: [ 1.32267704e-079  1.03307824e-046  7.71087511e-016  3.34177101e-124
 -3.60606264e-003 -2.15924904e-054 -2.08909327e-091  2.53500115e-040
  3.60677119e-081  6.22985647e-048  6.00662502e-247  0.00000000e+000
  0.00000000e+000 -1.29000000e-182 -4.02000000e-151 -2.51000000e-008
 -4.25000000e-272  1.25725177e-039 -8.20000000e