In [1]:
# imports
import time
import numpy as np
import pandas as pd
from dask_jobqueue import SLURMCluster
from distributed import Client
from fvgp import GP
import src.kernel as kernels
import src.hyperparameters as hps
import sklearn.metrics as metrics

In [2]:
# Constants
INPUT_SPACE_DIM = 3  # first positional argument of GP(); the inputs have 3 columns (Latitude, Longitude, dt_float)
N1 = 1  # NOTE(review): not referenced in the visible cells — confirm whether it is still needed
N2 = 1  # NOTE(review): not referenced in the visible cells — confirm whether it is still needed
DT = np.float64  # NOTE(review): not referenced in the visible cells — confirm whether it is still needed
DATA_DIR = "../data/"  # directory holding the input CSV (relative to the notebook)
DATA_FILE_NAME = 'data_1960.csv'  # CSV file read from DATA_DIR
MAT_SIZE = 1500  # number of rows sampled as training points
BATCH_SIZE = 100  # passed to GP as gp2Scale_batch_size
MAX_ITER_TRAIN = 100  # only used by the commented-out gp.train(...) call
ENV_TO_SOURCE = 'source /u/dssc/ipasia00/test_dask/dask/bin/activate'  # NOTE(review): absolute, user-specific path; not used in the visible cells

In [3]:
# Read the data and draw a reproducible random training subset.
TRAIN_SAMPLE_SEED = 0  # fixed so the same training rows are drawn on every run

# Rows with a missing value in any column are discarded. The non-mutating
# dropna() keeps the cell idempotent when it is re-run.
data = pd.read_csv(DATA_DIR + DATA_FILE_NAME).dropna()
x_data = data[['Latitude', 'Longitude', 'dt_float']].to_numpy()
y_data = data['AverageTemperature'].to_numpy()

# Pick MAT_SIZE distinct row positions with a locally seeded generator, so the
# result does not depend on the state of the global NumPy RNG.
train_rng = np.random.default_rng(TRAIN_SAMPLE_SEED)
idx = train_rng.choice(x_data.shape[0], size=MAT_SIZE, replace=False)
x_train = x_data[idx]
y_train = y_data[idx]


In [4]:
# Create the Dask client; it is later passed to GP as gp2Scale_dask_client.
# NOTE(review): Client() with no arguments presumably starts a local cluster — confirm.

# Note: in the Python script this should be replaced with a SLURMCluster-backed client, e.g.
# client = Client(SLURMCluster(...))

client = Client()

In [5]:
# Use the pre-fitted hyperparameters stored on disk instead of training;
# they are passed to GP as init_hyperparameters.
hyperparameters = np.load("../out/one_one.npy")

In [6]:
# GP over the sampled training points, initialised with the pre-fitted
# hyperparameters. gp2Scale mode is enabled and given the Dask client and
# the batch size.
gp = GP(
    INPUT_SPACE_DIM,
    x_train,
    y_train,
    init_hyperparameters=hyperparameters,
    gp_kernel_function=kernels.custom_kernel_one_shot,
    gp2Scale=True,
    gp2Scale_dask_client=client,
    gp2Scale_batch_size=BATCH_SIZE,
    info=False,
)

                 hyperparameter_bounds. That means they have to provided to the training.
  gp = GP(INPUT_SPACE_DIM, x_train, y_train, init_hyperparameters=hyperparameters,


In [7]:
# Training is skipped because the pre-fitted hyperparameters are used; uncomment to re-train.
# gp.train(max_iter=MAX_ITER_TRAIN, hyperparameter_bounds=hps.build_bounds(1,1))

In [8]:
# Draw a held-out test set from the rows that were NOT used for training.

N_TEST = 275
TEST_SAMPLE_SEED = 1  # fixed so the reported errors are reproducible across runs

# Drop the training rows (idx) so that no test point was seen by the model
x_elegible = np.delete(x_data, idx, axis=0)
y_elegible = np.delete(y_data, idx, axis=0)

# Choose N_TEST distinct rows among the remaining ones, using a locally seeded
# generator so the result does not depend on the global NumPy RNG state.
test_rng = np.random.default_rng(TEST_SAMPLE_SEED)
idx_test = test_rng.choice(x_elegible.shape[0], size=N_TEST, replace=False)
x_test = x_elegible[idx_test]
y_test = y_elegible[idx_test]

In [9]:
# Posterior mean of the pre-fitted GP at the held-out test inputs.
# The result is indexed with "f(x)" when the error is computed further down.
#gp.posterior_mean(x_pred=x_train, hyperparameters=hyperparameters, x_out=x_data)
saved = gp.posterior_mean(x_test)

In [10]:
# Generate random hyperparameters, used as a baseline against the pre-fitted ones.
# NOTE(review): the literals 1, 1, 3 equal N1, N2 and INPUT_SPACE_DIM — presumably
# the same quantities; confirm against src.hyperparameters.build_hps.
random_hps = hps.build_hps(1,1,3)

In [11]:
# Second GP on the same training points and kernel, but initialised with the
# randomly generated hyperparameters.
random_gp = GP(
    INPUT_SPACE_DIM,
    x_train,
    y_train,
    init_hyperparameters=random_hps,
    gp_kernel_function=kernels.custom_kernel_one_shot,
    gp2Scale=True,
    gp2Scale_dask_client=client,
    gp2Scale_batch_size=BATCH_SIZE,
    info=False,
)

In [12]:
# Posterior mean of the randomly initialised GP at the same test inputs.
random_saved = random_gp.posterior_mean(x_test)

In [13]:
# Mean squared error of each model's posterior mean against the held-out targets
mse = metrics.mean_squared_error(y_true=y_test, y_pred=saved["f(x)"])
random_ms = metrics.mean_squared_error(y_true=y_test, y_pred=random_saved["f(x)"])

In [14]:
# Report both errors, one per line
print(f"mse: {mse}", f"random mse: {random_ms}", sep="\n")

mse: 84.49130500234631
random mse: 3507.063396146172


In [15]:
# Root mean squared errors: pre-fitted model first, random baseline second
print(np.sqrt(mse), np.sqrt(random_ms), sep="\n")

9.191915197734708
59.220464335786595
