In [29]:
# imports
import time
import numpy as np
import pandas as pd
from dask_jobqueue import SLURMCluster
from distributed import Client
from fvgp import GP
import src.kernel as kernels
import src.hyperparameters as hps


In [30]:
# Constan 
INPUT_SPACE_DIM = 3
N1 = 1
N2 = 1
DT = np.float64
DATA_DIR = "../data/"
DATA_FILE_NAME = 'data_1960.csv'
MAT_SIZE = 100
BATCH_SIZE = 10
MAX_ITER_TRAIN = 100
ENV_TO_SOURCE = 'source /u/dssc/ipasia00/test_dask/dask/bin/activate'

In [31]:
# Read the data
data = pd.read_csv(DATA_DIR + DATA_FILE_NAME)
data.dropna(inplace=True)
x_data = data[['Latitude', 'Longitude', 'dt_float']].values
y_data = data['AverageTemperature'].values

idx = np.random.choice(np.arange(x_data.shape[0]), MAT_SIZE, replace=False)
x_train = x_data[idx]
y_train = y_data[idx]


In [32]:
# Create the cluster

## Note!! In the python script this should be replaced with the SLURMCluster
# client = client(SLURMCluster ..... )

client = Client()

In [33]:
# Use the pre-fitted hyperparameters
hyperparameters = np.array([-6.47627580e+01,
                            -8.33833357e+01,
                            1.98049380e+03,
                            5.72131884e-01,
                            4.54867958e-01,
                            3.77962227e+00,
                            2.85106104e+00,
                            8.61843803e+01,
                            5.85992597e+01])

In [34]:
gp = GP(INPUT_SPACE_DIM, x_train, y_train, init_hyperparameters=hyperparameters,
        gp_kernel_function=kernels.custom_kernel_one_shot, gp2Scale=True, gp2Scale_dask_client=client, gp2Scale_batch_size=BATCH_SIZE, info=False)

In [35]:
# extracts some point to test the model

N_TEST = 100

# remove the test points from the training set
x_elegible = np.delete(x_data, idx, axis=0)
y_elegible = np.delete(y_data, idx, axis=0)

# choose N_TEST points to test the model
idx_test = np.random.choice(np.arange(x_elegible.shape[0]), N_TEST, replace=False)
x_test = x_elegible[idx_test]
y_test = y_elegible[idx_test]

In [36]:
#gp.posterior_mean(x_pred=x_train, hyperparameters=hyperparameters, x_out=x_data)
saved = gp.posterior_mean(x_test)

In [37]:
# generate random hps 
random_hps = hps.build_hps(1,1,3)

In [38]:
random_gp = GP(INPUT_SPACE_DIM, x_train, y_train, init_hyperparameters=random_hps,
        gp_kernel_function=kernels.custom_kernel_one_shot, gp2Scale=True, gp2Scale_dask_client=client, gp2Scale_batch_size=BATCH_SIZE, info=False)

In [39]:
random_saved = random_gp.posterior_mean(x_test)

In [40]:
# calculate the mean squared error
import sklearn.metrics as metrics
mse = metrics.mean_squared_error(y_train, saved["f(x)"])
random_ms = metrics.mean_squared_error(y_train, random_saved["f(x)"])

In [41]:
print(f"mse: {mse}")
print(f"random mse: {random_ms}")

mse: 84.84031149965531
random mse: 2070.8883780913734


In [42]:
# print(np.sqrt(mse))
# print(np.sqrt(random_ms))

9.210880061082943
45.50701460315072
