In [None]:
from argparse import ArgumentParser
import os
import glob
import numpy as np
import warnings
warnings.filterwarnings("ignore")
from typing import Tuple, List

from src.kernel_ridge_regression.abstract_kernels.kernel_ridge_regression import KernelRidgeRegression
from src.kernel_ridge_regression.kernels.classical_kernel import Gaussian
from src.kernel_ridge_regression.kernels.quantum_kernel import Quantum_Kernel
from src.kernel_ridge_regression.grid import Grid
from src.utils.train_test_split import train_test_split

In [None]:
def load_data(
    image_size: int = 16,
    dataset_size: int = 5000
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Load the binary dumps under ``./data/ising``, shuffle them and split them.

    Every file matching ``./data/ising/L=<image_size>/*`` is read as a flat
    buffer of ``int8`` values and reshaped to ``(-1, image_size, image_size)``.
    All samples of a file share one label: the temperature parsed from the
    file name (the text after the last ``=`` and before ``.bin``).

    The shuffle uses NumPy's global random state (``np.random.permutation``),
    so seed it beforehand (``np.random.seed``) for a reproducible subset.

    Args:
        image_size: Side length used both to pick the ``L=<image_size>``
            directory and to reshape each file's buffer.
        dataset_size: Maximum number of shuffled samples kept before the split.

    Returns:
        The four arrays produced by
        ``train_test_split(X, y, validation_fraction=0.2)``, unpacked here as
        ``X_train, y_train, X_test, y_test``.

    Raises:
        ValueError: If no file matches the pattern, if a temperature cannot be
            parsed from a file name, or if a file's size is not a multiple of
            ``image_size * image_size``.
    """
    pattern = './data/ising/L={}/*'.format(image_size)
    # glob's ordering depends on the filesystem; sorting makes the pre-shuffle
    # sample order (and hence a seeded shuffle) reproducible across machines.
    file_paths = sorted(glob.glob(pattern))
    if not file_paths:
        # Same exception type np.concatenate would raise on an empty list,
        # but with a message that points at the actual problem.
        raise ValueError('No data files found matching {!r}'.format(pattern))

    images = []
    labels = []

    for file_path in file_paths:
        with open(file_path, 'rb') as f:
            X = np.frombuffer(buffer=f.read(), dtype=np.int8, offset=0).reshape(-1, image_size, image_size)

        # Parse from the file name only, so a name without '=' does not pick up
        # text from the 'L=<size>' directory component of the path.
        file_name = os.path.basename(file_path)
        temperature = float(file_name.split('=')[-1].split('.bin')[0])
        y = np.full(shape=(X.shape[0],), fill_value=temperature)

        images.append(X)
        labels.append(y)

    X = np.concatenate(images, axis=0)
    y = np.concatenate(labels, axis=0)

    # Permuting the dataset; truncating the index first avoids materialising a
    # shuffled copy of the whole dataset just to keep `dataset_size` rows.
    idx = np.random.permutation(X.shape[0])[:dataset_size]
    X = X[idx]
    y = y[idx]

    # Splitting the retained samples into two sets (validation_fraction=0.2)
    X_train, y_train, X_test, y_test = train_test_split(X, y, validation_fraction=0.2)

    return X_train, y_train, X_test, y_test

def make_grid(
    feature_dim: int,
    variance_data: float,
) -> Tuple[List[float], List[float]]:
    """Build the two hyper-parameter lists explored by the grid search.

    Args:
        feature_dim: Factor used, together with ``variance_data``, to
            normalise the second list.
        variance_data: Variance of the data, the other normalisation factor.

    Returns:
        ``(SIGMA, RIDGE_PARAMETER)`` where ``SIGMA`` is ``[2.0**5, 2.0**6]``
        and ``RIDGE_PARAMETER`` holds ``0.5, 1.0, 3.0, 5.0, 20.0``, each
        divided by ``feature_dim * variance_data``. Both are real lists, so
        they can be iterated any number of times.
    """
    SIGMA = [2.0**k for k in range(5, 7)]

    # Careful with normalization: it should go to the numerator in our
    # convention (1/sigma^2 vs. gamma in scikit-learn).
    # NOTE(review): this scaling resembles a kernel-width normalisation rather
    # than a ridge penalty -- confirm the two names are not swapped.
    scale = feature_dim * variance_data

    # Materialised as a list: a bare `map` is a one-shot iterator that would be
    # empty on a second pass and contradicts the List[float] return annotation.
    RIDGE_PARAMETER = [x / scale for x in [0.5, 1.0, 3.0, 5.0, 20.0]]

    return SIGMA, RIDGE_PARAMETER


def instantiate_regressor() -> KernelRidgeRegression:
    """Return the regressor handed to the grid search: a default ``Gaussian()``."""
    return Gaussian()

In [None]:
# Load a small shuffled sample of the 4x4 data, already split into two sets
X_train, y_train, X_val, y_val = load_data(image_size=4, dataset_size=10)

# Build the hyper-parameter lists, instantiate the regressor and set up the grid
# NOTE(review): feature_dim is taken from axis 1 of X_train -- confirm this is
# the intended feature count for the shape train_test_split returns.
SIGMA, RIDGE_PARAMETER = make_grid(
    feature_dim=X_train.shape[1],
    variance_data=np.var(X_train),
)
regressor = instantiate_regressor()
grid = Grid(
    regressor=regressor,
    ridge_parameter=RIDGE_PARAMETER,
    sigma=SIGMA,
    save_directory='./temp',
)

# Run the grid search on the two splits
grid.fit(X_train, y_train, X_val, y_val)