# Simulate a scan with GPcam on pre-measured data


In [None]:
from pathlib import Path
import numpy as np
import copy as cp
from gpcam.gp_optimizer import fvGPOptimizer
import matplotlib.pyplot as plt
from numpy.random import default_rng
import time
from typing import *
from tqdm.auto import tqdm
from datetime import datetime
import dataloader as dl 


# load data

In [None]:
source_file =  Path(r"D:\data\SGM4 - 2022 - CrSBr\data\Kiss05_5.h5")
ldr = dl.load(source_file)
xdata = ldr.to_xarray()
xdata
del ldr

In [None]:
xdata

In [None]:
xdata.sum(['Kinetic Energy','OrdinateRange']).plot()

In [None]:
xdata.isel(FSamX=15,FSamY=15).plot()

In [None]:
xdata.isel(FSamX=slice(15,20),FSamY=slice(15,20)).sum(['FSamX','FSamY']).plot()

## data as stack of spectra and point coordinates

In [None]:
from itertools import product

In [None]:
map_shape = xdata.shape[:2]
map_shape

In [None]:
all_positions = np.array(tuple(product(range(xdata.shape[0]),range(xdata.shape[1]))))
all_positions[:5], all_positions.shape

In [None]:
all_spectra = xdata.values.reshape(-1, *xdata.shape[-2:])

## get scan limits

In [None]:
scan_limits = np.array([all_positions.min(axis=0),all_positions.max(axis=0)]).T

# reduce data dimensionality

In [None]:
def reduce(spectrum) -> Tuple[float, float]:
    """Reduce a spectrum to two scalar features: its mean and its standard deviation.

    Args:
        spectrum: object exposing ``mean()`` and ``std()`` (e.g. a numpy array).

    Returns:
        The pair ``(mean, std)`` computed over all elements of ``spectrum``.
    """
    return spectrum.mean(), spectrum.std()

In [None]:
# Collapse every spectrum to its (mean, std) pair; one row per entry of all_spectra.
all_reduced = np.array(
    [
        reduce(spectrum)
        for spectrum in tqdm(all_spectra, total=len(all_spectra), desc="Reducing spectra")
    ]
)

In [None]:
all_reduced.shape

In [None]:
reduced_maps = all_reduced.reshape(*map_shape,2)

In [None]:
fig,ax = plt.subplots(1,2,figsize=(10,5))
ax[0].imshow(reduced_maps[:,:,0])
ax[1].imshow(reduced_maps[:,:,1])

# GPcam

## define parameters

In [None]:
def ndim_aqfunc(x, gp):
    """Compute the acquisition function for a given GP and a given set of points.

    For every task index ``i`` the input ``x`` is extended with one extra
    column holding ``i``. The posterior variances of all tasks are summed,
    square-rooted and scaled by ``a``; the posterior means of all tasks,
    scaled by ``norm``, are then added.

    Args:
        x: 2-D array of candidate points (one row per point).
        gp: object exposing ``input_dim``, ``posterior_covariance(x)["v(x)"]``
            and ``posterior_mean(x)["f(x)"]``.

    Returns:
        Array with the acquisition value of every candidate point.

    Raises:
        ValueError: if ``gp.input_dim - 1`` is not positive, i.e. there is no
            task to evaluate.
    """
    a = 1.5  #####change here, 3.0 for 95 percent confidence interval
    norm = 1.0  # weight of the posterior-mean term

    # NOTE(review): the number of tasks is taken to be gp.input_dim - 1, as in
    # the original code; confirm this matches the number of GP outputs.
    n_tasks = gp.input_dim - 1
    if n_tasks < 1:
        raise ValueError("gp.input_dim must be at least 2 to evaluate any task")

    # Build the task-augmented inputs once; they are shared by both passes below.
    task_inputs = [
        np.c_[x, np.zeros(x.shape[0]) + i].reshape(-1, gp.input_dim)
        for i in range(n_tasks)
    ]

    # Accumulate with out-of-place additions so that arrays handed back by the
    # GP are never modified.
    variance = None
    for x_pred in task_inputs:
        cov = gp.posterior_covariance(x_pred)["v(x)"]
        variance = cov if variance is None else variance + cov

    ret = a * np.sqrt(variance)  # fresh array, safe to update in place

    for x_pred in task_inputs:
        mean = gp.posterior_mean(x_pred)["f(x)"]
        ret += norm * mean

    return ret


In [None]:

def manhattan_distance(x,y):
    """Return the L1 distance between ``x`` and ``y`` using their first two components."""
    delta_first = np.abs(x[0] - y[0])
    delta_second = np.abs(x[1] - y[1])
    return delta_first + delta_second

def closest_point_on_grid(x, grid_shape=None):
    """Find the closest point in an integer grid to a given point.

    Args:
        x: position with two components, expressed in grid-index units.
        grid_shape: optional ``(n_rows, n_cols)`` of the grid. When omitted,
            the first two dimensions of the module-level ``xdata`` are used.

    Returns:
        Integer array ``[row, col]`` of the grid node with the smallest
        Euclidean distance to ``x``. Ties go to the first node in row-major
        order.
    """
    if grid_shape is None:
        grid_shape = xdata.shape[:2]
    n_rows, n_cols = grid_shape
    # All (row, col) pairs in row-major order, built without a Python-level loop.
    grid = np.indices((n_rows, n_cols)).reshape(2, -1).T
    return grid[np.linalg.norm(grid - x, axis=1).argmin()]


def scan_time(positions, cost_func_params):
    """Sum ``movement_cost`` over every pair of consecutive entries of ``positions``."""
    step_costs = [
        movement_cost(start, stop, cost_func_params)
        for start, stop in zip(positions[:-1], positions[1:])
    ]
    return np.sum(step_costs)


In [None]:

def movement_cost(
        origin: tuple, 
        x: tuple, 
        cost_func_params: dict = None, 
        verbose: bool = False
    ) -> float:
    """Compute the cost of moving from ``origin`` to ``x`` and measuring there.

    The cost is ``weight * distance / speed + dwell_time + dead_time``, where
    ``distance`` is the Manhattan distance between the two positions
    multiplied by ``point_to_um``.

    Args:
        origin (tuple): starting position
        x (tuple): ending position; a 1-D input is reshaped to a column, so
            the result is a numpy array rather than a plain float
        cost_func_params (dict): dictionary of parameters for the cost function
            - speed (float): speed of the scanner (default 250).
              NOTE(review): the distance is reported in um, so this is
              presumably um/s -- confirm
            - dwell_time (float): dwell time, expressed in s (default 0.5)
            - dead_time (float): dead time, expressed in s (default 0.6)
            - point_to_um (float): conversion factor from grid points to um
              (default 1.0)
            - weight (float): multiplier applied to the travel-time term only
              (default 1.0)
        verbose (bool): print the distance and time of the move

    Returns:
        Cost of the move, expressed in seconds.

    Raises:
        ValueError: if ``cost_func_params`` contains an unknown key.
    """
    x = np.array(x)
    if len(x.shape) == 1:
        x = x.reshape(-1,1)
    # Work on a copy so the caller's dictionary is not emptied by the pops below.
    params = {} if cost_func_params is None else cost_func_params.copy()
    speed = params.pop('speed',250)
    dwell_time = params.pop('dwell_time',0.5)
    dead_time = params.pop('dead_time',0.6)
    point_to_um = params.pop('point_to_um',1.0)
    weight = params.pop('weight',1.0)
    if len(params) > 0:
        raise ValueError(f"Unrecognized parameters: {params.keys()}")
    distance = manhattan_distance(origin,x) * point_to_um
    # Named total_time so the module-level ``time`` import is not shadowed.
    total_time = weight * distance / speed  + dwell_time + dead_time
    if verbose:
        # distance and total_time are arrays; a ``:.2f`` format spec cannot be
        # applied to a non-scalar array, so format them element by element.
        def _fmt(values):
            return ", ".join(f"{v:.2f}" for v in np.ravel(values))

        print(f"Distance: {_fmt(distance)} um, Time: {_fmt(total_time)} s"
            f" (dwell: {dwell_time:.2f} s, dead: {dead_time:.2f} s)" 
        )
    return total_time

def cost_function(origin, x:Sequence[tuple], cost_func_params: dict = None) -> float:
    """Evaluate ``movement_cost`` from ``origin`` to every candidate in ``x``.

    Returns the per-candidate costs stacked into a numpy array and transposed
    (the ``float`` annotation is kept from the original signature).
    """
    per_target = [
        movement_cost(origin, target, cost_func_params) for target in x
    ]
    return np.array(per_target).T


In [None]:

def measurement_loop(
    init_positions, 
    init_values,
    gp,
    n_points=200,
    train_every=20,
    train_at = None,
    verbose=False,
) -> Tuple[np.ndarray, np.ndarray]:
    """Run a simulated acquisition: ask the GP for a point, "measure" it, repeat.

    On every iteration the GP is told all data gathered so far and asked for
    the next point. That suggestion is snapped to the grid with
    ``closest_point_on_grid`` and the value there is read from the
    module-level ``xdata`` and reduced with ``reduce``. The module-level
    ``ask_pars`` and ``train_pars`` are forwarded to ``gp.ask`` and
    ``gp.train_gp``.

    Args:
        init_positions: array of starting positions, one row per point.
        init_values: array of the values measured at ``init_positions``.
        gp: optimizer exposing ``tell``, ``ask`` and ``train_gp``.
        n_points: number of points to acquire.
        train_every: retrain whenever the iteration index is a multiple of
            this number (this includes iteration 0).
        train_at: optional extra iteration indices at which to retrain.
        verbose: print how long each retraining took.

    Returns:
        ``(positions, values)``: the initial data followed by one row per
        acquired point.
    """
    # Normalise once, outside the loop; a set gives constant-time membership tests.
    extra_training_steps = frozenset(() if train_at is None else train_at)

    values = init_values.copy()
    positions = init_positions.copy()
    for i in tqdm(range(n_points), desc="Acquisition loop"):
        gp.tell(positions, values)
        next_pt = gp.ask(position=positions[-1],**ask_pars)
        next_on_grid = closest_point_on_grid(next_pt['x'])
        next_pt.update({'pos':next_on_grid})
        # Simulated measurement: look the spectrum up in the pre-measured data.
        next_val = np.array(
            reduce(xdata[next_pt['pos'][0],next_pt['pos'][1]].values)
        )[None,:]
        next_pos = np.array(next_pt['pos'])[None,:]
        positions = np.append(positions, next_pos, axis=0)
        values = np.append(values, next_val, axis=0)
        if i % train_every == 0 or i in extra_training_steps:
            t0 = time.time()
            gp.train_gp(**train_pars)
            if verbose:
                print(f"Training at i={i} took {time.time()-t0:.2f} s")
    return positions, values
            



In [None]:

def vis_map_with_path(positions,values,reduced_maps=reduced_maps,ax=None):
    """Draw the visited positions and the path joining them over a reduced map.

    Args:
        positions: 2-D array; column 1 is plotted on the horizontal axis and
            column 0 on the vertical axis.
        values: accepted for signature compatibility; not used for drawing.
        reduced_maps: array whose ``[..., 0]`` slice is shown as background.
            The default is bound to the module-level ``reduced_maps`` when the
            function is defined.
        ax: axes to draw on; a new 6x5 figure is created when omitted.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(6, 5))
    ax.imshow(reduced_maps[..., 0], alpha=0.5)
    horizontal = positions[:, 1]
    vertical = positions[:, 0]
    ax.plot(horizontal, vertical, 'r-', alpha=0.5, linewidth=.5)
    ax.scatter(horizontal, vertical, s=10)


In [None]:
# Keyword arguments for the fvGPOptimizer constructor.
optimizer_pars = {
    "input_space_dimension": 2,
    "output_space_dimension": 1,
    "output_number": 2,
    "input_space_bounds": scan_limits,
}

# Five (lower, upper) bound pairs; the last four are identical.
hyperparameter_bounds = np.array([[0.001, 1e9]] + [[1, 1000]] * 4)
# Starting hyperparameters, one per bound pair above.
init_hyperparameters = np.array(
    [4.71907062e+06, 4.07439017e+02, 3.59068120e+02, 4e2, 4e2]
)

# Keyword arguments forwarded to gp.train_gp.
train_pars = {
    "hyperparameter_bounds": hyperparameter_bounds,
    "pop_size": 20,
    "tolerance": 1e-6,
    "max_iter": 2,
}

# Parameter sets for asynchronous training; not referenced by the cells shown here.
train_async_pars = {
    "hyperparameter_bounds": hyperparameter_bounds,
    "init_hyperparameters": init_hyperparameters,
    "max_iter": 10000,
    "local_optimizer": "L-BFGS-B",
    "global_optimizer": "genetic",
    "constraints": (),
    "deflation_radius": None,
    "dask_client": None,
}
train_gp_async_pars = {
    "hyperparameter_bounds": hyperparameter_bounds,
    "max_iter": 10000,
    "dask_client": None,
    "deflation_radius": None,
    "constraints": (),
    "local_method": "L-BFGS-B",
    "global_method": "genetic",
}

# Keyword arguments forwarded to gp.init_fvgp.
fvgp_pars = {
    "init_hyperparameters": init_hyperparameters,
    "compute_device": "cpu",
    "gp_kernel_function": None,
    "gp_mean_function": None,
    "use_inv": False,
    "ram_economy": True,
}

# Keyword arguments forwarded to gp.ask inside measurement_loop.
ask_pars = {
    "n": 1,
    "acquisition_function": ndim_aqfunc,
    "bounds": None,
    "method": "global",
    "pop_size": 20,
    "max_iter": 10,
    "tol": 10e-6,  # i.e. 1e-5
    "x0": None,
    "dask_client": None,
}

# Parameters understood by movement_cost.
cost_func_params = {
    "speed": 300,
    "dwell_time": 1.0,
    "dead_time": 0.6,
    "point_to_um": 15,
    "weight": 1.0,
}

## Explicit run

### initialize the map with random points

In [None]:
# Draw 5 distinct starting points from the grid.
rng = np.random.default_rng()
# choice(n, ...) samples from range(n); passing len(all_positions) - 1 would
# make the last grid position impossible to select.
ind = rng.choice(len(all_positions), size=5, replace=False)
init_positions = all_positions[ind]
init_values = all_reduced[ind]
print(*tuple(zip(init_positions, init_values)),sep='\n')


### initialize the gaussian process object

In [None]:
gp = fvGPOptimizer(**optimizer_pars)
gp.tell(init_positions,init_values)
gp.init_fvgp(**fvgp_pars)

### initial training of gp

In [None]:
%%time
gp.train_gp(**train_pars)

In [None]:
positions = init_positions.copy()
values = init_values.copy()
positions.shape, values.shape

In [None]:
gp.init_cost(cost_function,cost_func_params)

### loop it!

In [None]:
# measurement_loop needs the optimizer as its third argument and returns the
# extended arrays; keep them so the cells below plot the acquired points.
positions, values = measurement_loop(
    init_positions,
    init_values,
    gp,
    n_points=200,
    train_every=20,
)

In [None]:
positions.shape, values.shape

In [None]:
fig,axes = plt.subplots(1,2,figsize=(10,5))
ax = axes.ravel()
ax[0].imshow(reduced_maps[...,0],alpha=0.5)
ax[1].imshow(reduced_maps[...,0],alpha=0.5)
ax[0].plot(positions[:,1],positions[:,0],'r-',alpha=0.5,linewidth=.5)

ax[0].scatter(positions[:,1],positions[:,0],s=10)#,c=values[:,0],cmap='viridis')
ax[1].scatter(positions[:,1],positions[:,0],s=10)#c=values[:,0],cmap='viridis')


In [None]:
fig,axes = plt.subplots(1,2,figsize=(10,5))
ax = axes.ravel()
ax[0].imshow(reduced_maps[...,0],alpha=0.5)
ax[1].imshow(reduced_maps[...,0],alpha=0.5)
ax[0].plot(positions[:,1],positions[:,0],'r-',alpha=0.5,linewidth=.5)

ax[0].scatter(positions[:,1],positions[:,0],s=10)#,c=values[:,0],cmap='viridis')
ax[1].scatter(positions[:,1],positions[:,0],s=10)#c=values[:,0],cmap='viridis')


# Single point initialization

In [None]:
# Seed the run with the centre of the map, listed twice.
init_positions = np.array([[map_shape[0]//2,map_shape[1]//2]]*2)
# Index xdata with the scalar (row, col) of each position, as measurement_loop
# does. init_positions[0] and init_positions[1] are whole coordinate pairs, so
# using them as the two indexers does not address a single map point.
init_values = np.array([
    np.array(reduce(xdata[row, col].values)) for row, col in init_positions
])
init_positions, init_values


In [None]:
gp = fvGPOptimizer(**optimizer_pars)
gp.tell(init_positions,init_values)
gp.init_fvgp(**fvgp_pars)
gp.train_gp(**train_pars)
positions = init_positions.copy()
values = init_values.copy()
positions.shape, values.shape
# gp.init_cost(cost_function,cost_func_params)

In [None]:
pos_nocost, values_nocost = measurement_loop(
    init_positions, 
    init_values, 
    gp=gp,
    n_points=500, 
    train_every=50,
    train_at = [10, 20],
    verbose=True,
)
counts = list(dict(Counter(tuple(map(tuple,pos_nocost)))).values())
print(f'Number of repeated points: {sum(counts)-len(counts)}')

In [None]:
vis_map_with_path(pos_nocost, values_nocost,reduced_maps=reduced_maps)

In [None]:
print(f'scan time: {scan_time(pos_nocost,cost_func_params):.2f} s')

## with cost function

In [None]:
gp = fvGPOptimizer(**optimizer_pars)
gp.tell(init_positions,init_values)
gp.init_fvgp(**fvgp_pars)
gp.train_gp(**train_pars)
positions = init_positions.copy()
values = init_values.copy()
positions.shape, values.shape
# Per-run copy of the cost parameters with a reduced travel-time weight.
cfp = cost_func_params.copy()
cfp['weight'] = 0.0001
# Hand the modified copy to the optimizer; passing cost_func_params here would
# silently ignore the weight set above.
gp.init_cost(cost_function,cfp)

In [None]:
pos_wcost, values_wcost = measurement_loop(
    init_positions, 
    init_values, 
    gp,
    n_points=500, 
    train_every=50,
    train_at = [10, 20],
    verbose=True,
)
counts = list(dict(Counter(tuple(map(tuple,pos_wcost)))).values())
print(f'Number of repeated points: {sum(counts)-len(counts)}')

In [None]:
def how_many_repetitions(positions):
    """Print how many entries of ``positions`` repeat an earlier one.

    Returns the list of occurrence counts, one per distinct position, in
    order of first appearance.
    """
    occurrences = Counter(map(tuple, positions))
    counts = list(occurrences.values())
    n_repeated = sum(counts) - len(counts)
    print(f'Number of repeated points: {n_repeated}')
    return counts

In [None]:
n = 500
p,v = pos_wcost[:n], values_wcost[:n]
how_many_repetitions(p)
vis_map_with_path(p,v,reduced_maps=reduced_maps)

In [None]:
print(f'scan time: {scan_time(pos_wcost,cost_func_params):.2f} s')

In [None]:
# count repeated points
from collections import Counter
counts = list(dict(Counter(tuple(map(tuple,pos_wcost)))).values())

print(f'Number of repeated points: {sum(counts)-len(counts)}')


In [None]:
fig,axes = plt.subplots(1,2,figsize=(10,5))

vis_map_with_path(pos_wcost, values_wcost,reduced_maps=reduced_maps,ax=axes[0])
vis_map_with_path(pos_nocost, values_nocost,reduced_maps=reduced_maps,ax=axes[1])

# now for something silly!

In [None]:
gp = fvGPOptimizer(**optimizer_pars)
gp.tell(init_positions,init_values)
gp.init_fvgp(**fvgp_pars)
gp.train_gp(**train_pars)
positions = init_positions.copy()
values = init_values.copy()
positions.shape, values.shape
# Per-run copy of the cost parameters with its own travel-time weight.
cfp = cost_func_params.copy()
cfp['weight'] = 0.1
# Hand the modified copy to the optimizer; passing cost_func_params here would
# silently ignore the weight set above.
gp.init_cost(cost_function,cfp)

In [None]:

# gp is a required argument of measurement_loop; without it the call fails
# with a TypeError.
pos_highcost, values_highcost = measurement_loop(
    init_positions, 
    init_values, 
    gp=gp,
    n_points=500, 
    train_every=50,
    train_at = [10, 20],
    verbose=True,
)
counts = list(dict(Counter(tuple(map(tuple,pos_highcost)))).values())
print(f'Number of repeated points: {sum(counts)-len(counts)}')

In [None]:
vis_map_with_path(pos_highcost, values_highcost,reduced_maps=reduced_maps)

In [None]:
plt.figure()
n = 500
plt.scatter(pos_highcost[:n,1],pos_highcost[:n,0],c=values_highcost[:n,0],cmap='viridis')

In [None]:
print(f'scan time: {scan_time(pos_highcost,cost_func_params):.2f} s')

In [None]:
def pretty_print_time(t):
    """Format a duration given in seconds as hh:mm:ss.

    Fractions of a second are truncated. The hour field is zero-padded to two
    digits but is not wrapped, so long durations simply use more digits.
    """
    h = int(t//3600)
    m = int((t%3600)//60)
    s = int(t%60)
    return f'{h:02d}:{m:02d}:{s:02d}'


# The helper is defined before its first use so the notebook runs top to bottom.
print('no cost scan time:   ' + pretty_print_time(scan_time(pos_nocost, cost_func_params)))
print('low cost scan time:  ' + pretty_print_time(scan_time(pos_wcost, cost_func_params)))
print('high cost scan time: ' + pretty_print_time(scan_time(pos_highcost, cost_func_params)))
print('raster scan time:    ' + pretty_print_time(scan_time(all_positions, cost_func_params)))

In [None]:
all_positions

In [None]:
def get_raw_data(positions,all_positions,xdata):
    """Collect the raw data of every grid position that occurs in ``positions``.

    Args:
        positions: iterable of (row, col) pairs that were visited; repeated
            pairs are counted once.
        all_positions: iterable of every (row, col) pair of the grid; the
            output follows this order.
        xdata: object indexable as ``xdata[i, j, ...]`` whose items expose
            ``.values``.

    Returns:
        List with one ``.values`` entry per visited grid position.
    """
    # ``row in array`` on numpy arrays is an element-wise any-match, which also
    # accepts rows that share a single coordinate with a visited point. A set
    # of tuples gives exact row membership and constant-time lookups.
    visited = {tuple(p) for p in positions}
    return [
        xdata[i, j, ...].values
        for i, j in all_positions
        if (i, j) in visited
    ]

    

In [None]:
raw_nocost = get_raw_data(pos_nocost,all_positions,xdata)

In [None]:
raw_nocost[0].shape


# kmeans on this thing

In [None]:
import sklearn.cluster as cluster  


In [None]:
# sklearn.cluster provides the function ``k_means`` (and the estimator
# ``KMeans``); there is no ``cluster.kmeans``.
# Each raw item is flattened so the input is 2-D: one row per sample.
features = np.stack([np.ravel(item) for item in raw_nocost])
# k_means returns (centroids, labels, inertia).
res = cluster.k_means(features, n_clusters=3)