In [None]:
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from toolkit.dataset import load_spectral_data, dataset_dict_to_dense, normalize_spectra
import torch
def torchify(X, dtype=torch.float):
    """Convert a NumPy array into a torch tensor of the requested type.

    Args:
        X: NumPy array to wrap.
        dtype: Target tensor type, forwarded to ``Tensor.type``
            (defaults to ``torch.float``).

    Returns:
        A tensor holding the values of ``X`` cast to ``dtype``.
    """
    tensor = torch.from_numpy(X)
    return tensor.type(dtype)
from scipy.signal import savgol_filter
import numpy as np
# --- Load the raw spectra ---------------------------------------------------
# Glob patterns (relative to this notebook) of the .asc files to read.
spectra_globs = [
    "../raw_data/DataDavid/UVVIS/*.asc",
    "../raw_data/DataHenry23/Perkin/New_*/*.asc",
    "../raw_data/DataJulie/07_09_spectro/**/*.asc",
    "../raw_data/DataJulie/14_09_2023/*.asc",
    "../raw_data/DataJulie/11_09_2023/*.asc",
    "../raw_data/DataJulie/14_09_2023_histo/*.asc",
    "../raw_data/DataJulie/historique/*.asc",
]
dataset_dict = load_spectral_data(spectra_globs, verbose=True)

# Wavelength axis: 226 evenly spaced points running from 2500 down to 250.
# Only the points at or below 2300 are kept for modelling.
wavelength = np.linspace(250, 2500, 226)[::-1]
wl_mask = wavelength <= 2300

# --- Dense arrays and smoothing ---------------------------------------------
X, Y = dataset_dict_to_dense(dataset_dict)
# Savitzky-Golay smoothing along the last axis (window 11, polynomial order 2).
X = savgol_filter(X, window_length=11, polyorder=2, axis=-1)
Ytot = Y.copy().astype(np.int32)
# normalize_spectra(X) is currently not applied.

# Rows whose first metadata column is 12 or 13 form the held-out set.
valid_mask = (Y[:, 0] == 13) | (Y[:, 0] == 12)

# Per-wavelength centring/scaling statistics (the scale is twice the std).
# NOTE(review): these are computed over every row, held-out rows included --
# confirm that sharing statistics across the split is intended.
dsmean = X[:, wl_mask].mean(axis=0)
dsstd = 2 * X[:, wl_mask].std(axis=0)

# Whole data set, band-limited and standardised.
Xtot = (X[:, wl_mask].copy() - dsmean) / dsstd

# --- Train / held-out split, restricted to the kept wavelengths -------------
Xv, Yv = X[valid_mask][:, wl_mask], Y[valid_mask]
X, Y = X[~valid_mask][:, wl_mask], Y[~valid_mask]
wavelength = wavelength[wl_mask]

# Keep only training rows whose metadata column 5 is below 5.
train_rows = Y[:, 5] < 5
X, Y = X[train_rows], Y[train_rows].astype(np.int32)

# Number of training rows for label values 0, 1 and 2 (metadata column 1).
for class_id in (0, 1, 2):
    print(np.count_nonzero(Y[:, 1] == class_id))

# Same filter on the held-out rows.
# NOTE(review): the threshold here is 4, not 5 as for the training rows --
# confirm the asymmetry is deliberate.
valid_rows = Yv[:, 5] < 4
Xv, Yv = Xv[valid_rows], Yv[valid_rows].astype(np.int32)

# Classification targets: metadata column 1.
labels, labels_v = Y[:, 1], Yv[:, 1]

from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader
import torch

# Standardise the training and held-out spectra with the shared
# per-wavelength statistics (dsmean, dsstd).
Xs, Xsv = ((spectra - dsmean) / dsstd for spectra in (X, Xv))

In [None]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# Gradient-boosting baseline: fit on the standardised training spectra and
# score accuracy on the held-out spectra, repeated over several seeds.
# Each run gets its own explicit random_state so that the min/max/mean summary
# below is reproducible after a kernel restart, while still showing the
# seed-to-seed spread.
accs = list()
for run_seed in range(10):
    model = GradientBoostingClassifier(random_state=run_seed)
    model.fit(Xs, Y[:, 1])
    preds = model.predict(Xsv)
    # Fraction of held-out rows whose predicted label matches labels_v.
    accs.append(np.count_nonzero(preds == labels_v) / len(preds))
print(np.min(accs), np.max(accs), np.mean(accs))

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Sweep the number of neighbours and record held-out accuracy for each value.
knn_k_values = list(range(2, 110))
knn_accuracies = list()
for k in knn_k_values:
    model = KNeighborsClassifier(k)
    model.fit(Xs, Y[:, 1])
    preds = model.predict(Xsv)
    acc = np.count_nonzero(preds == labels_v) / len(preds)
    knn_accuracies.append(acc)

# Rank by accuracy, best first. The ranking is reported as actual n_neighbors
# values: the raw argsort positions are offset by 2 from k (position 0 is
# k=2), which makes the printed list easy to misread when picking k.
ranking = np.argsort(knn_accuracies)[::-1]
print(np.asarray(knn_k_values)[ranking])
print("KNN", np.min(knn_accuracies), np.max(knn_accuracies), np.mean(knn_accuracies))


In [None]:
import shap

# Fit a 31-neighbour kNN classifier on the standardised training spectra.
model = KNeighborsClassifier(31)
model.fit(Xs, Y[:, 1])

# Kernel SHAP on the class probabilities, with the full training set as
# background data, explaining the first held-out spectrum.
explainer = shap.KernelExplainer(model.predict_proba, Xs)
sample = Xsv[0]
shap_values = explainer.shap_values(sample)
print(shap_values[0].shape)

# One panel per entry of shap_values (0, 1, 2); points are coloured by the
# standardised value of the explained spectrum.
# NOTE(review): this assumes shap_values is indexable per class -- confirm
# against the installed shap version.
x = np.arange(len(shap_values[0]))
scatter_style = dict(c=sample, cmap="RdBu")
fig, (ax1, ax2, ax3) = plt.subplots(3)
ax1.scatter(x, shap_values[0], **scatter_style)
ax2.scatter(x, shap_values[1], **scatter_style)
ax3.scatter(x, shap_values[2], **scatter_style)

In [None]:
from hybris.optim import ParticleSwarm
from keever.runners import generate_job
import torch

# Hyper-parameter search: a particle swarm proposes parameter vectors, each
# one is evaluated by launching the job script, and the negated median of the
# best accuracies is fed back as the fitness to minimise.

# Search space: name, (low, high) bound and the cast applied before the value
# is handed to the job.
param_names = ["lr", "width", "offset", "wd", "rho", "complexity", "do"]
bounds = np.asarray([ (1e-5, 1e-4),  (24, 48), (0, 64), (1e-3, 5e-2), (0, 1), (2, 12), (0.0, 0.8)])
typefuns = [ float, int, int, float, float, int, float]

opt = ParticleSwarm(30, [len(bounds), 0], max_fevals=3000)
opt.vmin = bounds[:, 0]
opt.vmax = bounds[:, 1]
opt.reset(42)

# Number of repeated evaluations per candidate.
# NOTE(review): the repeat index is not passed to the job, so repeats only
# differ if the job script randomises itself -- confirm.
seeds = 5
bests = list()

# Read the job template once, and close the file, instead of reopening it
# (without closing) for every single evaluation.
with open("../hylaunch.proto.sh") as template_file:
    job_template = template_file.read()

while not opt.stop():
    x = opt.ask()
    y = list()
    for indiv in x:
        # Cast each raw swarm coordinate to the type its parameter expects.
        job_params = {key: typefun(value) for key, value, typefun in zip(param_names, indiv, typefuns)}
        fit = []
        for _ in range(seeds):
            # A fresh dict per launch, in case generate_job mutates its argument.
            generate_job(job_template, dict(job_params), launch =True, shell="bash")
            # Presumably the launched job has finished and written res.pkl by
            # the time generate_job returns -- verify. torch.load unpickles
            # the file, so it must only ever point at trusted output.
            fit.append(np.max(torch.load("../tmp/res.pkl")["accs"]))

        # Negated because the swarm minimises; the median damps outlier runs.
        y.append(-np.median(fit))
    opt.tell(np.asarray(y))

    # Best candidate of this iteration, located once and reused.
    best_idx = np.argmin(y)
    print("CUR BEST", x[best_idx], y[best_idx])
    bests.append((x[best_idx].copy(), y[best_idx]))

In [None]:
# Show the (parameter vector, fitness) pairs recorded once per swarm iteration.
bests

In [None]:
# NOTE(review): this cell displays `bests` a second time; presumably a
# leftover from re-running the search -- consider removing it.
bests

In [None]:
# Number of entries recorded in `bests` so far.
print(len(bests))

In [46]:
# Print the parameter vector of each of the first six recorded bests.
for best_position, _best_fitness in (bests[k] for k in range(6)):
    print(best_position)

[4.67756256e-05 3.44486545e+01 3.73375898e+01 1.83155826e-02
 1.09753211e-01 1.06053558e+01 1.26643422e-01]
[5.91085456e-05 2.88995865e+01 4.03474348e+01 2.71714218e-02
 3.48593053e-01 1.10406569e+01 4.62530883e-01]
[7.23783389e-05 3.37002482e+01 4.03633932e+01 4.58072304e-02
 0.00000000e+00 6.18058487e+00 7.38579799e-01]
[6.51011572e-05 2.67967569e+01 6.40000000e+01 4.07580434e-03
 1.98625975e-01 1.20000000e+01 8.00000000e-01]
[4.74945910e-05 3.18587437e+01 3.77105239e+01 4.16849855e-02
 6.27147146e-01 9.44214059e+00 4.67817484e-01]
[4.45592263e-05 2.56836894e+01 6.24588013e+01 5.09426220e-03
 4.00000000e-01 1.20000000e+01 8.00000000e-01]
