### Import libraries

In [1]:
import matplotlib.pyplot as plt
from pathlib import Path
import json

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_validate

from sklearn.metrics import r2_score
                                                                                                                                                                                          
from bayes_opt.event import Events
from Utils import newBayesianOptimization, newJSONLogger

In [2]:
# models
from sklearn.svm import SVR

### Import data

In [4]:
# The spreadsheet lives in the "Data" folder that sits next to the current
# working directory (i.e. one level up, then into "Data").
path_to_file = Path().resolve().parent.joinpath("Data", "datos edgar.xlsx")

# Load the sheet using its second row as the header, throw away rows that are
# entirely empty, keep only the four columns used below, skip the first four
# remaining rows, and finally restrict to rows with 580 <= Lambda <= 620.
data = (
    pd.read_excel(path_to_file, header=1)
    .dropna(how="all")
    .loc[:, ['espAl2O3', 'espSiO2', 'R (1)', 'Lambda']]
    .iloc[4:]
    .query("Lambda >= 580 and Lambda <= 620")
)

In [34]:
# Predictors (three columns) and target (one column), both kept as DataFrames.
X = data.loc[:, ['espAl2O3', 'espSiO2', 'Lambda']]
y = data.loc[:, ['R (1)']]

# Hold out 30 % of the rows as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Flattened 1-D copies of the single-column targets, the shape the SVR
# fitting and scoring calls further down are given.
Y_train_ravel = Y_train.to_numpy().ravel()
Y_test_ravel = Y_test.to_numpy().ravel()

In [35]:
# Standardize the features. The scaler is configured to return DataFrames, is
# fitted on the training split only, and is then applied to both splits.
# Note that X_train and X_test are rebound to their scaled versions here.
scaler = StandardScaler().set_output(transform="pandas")

X_train = scaler.fit(X_train).transform(X_train)
X_test = scaler.transform(X_test)

In [36]:
def opt_bas(C, epsilon, gamma):
    """Objective function handed to the Bayesian optimizer for an RBF-kernel SVR.

    Reads the notebook-level variables ``X_train``, ``Y_train_ravel``,
    ``X_test`` and ``Y_test_ravel``.

    Parameters
    ----------
    C, epsilon, gamma
        Values forwarded unchanged to ``SVR`` under the same keyword names.

    Returns
    -------
    float
        ``2 * test_r2 + mean_cv_r2``: twice the R² on the test split plus the
        mean R² over a 5-fold cross-validation on the training split.

    NOTE(review): the test-split R² is part of the value being maximized, so the
    test split influences which hyperparameters get selected. Confirm this is
    intended before reporting test R² as an unbiased estimate.
    """

    def build_svr():
        # A fresh, unfitted estimator with the candidate hyperparameters.
        return SVR(kernel="rbf", C=C, epsilon=epsilon, gamma=gamma)

    # Mean R² across the 5 cross-validation folds of the training split.
    fold_scores = cross_validate(
        build_svr(), X_train, Y_train_ravel, cv=5, scoring="r2"
    )['test_score']
    mean_cv_r2 = fold_scores.mean()

    # R² on the test split after fitting on the whole training split.
    fitted_svr = build_svr().fit(X_train, Y_train_ravel)
    test_r2 = r2_score(Y_test_ravel, fitted_svr.predict(X_test))

    return 2*test_r2 + mean_cv_r2

### Run optimization

In [43]:
# Search region: (lower, upper) bounds for each SVR hyperparameter.
pbounds = {'C': (30, 500), 'epsilon': (0.001, 0.07),  'gamma': (0.2, 0.7)}

# Optimizer over opt_bas within pbounds. newBayesianOptimization comes from the
# project-local Utils module; random_state fixes its seed and verbose sets how
# much it reports (presumably same meaning as in bayes_opt; confirm in Utils).
optimizer = newBayesianOptimization(
    f=opt_bas,
    pbounds=pbounds,
    random_state=1,
    verbose=2,
)

# Record every optimization step in Logs/svr.jsonl under the current working directory.
log_path = Path().resolve() / "Logs" / "svr.jsonl"
# Make sure the Logs folder exists before the logger is given a file inside it;
# writing to a file whose parent directory is missing would fail. This is a
# no-op when the folder is already there.
log_path.parent.mkdir(parents=True, exist_ok=True)
# NOTE(review): the logger is built before any previous log is loaded. If
# newJSONLogger clears an existing file on construction, earlier results would
# be lost; confirm its behavior in Utils.
logger = newJSONLogger(path = str(log_path))
optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

In [38]:
# optimizer.res

In [None]:
# Feed the log written by earlier runs back into the optimizer (load_previous is
# defined in the project-local Utils module; presumably it re-registers the
# points already evaluated; confirm there).
# On a first run there is no log file yet, so skip loading instead of failing.
if log_path.exists():
    optimizer.load_previous(log_path)
else:
    print(f"No previous log at {log_path}; starting the search from scratch.")

In [None]:
# Run the search. Every evaluation calls opt_bas, which performs a 5-fold
# cross-validation plus one extra fit, so this cell can take a long time.
# NOTE(review): presumably init_points is the number of initial random probes and
# n_iter the number of guided steps, as in bayes_opt's maximize; confirm in Utils.
optimizer.maximize(init_points=6, n_iter=500)