In [6]:
notes = ''' 

Sci kit learn classes to pick a model, generate a dummy set etc

Oliver: 
The external optimization loop should be optimizing over the 'model parameters'. So in that example they initialize the model with SVC() using 
all default parameters. But you should also be able to find better model parameters for the task at hand - that's what optuna, etc. are doing 
for you. So notice how the model itself is well packaged in that loop, with just model.init(), model.fit(), model.score() methods exposed. 
The trick for us will be to decide how to package this whole training loop in a similarly compact fashion - it's just one more meta layer of 
abstraction - so that the optuna or ray optimization framework can do their work in a similarly small number of lines of code.


Then the trick will be - it won't actually always be a neural network in the inner loop. We can sub in a physics based model, or a full multiphysics
simulation, or even a live real-world process. If the packaging is the same, then the same code and tools can be re-used.
Looking ahead, we can make some assumptions. You can assume we'll always have a .score() objective function, and that the goal will always be to minimize 
that number. You can also assume that the parameter input to the loop will be a dictionary, and the method should be flexible to a variable number of
parameters in the dictionary.

'''

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

import optuna

# NOTE: installing packages from inside the notebook is a stopgap; next time
# create a virtual environment and add the packages there instead.
# import sys
# !{sys.executable} -m pip install packaging==21.0

# !{sys.executable} -m pip install 'ray[tune]'

######### Quick start optuna example ####################################

def objective(trial):
    """Optuna quick-start objective: squared distance of ``x`` from 2.

    Args:
        trial: Optuna trial used to sample ``x`` from the range [-10, 10].

    Returns:
        ``(x - 2) ** 2``, which is smallest when ``x == 2``.
    """
    # suggest_float replaces the deprecated suggest_uniform and matches the
    # call style of the SVC objective further down in this file.
    x = trial.suggest_float('x', -10, 10)
    return (x - 2) ** 2

# Run the quick-start search: 100 trials of the objective defined just above.
study = optuna.create_study()
study.optimize(objective, n_trials=100)

# A bare `study.best_params` expression in the middle of a notebook cell is
# evaluated and then discarded, so print the result explicitly.
print("Best params:", study.best_params)

############################### Ray quick start example ############################################

from ray import tune


def objective(step, alpha, beta):
    """Toy loss for the Ray Tune quick start.

    Args:
        step: Current iteration index.
        alpha: Scales how quickly the reciprocal term shrinks with ``step``.
        beta: Adds a constant offset of ``beta * 0.1``.

    Returns:
        ``(0.1 + alpha * step / 100) ** -1 + beta * 0.1``.
    """
    base = 0.1 + alpha * step / 100
    offset = beta * 0.1
    return base ** (-1) + offset


def training_function(config):
    """Trainable handed to Ray Tune: report the toy loss for ten steps.

    Args:
        config: Dictionary of hyperparameters; the keys ``"alpha"`` and
            ``"beta"`` are read.
    """
    # Hyperparameters for this trial.
    alpha = config["alpha"]
    beta = config["beta"]
    # Stand-in for an iterative training procedure: each step computes a
    # score and feeds it back to Tune as ``mean_loss``.
    for iteration in range(10):
        tune.report(mean_loss=objective(iteration, alpha, beta))


# Launch the sweep: alpha is grid-searched (one trial per listed value) while
# beta is drawn from the listed choices for each trial.
analysis = tune.run(
    training_function,
    config=dict(
        alpha=tune.grid_search([0.001, 0.01, 0.1]),
        beta=tune.choice([1, 2, 3]),
    ),
)

# Lower mean_loss is better, hence mode="min".
print("Best config: ", analysis.get_best_config(metric="mean_loss", mode="min"))

# Per-trial results as a dataframe for further analysis.
df = analysis.results_df

################## Pipeline tutorial ##########################
# Synthetic classification data; the fixed seeds make the data and the
# train/test split reproducible.
x, y = make_classification(random_state=0)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

# SVC() runs with its default hyperparameters here; the plan is to let Optuna
# search for better ones instead.
pipe = Pipeline([('scalar', StandardScaler()), ('svc', SVC())])

pipe.fit(x_train, y_train)
# A bare `pipe.score(...)` expression in the middle of a notebook cell is
# evaluated and thrown away, so print the held-out score explicitly.
print("Test score:", pipe.score(x_test, y_test))

#################################################################

print("All GOOD")

[32m[I 2021-10-11 00:04:03,635][0m A new study created in memory with name: no-name-c48959eb-50cb-4522-9e8a-9e1b7692de20[0m
[32m[I 2021-10-11 00:04:03,638][0m Trial 0 finished with value: 8.707704130823227 and parameters: {'x': -0.9508819242428572}. Best is trial 0 with value: 8.707704130823227.[0m
[32m[I 2021-10-11 00:04:03,639][0m Trial 1 finished with value: 19.141254978241786 and parameters: {'x': 6.375071996920941}. Best is trial 0 with value: 8.707704130823227.[0m
[32m[I 2021-10-11 00:04:03,641][0m Trial 2 finished with value: 47.14418702095881 and parameters: {'x': 8.866162466833917}. Best is trial 0 with value: 8.707704130823227.[0m
[32m[I 2021-10-11 00:04:03,642][0m Trial 3 finished with value: 1.134422058959872 and parameters: {'x': 3.065092511925547}. Best is trial 3 with value: 1.134422058959872.[0m
[32m[I 2021-10-11 00:04:03,643][0m Trial 4 finished with value: 1.5641129736034314 and parameters: {'x': 3.2506450230194943}. Best is trial 3 with value: 1.1344

Trial name,status,loc,alpha,beta
training_function_6e905_00000,PENDING,,0.001,1
training_function_6e905_00001,PENDING,,0.01,2
training_function_6e905_00002,PENDING,,0.1,2


Result for training_function_6e905_00000:
  date: 2021-10-11_00-04-15
  done: false
  experiment_id: 0e77cd13f0c6460aa5c52d124a9cad28
  hostname: Chandlers-MacBook-Pro.local
  iterations_since_restore: 1
  mean_loss: 10.1
  neg_mean_loss: -10.1
  node_ip: 10.0.0.180
  pid: 30718
  time_since_restore: 8.177757263183594e-05
  time_this_iter_s: 8.177757263183594e-05
  time_total_s: 8.177757263183594e-05
  timestamp: 1633935855
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 6e905_00000
  
Result for training_function_6e905_00001:
  date: 2021-10-11_00-04-15
  done: false
  experiment_id: a20980cd3d4a491980fca4f82fbe1f37
  hostname: Chandlers-MacBook-Pro.local
  iterations_since_restore: 1
  mean_loss: 10.2
  neg_mean_loss: -10.2
  node_ip: 10.0.0.180
  pid: 30725
  time_since_restore: 8.702278137207031e-05
  time_this_iter_s: 8.702278137207031e-05
  time_total_s: 8.702278137207031e-05
  timestamp: 1633935855
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id:

Trial name,status,loc,alpha,beta,loss,iter,total time (s),neg_mean_loss
training_function_6e905_00000,TERMINATED,,0.001,1,10.091,10,0.0615439,-10.091
training_function_6e905_00001,TERMINATED,,0.01,2,10.1108,10,0.0412638,-10.1108
training_function_6e905_00002,TERMINATED,,0.1,2,9.37431,10,0.0492768,-9.37431


2021-10-11 00:04:15,329	INFO tune.py:617 -- Total run time: 5.66 seconds (2.21 seconds for the tuning loop).


Best config:  {'alpha': 0.1, 'beta': 2}
All GOOD


In [17]:
import sklearn
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, make_pipeline

import optuna

from ray import tune

def objective(trial):
    """Optuna objective: cross-validated score of an SVC with a sampled ``C``.

    Each call builds a fresh ``SVC`` from the value Optuna suggests, so the
    sampled hyperparameter really is applied to the model being scored.

    Open question carried over from the original notes: where Ray Tune fits
    relative to Optuna (separately, or via Ray's own Bayesian search).

    Args:
        trial: Optuna trial used to sample ``svc_c`` on a log scale between
            1e-10 and 1e10.

    Returns:
        Mean of the 3-fold cross-validation scores (higher is better).
    """
    c_value = trial.suggest_float('svc_c', 1e-10, 1e10, log=True)

    # Same synthetic dataset on every trial thanks to the fixed seed.
    features, labels = make_classification(random_state=0)

    classifier = sklearn.svm.SVC(C=c_value, gamma='auto')
    fold_scores = sklearn.model_selection.cross_val_score(
        classifier, features, labels, n_jobs=-1, cv=3
    )
    return fold_scores.mean()

# The objective returns a score where higher is better, so maximize it.
study = optuna.create_study(direction="maximize")
study.optimize(func=objective, n_trials=100)
print(study.best_trial)

[32m[I 2021-10-11 12:10:17,527][0m A new study created in memory with name: no-name-5bff78ee-8f20-47ce-8efa-8a2eae8fa66c[0m
[32m[I 2021-10-11 12:10:19,448][0m Trial 0 finished with value: 0.8406862745098039 and parameters: {'svc_c': 1.3915284479755351e-05}. Best is trial 0 with value: 0.8406862745098039.[0m
[32m[I 2021-10-11 12:10:19,974][0m Trial 1 finished with value: 0.8406862745098039 and parameters: {'svc_c': 0.0003787161276670669}. Best is trial 0 with value: 0.8406862745098039.[0m
[32m[I 2021-10-11 12:10:20,420][0m Trial 2 finished with value: 0.8094362745098039 and parameters: {'svc_c': 296.96463475911264}. Best is trial 0 with value: 0.8406862745098039.[0m
[32m[I 2021-10-11 12:10:20,858][0m Trial 3 finished with value: 0.8094362745098039 and parameters: {'svc_c': 64.86388396956013}. Best is trial 0 with value: 0.8406862745098039.[0m
[32m[I 2021-10-11 12:10:20,867][0m Trial 4 finished with value: 0.8094362745098039 and parameters: {'svc_c': 3.56172040135159}. B

FrozenTrial(number=65, values=[0.8504901960784313], datetime_start=datetime.datetime(2021, 10, 11, 12, 10, 23, 427324), datetime_complete=datetime.datetime(2021, 10, 11, 12, 10, 23, 436787), params={'svc_c': 0.7923180575562476}, distributions={'svc_c': LogUniformDistribution(high=10000000000.0, low=1e-10)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=65, state=TrialState.COMPLETE, value=None)
