In [None]:
# Installation
!pip install nevergrad
!pip install pyproj
!pip install mixsimulator

Collecting mixsimulator
  Downloading https://files.pythonhosted.org/packages/5e/8c/de0af1b9014bd620b8d49b9eaace7919b03117122dec78f364df068af6dc/mixsimulator-0.2.9.9-py3-none-any.whl
Installing collected packages: mixsimulator
Successfully installed mixsimulator-0.2.9.9


# Examples

## Nevergrad

In [None]:
# Simplest example of minimization
import nevergrad as ng

def square(x):
    """Return the sum of squared deviations of the entries of ``x`` from 0.5.

    ``x`` must support elementwise arithmetic (e.g. a NumPy array): the
    subtraction and the power are applied to ``x`` as a whole, and the
    built-in ``sum`` then adds up the resulting entries.
    """
    offsets = x - 0.5
    squared_offsets = offsets ** 2
    return sum(squared_offsets)

# optimization on x as an array of shape (2,)
optimizer = ng.optimizers.OnePlusOne(parametrization=2, budget=100)
# `minimize` drives the optimizer on `square` and returns its recommendation.
recommendation = optimizer.minimize(square)  # best value
# `.value` holds the recommended x; it is expected to be close to [0.5, 0.5],
# where every term of `square` is zero.
print(recommendation.value)

[0.50007894 0.49854316]


In [None]:
import nevergrad as ng
# The keys of `ng.optimizers.registry` are optimizer names; print them sorted.
print(f'🤖 The list of optimizers in nevergrad library: \n{sorted(ng.optimizers.registry.keys())}')

['ASCMA2PDEthird', 'ASCMADEQRthird', 'ASCMADEthird', 'AdaptiveDiscreteOnePlusOne', 'AlmostRotationInvariantDE', 'AnisotropicAdaptiveDiscreteOnePlusOne', 'AvgMetaRecenteringNoHull', 'BO', 'CM', 'CMA', 'CMandAS', 'CMandAS2', 'CMandAS3', 'CauchyLHSSearch', 'CauchyOnePlusOne', 'CauchyScrHammersleySearch', 'Cobyla', 'DE', 'DiagonalCMA', 'DiscreteBSOOnePlusOne', 'DiscreteDoerrOnePlusOne', 'DiscreteLenglerOnePlusOne', 'DiscreteOnePlusOne', 'DoubleFastGADiscreteOnePlusOne', 'EDA', 'ES', 'FCMA', 'HAvgMetaRecentering', 'HaltonSearch', 'HaltonSearchPlusMiddlePoint', 'HammersleySearch', 'HammersleySearchPlusMiddlePoint', 'HyperOpt', 'LHSSearch', 'LargeHaltonSearch', 'LhsDE', 'MEDA', 'MPCEDA', 'ManyCMA', 'ManySmallCMA', 'MetaModel', 'MetaRecentering', 'MetaTuneRecentering', 'MixES', 'MultiCMA', 'MultiDiscrete', 'MultiScaleCMA', 'MutDE', 'NGO', 'NGOpt', 'NGOpt2', 'NGOpt4', 'NGOpt8', 'NGOptBase', 'NSGAIIES', 'NaiveIsoEMNA', 'NaiveTBPSA', 'NelderMead', 'NoisyBandit', 'NoisyDE', 'NoisyDiscreteOnePlusOn

Let's consider a function of several variables of different types.

In [None]:
import nevergrad as ng

def square(x, y=12):
    """Return the squared-deviation sum of ``x`` around 0.5 plus ``abs(y)``.

    ``x`` must support elementwise arithmetic (e.g. a NumPy array); ``y`` is
    a single number and defaults to 12 when the caller does not supply it.
    """
    quadratic_part = sum((x - 0.5) ** 2)
    penalty = abs(y)
    return quadratic_part + penalty

# optimization on x as an array of shape (2,)
# NOTE: only x is parametrized here, so `square` is always called with its
# default y=12; y is not optimized in this cell (see the next cell for that).
optimizer = ng.optimizers.OnePlusOne(parametrization=2, budget=100)
recommendation = optimizer.minimize(square)  # best value
print(recommendation.value)

[0.50016658 0.499733  ]


In [None]:
# Describe both arguments of `square(x, y)`: one positional array of shape (2,)
# and one keyword argument `y` that is a scalar. Relies on `ng` and `square`
# defined in the previous cell.
instrum = ng.p.Instrumentation(ng.p.Array(shape=(2,)), y=ng.p.Scalar())
optimizer = ng.optimizers.OnePlusOne(parametrization=instrum, budget=100)
recommendation = optimizer.minimize(square)
# The recommended value is an (args, kwargs) pair: ((x_array,), {'y': ...}).
print(recommendation.value)

((array([0.55303943, 0.41830154]),), {'y': -0.0004155670028914804})


In [None]:
def square(vars):
    """Objective over one flat array: the last entry plays the role of ``y``.

    All entries but the last contribute their squared deviation from 0.5;
    the last entry contributes its absolute value. Note that the parameter
    name shadows the built-in ``vars`` inside this function.
    """
    leading, last = vars[:-1], vars[-1]
    return sum((leading - 0.5) ** 2) + abs(last)

# A single array of 3 values: the first two act as x, the last one as y.
optimizer = ng.optimizers.OnePlusOne(parametrization=3, budget=100)
# The `instrum` parametrization from the previous cell is deliberately not
# used here: it supplies (array, y=...) while this `square` takes one array.
recommendation = optimizer.minimize(square)
print(recommendation.value)

[ 4.59289713e-01  5.01061234e-01 -1.88335731e-04]


In [None]:
import nevergrad as ng

def my_function(x):
    """Return the absolute value of the summed entries of ``x - 1``.

    ``x`` must support elementwise subtraction (e.g. a NumPy array).
    """
    shifted_total = sum(x - 1)
    return abs(shifted_total)

def print_candidate_and_value(optimizer, candidate, value):
    """Print one line describing a candidate and its value.

    The ``optimizer`` argument is accepted but not used in the body.
    """
    message = f'Candidate: {candidate}, Value: {value}'
    print(message)

# A tiny budget keeps the callback output short (four lines in the recorded output).
optimizer = ng.optimizers.OnePlusOne(parametrization=2, budget=4)
# Attach the printer to the optimizer's "tell" event.
optimizer.register_callback("tell", print_candidate_and_value)
# As the last expression of the cell, the returned recommendation is also displayed.
optimizer.minimize(my_function)  # triggers a print at each tell within minimize

Candidate: Array{(2,)}:[0. 0.], Value: 2.0
Candidate: Array{(2,)}:[-0.18247207  2.67349699], Value: 0.4910249240882243
Candidate: Array{(2,)}:[-0.54063735 -0.141925  ], Value: 2.682562350446311
Candidate: Array{(2,)}:[-0.97606428  6.40214891], Value: 3.4260846305552595


Array{(2,)}:[-0.18247207  2.67349699]

In [None]:
# BE CAREFUL, IT TAKES TIME!!!
# Runs the nevergrad benchmark module as a shell command with a fixed seed,
# a single repetition and 4 workers; `--plot` presumably also produces the
# plots (confirm against the nevergrad benchmark documentation).
!python -m nevergrad.benchmark doe --seed=12 --repetitions=1 --num_workers=4 --plot 

## [Optuna](https://colab.research.google.com/github/optuna/optuna/blob/master/examples/quickstart.ipynb)

# A Quick Introduction to Optuna

This Jupyter notebook goes through the basic usage of Optuna.

- Install Optuna
- Write a training algorithm that involves hyperparameters
  - Read train/valid data
  - Define and train model
  - Evaluate model
- Use Optuna to tune the hyperparameters (hyperparameter optimization, HPO)
- Visualize HPO

### Install `optuna`

Optuna can be installed via `pip` or `conda`.

In [None]:
!pip install --quiet optuna

[K     |████████████████████████████████| 286kB 5.3MB/s 
[K     |████████████████████████████████| 163kB 7.0MB/s 
[K     |████████████████████████████████| 81kB 4.8MB/s 
[K     |████████████████████████████████| 481kB 6.8MB/s 
[K     |████████████████████████████████| 133kB 11.8MB/s 
[K     |████████████████████████████████| 51kB 4.3MB/s 
[K     |████████████████████████████████| 112kB 14.0MB/s 
[?25h  Building wheel for Mako (setup.py) ... [?25l[?25hdone
  Building wheel for PrettyTable (setup.py) ... [?25l[?25hdone
  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [None]:
import optuna

optuna.__version__

'2.4.0'

### Optimize Hyperparameters

#### Define a simple scikit-learn model

We start with a simple random forest model to classify flowers in the Iris dataset. We define a function called `objective` that encapsulates the whole training process and outputs the accuracy of the model.

In [None]:
import sklearn.datasets
import sklearn.ensemble
import sklearn.model_selection

def objective():
    """Return the cross-validated score of a small random forest on Iris.

    Loads the Iris dataset, builds a ``RandomForestClassifier`` with 5 trees
    of depth at most 3, and returns the mean of the 3-fold
    ``cross_val_score`` results.
    """
    dataset = sklearn.datasets.load_iris()  # Prepare the data.

    forest = sklearn.ensemble.RandomForestClassifier(
        max_depth=3, n_estimators=5)  # Define the model.

    # Train and evaluate the model.
    fold_scores = sklearn.model_selection.cross_val_score(
        forest, dataset.data, dataset.target, n_jobs=-1, cv=3)
    return fold_scores.mean()

print('Accuracy: {}'.format(objective()))

Accuracy: 0.96


#### Optimize hyperparameters of the model

The hyperparameters of the above algorithm are `n_estimators` and `max_depth` for which we can try different values to see if the model accuracy can be improved. The `objective` function is modified to accept a trial object. This trial has several methods for sampling hyperparameters. We create a study to run the hyperparameter optimization and finally read the best hyperparameters.

In [None]:
import optuna

def objective(trial):
    """Optuna objective: cross-validated score of a random forest on Iris.

    Args:
        trial: Optuna trial object used to sample ``n_estimators`` (integer
            in [2, 20]) and ``max_depth`` (integer in [1, 32], log scale).

    Returns:
        Mean of the 3-fold ``cross_val_score`` results for the sampled model.
    """
    iris = sklearn.datasets.load_iris()

    n_estimators = trial.suggest_int('n_estimators', 2, 20)
    # Sample the depth directly as an integer on a log scale. Truncating a
    # suggested float with int() made the study record a fractional max_depth
    # (e.g. 4.89) that differed from the depth the classifier was built with.
    max_depth = trial.suggest_int('max_depth', 1, 32, log=True)

    clf = sklearn.ensemble.RandomForestClassifier(
        n_estimators=n_estimators, max_depth=max_depth)

    return sklearn.model_selection.cross_val_score(
        clf, iris.data, iris.target, n_jobs=-1, cv=3).mean()

# The objective returns a score to be maximized, hence direction='maximize'.
study = optuna.create_study(direction='maximize')
# Evaluate the objective for 100 trials.
study.optimize(objective, n_trials=100)

# Best trial found by the study.
trial = study.best_trial

# Report the best objective value and the hyperparameters that produced it.
print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[32m[I 2021-01-19 20:52:12,402][0m A new study created in memory with name: no-name-9e33d9b3-c470-45dd-9456-34e8b1971c36[0m
[32m[I 2021-01-19 20:52:12,481][0m Trial 0 finished with value: 0.9533333333333333 and parameters: {'n_estimators': 16, 'max_depth': 3.5948720255127977}. Best is trial 0 with value: 0.9533333333333333.[0m
[32m[I 2021-01-19 20:52:12,512][0m Trial 1 finished with value: 0.94 and parameters: {'n_estimators': 4, 'max_depth': 2.6329200110046247}. Best is trial 0 with value: 0.9533333333333333.[0m
[32m[I 2021-01-19 20:52:12,565][0m Trial 2 finished with value: 0.96 and parameters: {'n_estimators': 10, 'max_depth': 23.185136649609785}. Best is trial 2 with value: 0.96.[0m
[32m[I 2021-01-19 20:52:12,623][0m Trial 3 finished with value: 0.96 and parameters: {'n_estimators': 12, 'max_depth': 9.252616393088807}. Best is trial 2 with value: 0.96.[0m
[32m[I 2021-01-19 20:52:12,699][0m Trial 4 finished with value: 0.9533333333333333 and parameters: {'n_estimato

Accuracy: 0.9733333333333333
Best hyperparameters: {'n_estimators': 7, 'max_depth': 4.898670191017612}


It is possible to condition hyperparameters using Python `if` statements. We can for instance include another classifier, a support vector machine, in our HPO and define hyperparameters specific to the random forest model and the support vector machine.

In [None]:
import sklearn.svm

def objective(trial):
    """Optuna objective that also chooses between two classifier families.

    Args:
        trial: Optuna trial object. It first samples ``classifier``
            ('RandomForest' or 'SVC') and then only the hyperparameters that
            belong to the chosen classifier.

    Returns:
        Mean of the 3-fold ``cross_val_score`` results on the Iris dataset.
    """
    iris = sklearn.datasets.load_iris()

    classifier = trial.suggest_categorical('classifier', ['RandomForest', 'SVC'])

    if classifier == 'RandomForest':
        n_estimators = trial.suggest_int('n_estimators', 2, 20)
        # Sample the depth directly as an integer on a log scale. Truncating
        # a suggested float with int() made the study record a fractional
        # max_depth that differed from the depth the classifier actually used.
        max_depth = trial.suggest_int('max_depth', 1, 32, log=True)

        clf = sklearn.ensemble.RandomForestClassifier(
            n_estimators=n_estimators, max_depth=max_depth)
    else:
        # SVC regularization strength, sampled on a log scale.
        c = trial.suggest_float('svc_c', 1e-10, 1e10, log=True)

        clf = sklearn.svm.SVC(C=c, gamma='auto')

    return sklearn.model_selection.cross_val_score(
        clf, iris.data, iris.target, n_jobs=-1, cv=3).mean()

# A fresh study for the objective with conditional hyperparameters.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

# Best trial found by the study.
trial = study.best_trial

# `trial.params` only contains the parameters sampled for the chosen classifier.
print('Accuracy: {}'.format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[32m[I 2021-01-19 20:52:42,988][0m A new study created in memory with name: no-name-8739ef67-a44e-46aa-bcbc-2c74ea6a4e44[0m
[32m[I 2021-01-19 20:52:43,011][0m Trial 0 finished with value: 0.32 and parameters: {'classifier': 'SVC', 'svc_c': 1.5883665383285734e-10}. Best is trial 0 with value: 0.32.[0m
[32m[I 2021-01-19 20:52:43,063][0m Trial 1 finished with value: 0.9666666666666667 and parameters: {'classifier': 'RandomForest', 'n_estimators': 9, 'max_depth': 4.463018107137593}. Best is trial 1 with value: 0.9666666666666667.[0m
[32m[I 2021-01-19 20:52:43,091][0m Trial 2 finished with value: 0.96 and parameters: {'classifier': 'RandomForest', 'n_estimators': 3, 'max_depth': 9.881065935022777}. Best is trial 1 with value: 0.9666666666666667.[0m
[32m[I 2021-01-19 20:52:43,142][0m Trial 3 finished with value: 0.9333333333333332 and parameters: {'classifier': 'RandomForest', 'n_estimators': 10, 'max_depth': 1.9157848682885572}. Best is trial 1 with value: 0.9666666666666667.

Accuracy: 0.9866666666666667
Best hyperparameters: {'classifier': 'SVC', 'svc_c': 3.9689603403405735}


#### Plotting the study

Plotting the optimization history of the study.

In [None]:
optuna.visualization.plot_optimization_history(study)

Plotting the accuracies for each hyperparameter for each trial.

In [None]:
optuna.visualization.plot_slice(study)

Plotting the accuracy surface for the hyperparameters involved in the random forest model.

In [None]:
optuna.visualization.plot_contour(study, params=['n_estimators', 'max_depth'])

# Exercises

## Task 1
a) Prove that the set of positive definite matrices $S^n_{++}$ is convex.

b) Prove that $f(x) = |x|$ is a convex function.

c) Find when $f(x) = x^T A x$ is strongly convex and find the strong convexity constant.

d) $l_2$ regularization is a common technique in machine learning, and it has an interesting optimization interpretation: adding $l_2$ regularization to a convex function $f(x)$ makes $g(x) = f(x) + \lambda \|x\|_2^2$ a strongly convex function. Your task is to prove this statement.

Prove that adding $\lambda \|x\|_2^2$ to any convex function $f(x)$ ensures strong convexity of the resulting function $g(x) = f(x) + \lambda \|x\|_2^2$. Find the strong convexity constant $\mu$.

## Task 2

Implement the Rastrigin function $f: \mathbb{R}^d \to \mathbb{R}$ for $d = 10$. [link](https://www.sfu.ca/~ssurjano/rastr.html)

$$
f(\mathbf{x})=10 d+\sum_{i=1}^{d}\left[x_{i}^{2}-10 \cos \left(2 \pi x_{i}\right)\right]
$$








In [None]:
def rast():
    """Exercise placeholder for Task 2 (the Rastrigin function); TODO implement."""
    pass

In [None]:
def optimize():
    """Exercise placeholder for Task 2; presumably the optimization of `rast`. TODO implement."""
    pass

* Consider global optimization from [here](https://docs.scipy.org/doc/scipy/reference/optimize.html#global-optimization).
* Plot 4 graphs for different $d$ from {10, 100, 1000, 10000}. On each graph, plot $f$ as a function of $N_{fev}$ for 5 methods from scipy: `basinhopping`, `brute`, `differential_evolution`, `shgo`, `dual_annealing`, where $N_{fev}$ is the number of function evaluations. This information is usually available from `specific_optimizer.nfev`. If you need bounds for the optimizer, use $x_i \in [-5, 5]$.

In [None]:
### YOUR CODE


## Task 3

Machine learning models often have hyperparameters. To choose the optimal values, one can use GridSearch or RandomSearch. But these algorithms are computationally inefficient and don't use any information about the type of function being optimized. To overcome this problem one can use
[bayesian optimization](https://distill.pub/2020/bayesian-optimization/). With this method we optimize our model by sequentially choosing points based on prior information about the function. ![Image](https://www.resibots.eu/limbo/_images/bo_concept.png)

In this task you will use the [optuna](https://optuna.org/) package for hyperparameter optimization of a RandomForestClassifier. Your task is to find the best Random Forest model by varying at least 3 hyperparameters on the iris dataset. Examples can be found [here](https://optuna.org/#code_examples) or [here](https://www.kaggle.com/dixhom/bayesian-optimization-with-optuna-stacking/).

In [None]:
!pip install optuna

In [None]:
import sklearn.datasets
import sklearn.ensemble
import sklearn.model_selection
import sklearn.svm

import optuna

In [None]:
# Load the Iris dataset once and keep its `data` as x and its `target` as y.
iris = sklearn.datasets.load_iris()
x, y = iris.data, iris.target

In [None]:
def objective():
    """Exercise placeholder for Task 3 (the Optuna objective); TODO implement."""
    pass

In [None]:
### Optimization

# Materials

* [ZOOpt](https://github.com/eyounx/ZOOpt) A python package of Zeroth-Order Optimization
* [Nevergrad](https://github.com/facebookresearch/nevergrad) A Python toolbox for performing gradient-free optimization
* [Optuna](https://colab.research.google.com/github/optuna/optuna/blob/master/examples/quickstart.ipynb) tutorial.