<a href="https://colab.research.google.com/github/Jitpanu-Chai/Optuna/blob/main/Optuna_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -qq optuna

[K     |████████████████████████████████| 308 kB 9.6 MB/s 
[K     |████████████████████████████████| 81 kB 12.6 MB/s 
[K     |████████████████████████████████| 210 kB 66.6 MB/s 
[K     |████████████████████████████████| 78 kB 7.6 MB/s 
[K     |████████████████████████████████| 113 kB 100.0 MB/s 
[K     |████████████████████████████████| 49 kB 6.9 MB/s 
[K     |████████████████████████████████| 146 kB 91.4 MB/s 
[?25h  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [None]:
import os
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms
import optuna

# **PyTorch**

In [None]:
# Configuration for the PyTorch experiment.
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs on CPU-only runtimes instead of failing at `.to(DEVICE)`.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCHSIZE = 128  # mini-batch size used by both data loaders
CLASSES = 10  # output width of the final Linear layer in define_model
DIR = os.getcwd()  # dataset root handed to datasets.FashionMNIST
EPOCHS = 10  # training epochs per trial
LOG_INTERVAL = 10  # not referenced by the cells visible in this notebook
N_TRAIN_EXAMPLES = BATCHSIZE * 30  # per-epoch cap on training samples
N_VALID_EXAMPLES = BATCHSIZE * 10  # per-epoch cap on validation samples

# **Optuna + model construction**

In [None]:
def define_model(trial):
    """Build a fully connected classifier whose shape is sampled from ``trial``.

    The trial chooses the number of hidden layers (``n_layers``, 1-3) and, for
    each hidden layer ``i``, its width (``n_units_l{i}``, 4-128) and dropout
    probability (``dropout_l{i}``, 0.2-0.5). Every hidden layer is
    Linear -> ReLU -> Dropout. The head is a Linear layer with ``CLASSES``
    outputs followed by LogSoftmax over dimension 1.

    Args:
        trial: object providing ``suggest_int`` / ``suggest_float``.

    Returns:
        An ``nn.Sequential`` expecting inputs with 28*28 features.
    """
    depth = trial.suggest_int("n_layers",1,3)

    modules = []
    width_in = 28*28
    for idx in range(depth):
        width_out = trial.suggest_int(f"n_units_l{idx}",4,128)
        hidden = nn.Linear(width_in,width_out)
        drop_prob = trial.suggest_float(f"dropout_l{idx}",0.2,0.5)
        modules += [hidden, nn.ReLU(), nn.Dropout(drop_prob)]
        # The next layer consumes what this one produced.
        width_in = width_out

    # Classification head emitting log-probabilities.
    modules += [nn.Linear(width_in,CLASSES), nn.LogSoftmax(dim=1)]
    return nn.Sequential(*modules)

In [None]:
def get_mnist():
    """Create shuffled training and validation loaders for FashionMNIST.

    Despite the function name, the dataset loaded is FashionMNIST. The data is
    stored under ``DIR``; the training split is requested with
    ``download=True``. Both loaders use ``BATCHSIZE`` and ``shuffle=True``.

    Returns:
        Tuple ``(train_loader, valid_loader)``.
    """
    loader_options = dict(batch_size=BATCHSIZE, shuffle=True)

    train_set = datasets.FashionMNIST(
        DIR, train=True, download=True, transform=transforms.ToTensor()
    )
    train_loader = torch.utils.data.DataLoader(train_set, **loader_options)

    valid_set = datasets.FashionMNIST(
        DIR, train=False, transform=transforms.ToTensor()
    )
    valid_loader = torch.utils.data.DataLoader(valid_set, **loader_options)

    return train_loader, valid_loader

# **Optuna Area**

In [None]:
def objective(trial):
    """Train one trial-sampled PyTorch model and return its validation accuracy.

    The model comes from ``define_model``; the optimizer class (``optimizers``)
    and learning rate (``lr``) are sampled from ``trial``. Each epoch trains on
    at most ``N_TRAIN_EXAMPLES`` samples, then scores at most
    ``N_VALID_EXAMPLES`` validation samples. The per-epoch accuracy is reported
    to the trial so the pruner can stop unpromising runs early.

    Args:
        trial: the Optuna trial driving this evaluation.

    Returns:
        Validation accuracy of the last epoch (0.0 if no epoch ran or no
        validation sample was scored).

    Raises:
        optuna.exceptions.TrialPruned: when ``trial.should_prune()`` is true.
    """
    # Build the network for this trial.
    model = define_model(trial).to(DEVICE)

    # Sample the optimizer class and its learning rate.
    optimizer_name = trial.suggest_categorical("optimizers", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # Load the FashionMNIST loaders.
    train_loader, valid_loader = get_mnist()

    # Bound up front so EPOCHS == 0 returns a value instead of raising NameError.
    accuracy = 0.0
    for epoch in range(EPOCHS):
        # Training pass.
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Limiting training data for faster epochs.
            if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                break

            # Flatten each image to a vector before the first Linear layer.
            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()

        # Validation pass.
        model.eval()
        correct = 0
        seen = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                # Limiting validation data.
                if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                    break
                data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
                output = model(data)
                # Get the index of the max log-probability.
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
                seen += target.size(0)

        # Divide by the samples actually scored. The batch-granular cap above can
        # evaluate more than N_VALID_EXAMPLES samples when that cap is not a
        # multiple of BATCHSIZE, which would otherwise inflate the accuracy.
        accuracy = correct / seen if seen else 0.0

        # Report the intermediate value so the pruner can judge this trial.
        trial.report(accuracy, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    return accuracy

In [None]:
# Run the hyperparameter search: maximise the objective's return value,
# stopping after 100 trials or 600 seconds, whichever comes first.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100, timeout=600)

# Split the recorded trials by their final state.
pruned_trials = list(
    filter(lambda t: t.state == optuna.trial.TrialState.PRUNED, study.trials)
)
complete_trials = list(
    filter(lambda t: t.state == optuna.trial.TrialState.COMPLETE, study.trials)
)

# Summary counts.
print('study statistic')
print(f"number of finished trials:  {len(study.trials)}")
print(f"number of proned trials:  {len(pruned_trials)}")
print(f"number of completed trials:  {len(complete_trials)}")

# Best trial and the hyperparameters that produced it.
print("Best trials:")
trial = study.best_trial
print(f"Value {trial.value}")

print('Params')
for key, value in trial.params.items():
    print(f"{key}:{value}")


[32m[I 2022-04-17 13:12:05,027][0m A new study created in memory with name: no-name-cd19d78a-4d57-4cee-abc9-c1917b05d294[0m


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /content/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting /content/FashionMNIST/raw/train-images-idx3-ubyte.gz to /content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /content/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting /content/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /content/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting /content/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /content/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting /content/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /content/FashionMNIST/raw



[32m[I 2022-04-17 13:12:15,876][0m Trial 0 finished with value: 0.81171875 and parameters: {'n_layers': 2, 'n_units_l0': 119, 'dropout_l0': 0.28751041006938305, 'n_units_l1': 117, 'dropout_l1': 0.34686293198243245, 'optimizers': 'RMSprop', 'lr': 0.002967685174047254}. Best is trial 0 with value: 0.81171875.[0m
[32m[I 2022-04-17 13:12:19,788][0m Trial 1 finished with value: 0.81484375 and parameters: {'n_layers': 1, 'n_units_l0': 86, 'dropout_l0': 0.4898870352695034, 'optimizers': 'RMSprop', 'lr': 0.0015826085264511874}. Best is trial 1 with value: 0.81484375.[0m
[32m[I 2022-04-17 13:12:23,747][0m Trial 2 finished with value: 0.46796875 and parameters: {'n_layers': 2, 'n_units_l0': 15, 'dropout_l0': 0.24989929592970755, 'n_units_l1': 67, 'dropout_l1': 0.23137251238147208, 'optimizers': 'Adam', 'lr': 0.04398416090755745}. Best is trial 1 with value: 0.81484375.[0m
[32m[I 2022-04-17 13:12:27,650][0m Trial 3 finished with value: 0.7265625 and parameters: {'n_layers': 1, 'n_units

study statistic
number of finished trials:  100
number of proned trials:  79
number of completed trials:  21
Best trials:
Value 0.834375
Params
n_layers:1
n_units_l0:115
dropout_l0:0.32820105786471565
optimizers:RMSprop
lr:0.0023527684359174444


# **TensorFlow**

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist

In [None]:
# Configuration for the TensorFlow experiment. Running this cell rebinds the
# same-named constants defined for the PyTorch section earlier in the notebook.
BATCHSIZE = 128          # batch size given to Dataset.batch()
CLASSES = 10             # units in the final Dense layer of create_model
EPOCHS = 1               # training passes per trial
N_TRAIN_EXAMPLES = 3000  # passed to Dataset.take() for the training pipeline
N_VALID_EXAMPLES = 1000  # passed to Dataset.take() for the validation pipeline

In [None]:
def create_model(trial):
    """Build a Keras MLP whose depth, widths and L2 strength come from ``trial``.

    Sampled parameters: ``n_layers`` (1-3), ``weight_decay`` (log scale,
    1e-10 to 1e-3) and, per hidden layer ``i``, ``n_units_l{i}`` (log scale,
    4-128). The input is flattened; every Dense layer, including the final
    ``CLASSES``-unit layer, carries an L2 kernel regularizer built from
    ``weight_decay``. The final layer has no activation.

    Args:
        trial: object providing ``suggest_int`` / ``suggest_float``.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    depth = trial.suggest_int("n_layers", 1, 3)
    l2_strength = trial.suggest_float("weight_decay", 1e-10, 1e-3, log=True)

    def regularized_dense(units, **options):
        # Each layer gets its own regularizer instance with the shared strength.
        return tf.keras.layers.Dense(
            units,
            kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
            **options,
        )

    net = tf.keras.Sequential()
    net.add(tf.keras.layers.Flatten())
    for idx in range(depth):
        width = trial.suggest_int(f"n_units_l{idx}", 4, 128, log=True)
        net.add(regularized_dense(width, activation="relu"))
    net.add(regularized_dense(CLASSES))
    return net

In [None]:
def create_optimizer(trial):
    """Sample an optimizer class and its hyperparameters from ``trial``.

    The class is chosen via the ``optimizer`` parameter from RMSprop, Adam and
    SGD. Learning rates and momenta are sampled on a log scale in
    [1e-5, 1e-1]. For RMSprop the ``rmsprop_decay`` value (0.85-0.99) is the
    discounting factor of the squared-gradient moving average.

    Args:
        trial: object providing ``suggest_categorical`` / ``suggest_float``.

    Returns:
        An instance of the selected class looked up on ``tf.optimizers``.
    """
    kwargs = {}
    optimizer_options = ["RMSprop", "Adam", "SGD"]
    optimizer_selected = trial.suggest_categorical("optimizer", optimizer_options)
    if optimizer_selected == "RMSprop":
        kwargs["learning_rate"] = trial.suggest_float(
            "rmsprop_learning_rate", 1e-5, 1e-1, log=True
        )
        # The discount factor is Keras' `rho` argument. Passing it as `decay`
        # would instead request inverse-time learning-rate decay (and is rejected
        # by newer Keras optimizers). The Optuna parameter name is kept so stored
        # studies and printed results stay comparable.
        kwargs["rho"] = trial.suggest_float("rmsprop_decay", 0.85, 0.99)
        kwargs["momentum"] = trial.suggest_float("rmsprop_momentum", 1e-5, 1e-1, log=True)
    elif optimizer_selected == "Adam":
        kwargs["learning_rate"] = trial.suggest_float("adam_learning_rate", 1e-5, 1e-1, log=True)
    elif optimizer_selected == "SGD":
        kwargs["learning_rate"] = trial.suggest_float(
            "sgd_opt_learning_rate", 1e-5, 1e-1, log=True
        )
        kwargs["momentum"] = trial.suggest_float("sgd_opt_momentum", 1e-5, 1e-1, log=True)

    # Resolve the optimizer class by name and instantiate it.
    optimizer = getattr(tf.optimizers, optimizer_selected)(**kwargs)
    return optimizer


In [None]:
def learn(model, optimizer, dataset, mode="eval"):
    """Run one pass over ``dataset``, either scoring or updating ``model``.

    Args:
        model: Keras model called as ``model(images, training=...)``.
        optimizer: optimizer whose ``apply_gradients`` is used for updates;
            unused when ``mode == "eval"``.
        dataset: iterable yielding ``(images, labels)`` batches.
        mode: ``"eval"`` accumulates accuracy without touching the weights.
            Any other value performs gradient updates; the model is called
            with ``training=True`` only when ``mode == "train"``.

    Returns:
        The ``tf.metrics.Accuracy`` object accumulated over all batches when
        ``mode == "eval"``; ``None`` otherwise.
    """
    evaluating = mode == "eval"
    accuracy = tf.metrics.Accuracy("accuracy", dtype=tf.float32)

    for images, labels in dataset:
        if evaluating:
            # No gradients are needed for scoring, so no tape is recorded.
            logits = model(images, training=False)
            accuracy(
                tf.argmax(logits, axis=1, output_type=tf.int64), tf.cast(labels, tf.int64)
            )
            continue

        with tf.GradientTape() as tape:
            logits = model(images, training=(mode == "train"))
            loss_value = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
            )
            # Layer regularizers (e.g. kernel_regularizer) only populate
            # model.losses; a custom loop has to add them explicitly or the
            # penalty never reaches the gradients.
            if model.losses:
                loss_value += tf.add_n(model.losses)

        # Differentiate outside the tape context so the gradient computation
        # itself is not recorded, and only w.r.t. trainable variables.
        trainable = model.trainable_variables
        grads = tape.gradient(loss_value, trainable)
        optimizer.apply_gradients(zip(grads, trainable))

    if evaluating:
        return accuracy
    return None

In [None]:
def get_fashion_mnist():
    """Load FashionMNIST and return capped, batched train/validation datasets.

    Pixel values are converted to float32 and divided by 255; labels are cast
    to int32. Each pipeline shuffles with a buffer covering the full split,
    keeps at most ``N_TRAIN_EXAMPLES`` / ``N_VALID_EXAMPLES`` individual
    examples, and then groups them into batches of ``BATCHSIZE``.

    Returns:
        Tuple ``(train_ds, valid_ds)`` of ``tf.data.Dataset`` objects yielding
        ``(images, labels)`` batches.
    """
    (x_train, y_train), (x_valid, y_valid) = fashion_mnist.load_data()
    x_train = x_train.astype("float32") / 255
    x_valid = x_valid.astype("float32") / 255

    y_train = y_train.astype("int32")
    y_valid = y_valid.astype("int32")

    # take() must come before batch(): applied after batching it would limit the
    # number of batches rather than examples, leaving the full split in use.
    train_ds = (
        tf.data.Dataset.from_tensor_slices((x_train, y_train))
        .shuffle(len(x_train))
        .take(N_TRAIN_EXAMPLES)
        .batch(BATCHSIZE)
    )

    valid_ds = (
        tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
        .shuffle(len(x_valid))
        .take(N_VALID_EXAMPLES)
        .batch(BATCHSIZE)
    )
    return train_ds, valid_ds

In [None]:
def objective(trial):
    """Train a trial-sampled Keras model and return its validation accuracy.

    This definition reuses the name ``objective``; once this cell runs it
    replaces the PyTorch objective defined earlier in the notebook.

    Args:
        trial: the Optuna trial supplying the hyperparameters.

    Returns:
        ``result()`` of the accuracy metric produced by the final evaluation
        pass.
    """
    # Data pipelines for this trial.
    train_batches, valid_batches = get_fashion_mnist()

    # Model first, then optimizer, both sampled from the trial.
    net = create_model(trial)
    opt = create_optimizer(trial)

    # Training and evaluation are pinned to the CPU device.
    with tf.device("/cpu:0"):
        for _epoch in range(EPOCHS):
            learn(net, opt, train_batches, "train")

        final_accuracy = learn(net, opt, valid_batches, "eval")

    # Only the post-training validation accuracy is returned.
    return final_accuracy.result()

# **Optuna Area**

In [None]:
# Run 100 trials of the TensorFlow objective, maximising its return value.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

print(f"Number of finished trials:  {len(study.trials)}")

# Report the best trial and the hyperparameters that produced it.
print("Best trial:")
trial = study.best_trial

print(f"  Value:  {trial.value}")

print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

[32m[I 2022-04-17 13:14:49,838][0m A new study created in memory with name: no-name-c2e15266-fda3-4fa9-9af5-a1165f5d9404[0m


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


[32m[I 2022-04-17 13:15:22,613][0m Trial 0 finished with value: 0.10329999774694443 and parameters: {'n_layers': 3, 'weight_decay': 3.966398725390108e-09, 'n_units_l0': 23, 'n_units_l1': 5, 'n_units_l2': 16, 'optimizer': 'RMSprop', 'rmsprop_learning_rate': 1.3301158777449405e-05, 'rmsprop_decay': 0.9524280240216382, 'rmsprop_momentum': 1.4101409803623335e-05}. Best is trial 0 with value: 0.10329999774694443.[0m
[32m[I 2022-04-17 13:15:50,666][0m Trial 1 finished with value: 0.36489999294281006 and parameters: {'n_layers': 2, 'weight_decay': 7.494692479278656e-09, 'n_units_l0': 7, 'n_units_l1': 62, 'optimizer': 'RMSprop', 'rmsprop_learning_rate': 0.006868881557875344, 'rmsprop_decay': 0.924427881508892, 'rmsprop_momentum': 7.823227124855227e-05}. Best is trial 1 with value: 0.36489999294281006.[0m
[32m[I 2022-04-17 13:16:25,396][0m Trial 2 finished with value: 0.8090000152587891 and parameters: {'n_layers': 3, 'weight_decay': 3.69329139037261e-09, 'n_units_l0': 13, 'n_units_l1':

Number of finished trials:  100
Best trial:
  Value:  0.8855000138282776
  Params: 
    n_layers: 3
    weight_decay: 8.454565095434013e-05
    n_units_l0: 115
    n_units_l1: 93
    n_units_l2: 81
    optimizer: Adam
    adam_learning_rate: 0.0023754038805213753
