In [113]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [114]:
#importing the required lib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2
from sklearn.preprocessing import LabelEncoder , OneHotEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import time
import sys
#new
import itertools
import pathlib

In [115]:
import torch
import torch.nn as nn
from tqdm import tqdm
from torch.optim import Adam
from torchvision.datasets import MNIST
from torchvision.transforms import Compose, ToTensor, Normalize, Lambda, transforms
from torch.utils.data import DataLoader

import argparse
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torch.nn.functional as F  
from torchsummary import summary


In [116]:
import logging
import optuna
from optuna.integration import TFKerasPruningCallback
from optuna.trial import TrialState

from progress_table import ProgressTable


In [117]:
DEVICE = torch.device("cuda")

In [None]:
# Dataloader

def MNIST_loaders(device, args):

    # From cnn_mnist2.py:
    transform=Compose([
        ToTensor(),
        Normalize((0.1307,), (0.3081,))
        ])
    ######################
    data = MNIST("./data/", train=True,
                download=True,
                transform=transform)
    data.data.to(device)
    data.targets.to(device)
    
    train_loader = DataLoader(
        data,
        batch_size=args.batch_size, shuffle=True)

    test = MNIST('./data/', train=False,
                download=True,
                transform=transform)
    test.data.to(device)
    test.targets.to(device)
    
    test_loader = DataLoader(
        test,
        batch_size=args.test_batch_size, shuffle=False)

    return train_loader, test_loader


In [None]:
# Layer classes

class Conv_layer(nn.Conv2d): 
    
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, lr=0.05,
                bias=True, device=DEVICE, dtype=None): # original: bias=True
        super().__init__(in_channels, out_channels, kernel_size, stride, padding, bias=bias, device=device, dtype=dtype)
        self.relu = torch.nn.ReLU()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        self.opt = Adam(self.parameters(), lr=lr)
        self.flatten = nn.Flatten()
        self.maxpool = nn.MaxPool2d(2)
        
    def forward(self, x):
        x_direction = x / (x.norm(2, (1,2,3), keepdim=True) + 1e-4)
        x = self.relu(self.conv(x_direction))
        #x = self.maxpool(x)
        return x
            
    def layer_train(self, x_pos, x_neg, threshold=2.0, do_step=0):          
        conv_pos3d = self.forward(x_pos)
        conv_neg3d = self.forward(x_neg)
        conv_pos1d = self.flatten(conv_pos3d) 
        conv_neg1d = self.flatten(conv_neg3d)
        g_pos = conv_pos1d.pow(2).mean(1)
        g_neg = conv_neg1d.pow(2).mean(1)
        # The following loss pushes pos (neg) samples to
        # values larger (smaller) than the self.threshold.
        loss = torch.log(1 + torch.exp(torch.cat([
            -g_pos + threshold,
            g_neg - threshold]))).mean()
        self.opt.zero_grad()
        # this backward just compute the derivative and hence
        # is not considered backpropagation.
        loss.backward()
        self.opt.step()
        return self.forward(x_pos).detach(), self.forward(x_neg).detach()
    
class FC_layer(nn.Linear): 
    def __init__(self, in_features, out_features,
                bias=True, device=DEVICE, dtype=None):
        super().__init__(in_features, out_features, bias, device, dtype)
        self.relu = torch.nn.ReLU()
        self.opt = Adam(self.parameters(), lr=0.01)

    def forward(self, x):
        x_direction = x / (x.norm(2, 1, keepdim=True) + 1e-4)
        return self.relu(
            torch.mm(x_direction, self.weight.T) +
            self.bias.unsqueeze(0))

    def layer_train(self, x_pos, x_neg, threshold):
        g_pos = self.forward(x_pos).pow(2).mean(1)
        g_neg = self.forward(x_neg).pow(2).mean(1)
        # The following loss pushes pos (neg) samples to
        # values larger (smaller) than the self.threshold.
        loss = torch.log(1 + torch.exp(torch.cat([
            -g_pos + threshold,
            g_neg - threshold]))).mean()
        self.opt.zero_grad()
        # this backward just compute the derivative and hence
        # is not considered backpropagation.
        loss.backward(retain_graph=True)
        self.opt.step()
        
        return self.forward(x_pos).detach(), self.forward(x_neg).detach()

# class Linear_Classifier(nn.Linear):
#     def __init__(self, in_features, out_features, lr,
#                 bias=False, device=DEVICE, dtype=None):
#         super().__init__(in_features, out_features, bias, device, dtype)
#         self.linear_classi = nn.Linear(in_features, out_features, bias=False)
#         self.opt = Adam(self.parameters(), lr=lr)
#         self.classification_loss = nn.CrossEntropyLoss()

#     def forward(self, x):
#         #x_direction = x / (x.norm(2, 1, keepdim=True) + 1e-4)
#         x = self.linear_classi(x.detach())
#         return x

#     def layer_train(self, x, y):    
#         output = self.forward(x)
#         output = output - torch.max(output, dim=-1, keepdim=True)[0]
#         loss = self.classification_loss(output, y)   
#         self.opt.zero_grad()
#         loss.backward()
#         self.opt.step()


In [None]:
# Optuna: Learning rate setting

def get_lr_scheduler(optimizer, params):
            if params["lr_decay"] == "exp_decay":
                return torch.optim.lr_scheduler.ExponentialLR(
                    optimizer=optimizer,
                    gamma=0.8,
                )
            elif params["lr_decay"] == "cosine_decay":
                return torch.optim.lr_scheduler.CosineAnnealingLR(
                    optimizer=optimizer,
                    T_max=EPOCHS * STEPS_PER_EPOCH,
                )

In [None]:
# Optuna: Optimizer Setting

def get_optimizer(model,params):
    # We optimize the choice of optimizers as well as their parameters.

    if params["opt"] == "Adam":
        return torch.optim.Adam(model.parameters(),
                                lr=params["lr_init"])
    elif params["opt"] == "SGD":
        return torch.optim.SGD(model.parameters(),
                                lr=params["lr_init"], 
                                momentum=params["opt.sgd.moment"],
                                nesterov=params["opt.sgd.nesterov"])
    elif params["opt"] == "RMSprop":
        return torch.optim.RMSprop(model.parameters(),
                                    lr=params["lr_init"])

In [None]:
# Optuna: Model definition

class Net_FF_Conv(torch.nn.Module):

    def __init__(self, epochs, batch_size, params):
        super().__init__()
        self.num_epochs = epochs
        self.batch_size = batch_size
        self.lr = 0.01 # in kaggle it is 0.001
        self.threshold = 10.0
        self.outchns = 0
        
        self.flatten = nn.Flatten()
        self.model = self.Model(params)
        #self.linear_classifier = Linear_Classifier(10, 10, self.lr, bias=False)

        self.classification_loss = nn.CrossEntropyLoss()
        self.opt = params["opt"]
        self.scheduler = params["lr_decay"]
        
    def Model(params):
        
        structure = 1
        FFDNN = []
        outchn_layer = []
        
        in_v_size = params["input_dim_v"]
        in_h_size = params["input_dim_h"]
        in_chn = params["input_chn"]
        out_v_size = in_v_size
        out_h_size = in_h_size
        out_chn = in_chn

        #############################################################################################################
        
        stopped = 0
        for i in range(params["level"]):
            if structure:
                print("in_size: (", in_v_size,",",in_h_size,",",in_chn,")")
            if (params["kernel_ff {}".format(i)] > 1) & (params["stride_ff {}".format(i)] > 1) & (params["kernel_ff {}".format(i)] + params["stride_ff {}".format(i)] > in_v_size):
                print("Model gen - layer ",params["level"],": Kernel + stride (",params["kernel_ff {}".format(i)]," + ",params["stride_ff {}".format(i)],") is larger than in_v_size=",in_v_size,":")
                while (params["kernel_ff {}".format(i)] > 1) & (params["stride_ff {}".format(i)] > 1) & (params["kernel_ff {}".format(i)] + params["stride_ff {}".format(i)] > in_v_size):
                    if params["kernel_ff {}".format(i)] > params["stride_ff {}".format(i)]:
                        params["kernel_ff {}".format(i)] = params["kernel_ff {}".format(i)] - 1
                    else:
                        params["stride_ff {}".format(i)] = params["stride_ff {}".format(i)] - 1
                print("    -> Reducing them to Kernel + stride = ",params["kernel_ff {}".format(i)]," + ",params["stride_ff {}".format(i)])
            if (params["kernel_ff {}".format(i)] > 1) & (params["stride_ff {}".format(i)] > 1) & (params["kernel_ff {}".format(i)] + params["stride_ff {}".format(i)] > in_h_size):
                print("Model gen - layer ",params["level"],": Kernel + stride (",params["kernel_ff {}".format(i)]," + ",params["stride_ff {}".format(i)],") is larger than in_h_size=",in_h_size,":")
                while (params["kernel_ff {}".format(i)] > 1) & (params["stride_ff {}".format(i)] > 1) & (params["kernel_ff {}".format(i)] + params["stride_ff {}".format(i)] > in_h_size):
                    if params["kernel_ff {}".format(i)] > params["stride_ff {}".format(i)]:
                        params["kernel_ff {}".format(i)] = params["kernel_ff {}".format(i)] - 1
                    else:
                        params["stride_ff {}".format(i)] = params["stride_ff {}".format(i)] - 1
                print("    -> Reducing them to Kernel + stride = ",params["kernel_ff {}".format(i)]," + ",params["stride_ff {}".format(i)])
            out_v_size = int((in_v_size + 2 - params["kernel_ff {}".format(i)])/params["stride_ff {}".format(i)]) +1
            out_h_size = int((in_h_size + 2 - params["kernel_ff {}".format(i)])/params["stride_ff {}".format(i)]) +1
            out_chn = params["filter_ff {}".format(i)]
            if (out_v_size <= 2) | (out_h_size <= 2):
                stopped = i
                out_v_size = in_v_size
                out_h_size = in_h_size 
                out_chn = in_chn
                break
            in_v_size = out_v_size
            in_h_size = out_h_size
            in_chn = out_chn
            if i == 0:
                FFDNN.append(Conv_layer(in_channels=params["input_chn"],
                                out_channels=params["filter_ff {}".format(i)], 
                                kernel_size=(params["kernel_ff {}".format(i)],params["kernel_ff {}".format(i)]),
                                stride=params["stride_ff {}".format(i)],
                                padding=1))
            else:
                FFDNN.append(Conv_layer(in_channels=params["filter_ff {}".format(i-1)],
                                out_channels=params["filter_ff {}".format(i)], 
                                kernel_size=(params["kernel_ff {}".format(i)],params["kernel_ff {}".format(i)]),
                                stride=params["stride_ff {}".format(i)],
                                padding=1))
            outchn_layer.append(out_v_size*out_h_size*out_chn)                
            if structure:
                print("  out_size (cnn): (", out_v_size,",",out_h_size,",",out_chn,")")
            #print(FFDNN)
            
        if stopped:
            print("Model gen - layer ",i,": The model has been reduced too far, stopping feature extractor at level=", i - 1)
            params["level"] = stopped

        FFDNN.append(nn.Linear(np.sum(outchn_layer),params["classes"],bias=False))
        
        return nn.Sequential(*FFDNN)
    
    def predict_FF(self, h_test, y_test):
        self_model.eval()
        pred_acc = []
        for j in range(int(h_test.shape[0]/self.batch_size)):
            x = h_test[j*self.batch_size:(j+1)*self.batch_size].to(DEVICE)
            y = y_test[j*self.batch_size:(j+1)*self.batch_size].to(DEVICE)
            h = self.flatten(x)
            h = get_neutral_label_before_FC(h)            
            input_classification_model = []
            first_layer = 1
            for layer in self.model:
                h = layer(h)
                if(first_layer==0):
                    input_classification_model.append(h)
                first_layer = 0   
            input_classification_model = torch.concat(input_classification_model, dim=-1)
            output = self.linear_classifier(input_classification_model.detach())
            output = output - torch.max(output, dim=-1, keepdim=True)[0]
            pred = output.argmax(1)
            pred_acc.append(pred.eq(y).float().mean().item())
        return sum(pred_acc) / len(pred_acc)
        

    def net_train_FF(self, h_train, y_train):
        self.model.train()
        for i in range(self.num_epochs):
            for j in range(int(h_train.shape[0]/self.batch_size)):
                x = h_train[j*self.batch_size:(j+1)*self.batch_size].to(DEVICE)
                y = y_train[j*self.batch_size:(j+1)*self.batch_size].to(DEVICE)
                x = self.flatten(x)
                x_pos = overlay_y_on_x_before_FC(x, y).to(DEVICE)
                rnd = torch.randperm(x.size(0)).to(DEVICE)
                while(len(torch.where(y[rnd]==y)[0]) > 0):
                    rnd[torch.where(rnd == (self.batch_size-1))] -= 10
                    rnd[torch.where(y[rnd]==y)] += 1
                x_neg = overlay_y_on_x_before_FC(x, y[rnd]).to(DEVICE)
                
                h_pos, h_neg = x_pos, x_neg
                
                for layer in self.model[:-1]:
                    h_pos, h_neg = layer.layer_train(h_pos, h_neg, self.threshold)
                    
                # train linear classifier:
                input_classification_model = []
                h = get_neutral_label_before_FC(x)
                nonfirst_layer = 0
                for layer in self.model[:-1]:
                    h = layer(h)
                    if nonfirst_layer:
                        input_classification_model.append(h)
                    nonfirst_layer = 1  
                input_classification_model = torch.concat(input_classification_model, dim=-1)
                #self.linear_classifier.layer_train(input_classification_model, y)
                output = self.model[end](input_classification_model, y)
                output = output - torch.max(output, dim=-1, keepdim=True)[0]
                
                loss = self.classification_loss(output, y)   
                self.opt.zero_grad()
                loss.backward()
                self.opt.step()


In [None]:
# Optuna: Params definition

def define_search_space(trial):
    # Hyperparameters to be tuned by Optuna.
    params = dict()
    
    params["input_dim_v"] = trial.suggest_categorical("input_dim_v", [28])
    params["input_dim_h"] = trial.suggest_categorical("input_dim_h", [28])
    params["input_chn"] = trial.suggest_categorical("input_chn", [1])
    params["classes"] = trial.suggest_categorical("classes", [10])
    
    ### Optimizer
    #params["opt"] = trial.suggest_categorical("opt", ["SGD"]) # default
    params["opt"] = trial.suggest_categorical("opt", ["Adam"])
    #params["opt"] = trial.suggest_categorical("opt", ["Adam", "SGD", "RMSprop"])
    #params["opt.sgd.moment"] = trial.suggest_categorical("opt.sgd.moment", [0.9]) # default
    #params["opt.sgd.moment"] = trial.suggest_float("opt.sgd.moment", 0.0, 1.0)
    #params["opt.sgd.nesterov"] = trial.suggest_categorical("opt.sgd.nesterov", [True]) # default
    
    #params["lr_init"] = trial.suggest_float("lr_init", 1e-5, 1e-1, log=True)
    params["lr_init"] = trial.suggest_categorical("lr_init", [0.001])
    #params["lr_init"] = trial.suggest_categorical("lr_init", [0.001, 0.0001, 0.00001])
    #params["lr_decay"] = trial.suggest_categorical("lr_decay", ["cosine_decay"]) # default
    params["lr_scheduler"] = trial.suggest_categorical("lr_scheduler", [True])
    if params["lr_scheduler"]:
        params["lr_decay"] = trial.suggest_categorical("lr_decay", ["exp_decay"])
    #params["lr_decay"] = trial.suggest_categorical("lr_decay", ["None","exp_decay","cosine_decay"])
        
    ### Model architecture
    # Feature-Extractor
    #params["level"] = trial.suggest_categorical("level", [1, 2, 3, 4, 5, 6, 7, 8])
    #params["level"] = trial.suggest_categorical("level", [1, 2, 3, 4, 5])
    #params["level"] = trial.suggest_categorical("level", [1, 2, 3, 4])
    params["level"] = trial.suggest_categorical("level", [2, 3, 4])
    #params["level"] = trial.suggest_categorical("level", [2])
    for i in range(params["level"]):
        #params["filter_ff {}".format(i)] = trial.suggest_categorical("filter_ff {}".format(i), [8, 16, 32, 48, 64, 80, 96])
        params["filter_ff {}".format(i)] = trial.suggest_categorical("filter_ff {}".format(i), [8, 16, 32, 48, 64])
        #params["kernel_ff {}".format(i)] = trial.suggest_categorical("kernel_ff {}".format(i), [3, 5, 7, 9, 11, 13, 15])
        #params["kernel_ff {}".format(i)] = trial.suggest_categorical("kernel_ff {}".format(i), [3, 5 , 7])
        params["kernel_ff {}".format(i)] = trial.suggest_categorical("kernel_ff {}".format(i), [3, 5])
        #params["stride_ff {}".format(i)] = trial.suggest_categorical("stride_ff {}".format(i), [1, 2, 3, 4, 5, 6, 7])
        #params["stride_ff {}".format(i)] = trial.suggest_categorical("stride_ff {}".format(i), [1, 2, 3, 4])
        params["stride_ff {}".format(i)] = trial.suggest_categorical("stride_ff {}".format(i), [1, 2])

    return params

In [None]:
# Optuna: Ctrl flow

def objective(trial, args):

    params = define_search_space(trial)
    
    # Create tf.keras model instance.
    model = Net_FF_Conv.Model(params).to(DEVICE)
    
    def print_attr (module):
        if hasattr(module, "bias"):
            print("module ", module," bias:", hasattr(module, "bias"), end="")
            if hasattr(module, "bias") and hasattr(module.bias, "size"):
                print(" size:", hasattr(module.bias, "size"), " size = ", module.bias.size())
            else:
                print("\n")
        else:
            print("module ", module," has no bias")
    #model.apply(print_attr)
    
    #print(model)
    summary(model, (1,params["input_dim_v"],params["input_dim_h"]))
    
    optimizer=get_optimizer(model, params)
    if params["lr_scheduler"]:
        scheduler = get_lr_scheduler(optimizer, params)

    # Create dataset instance.
    train_loader, valid_loader = MNIST_loaders(DEVICE, args)

    
    # Training of the model.
    for epoch in range(args.epochs):
        print("epoch: ", epoch)
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            #print("    batch_idx, train: ", batch_idx)
            #print("        data shape: ", data.size())
            # Limiting training data for faster epochs.
            #if batch_idx * args.batch_size >= args.batch_size * 30:
                #print("    break")
            #    break

            data, target = data.to(DEVICE), target.to(DEVICE)

            #---
            # x_pos = overlay_y_on_x(data, target)
            # rnd = torch.randperm(data.size(0)).to(DEVICE)
            # while(len(torch.where(target[rnd]==target[0])[0]) > 0):
            #     rnd[torch.where(rnd == (args.test_batch_size-1))] -= 10
            #     rnd[torch.where(target[rnd]==target)] += 1
            # x_neg = overlay_y_on_x(data, target[rnd])
                
            # h_pos, h_neg = x_pos, x_neg

            # h_pos, h_neg = self.conv1.layer_train(h_pos, h_neg, self.threshold, do_step=0)
            #----

            #### EXTENSION!!! ####

            optimizer.zero_grad()
            #output = model(data)
            output = model.net_train(data,target)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
        
        if params["lr_scheduler"]:
            scheduler.step()

        print("-------------------------------------------------------")
        
        # Validation of the model.
        model.eval()
        correct = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                #print("    batch_idx, valid: ", batch_idx)
                # Limiting validation data.
                #if batch_idx * args.batch_size >= args.batch_size * 10:
                #    break
                data, target = data.to(DEVICE), target.to(DEVICE)
                
                #output = model(data)
                output = model.predict(data,target)
                
                # Get the index of the max log-probability.
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

        accuracy = correct / min(len(valid_loader.dataset), args.batch_size * 10)

        trial.report(accuracy, epoch)

        # Handle pruning based on the intermediate value.
        # if trial.should_prune():
        #     raise optuna.exceptions.TrialPruned()
        
    return accuracy

In [None]:
# Optuna: Result printout

def show_result(study,best_model_view=False, best_model_train=False):
    pruned_trials = study.get_trials(deepcopy=True, states=[TrialState.PRUNED])
    complete_trials = study.get_trials(deepcopy=True, states=[TrialState.COMPLETE])

    print("\n========================== Statistics ==========================")
    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    best_trial = study.best_trial

    print("  Number: ", best_trial.number)
    print("  Value: ", best_trial.value)

    print("  Params: ")
    for key, value in best_trial.params.items():
        print("    {}: {}".format(key, value))
        
        
    print("\nComplete trials:")
    for i, trial in enumerate(complete_trials):
        print("    {0:2d} [{1:2d}] val={2:6.4f} -> ".format(i,trial.number,trial.value), end="")
        for key, value in trial.params.items():
            print("{}: {} ".format(key,value), end="")
        print("")
        
    print("\nPruned trials:")
    for i, trial in enumerate(pruned_trials):
        print("    {0:2d} [{1:2d}] val={2:6.4f} -> ".format(i,trial.number,trial.value), end="")
        for key, value in trial.params.items():
            print("{}: {} ".format(key,value), end="")
        print("")
                
        
    if best_model_view:
        params = best_trial.params
        model = Model(params).to(DEVICE)
        
        print("\n=========================== Summary ============================")
        summary(model, (1,params["input_dim_v"],params["input_dim_h"]))
        print("")
    
    optimizer=get_optimizer(model, params)
    if params["lr_scheduler"]:
        scheduler = get_lr_scheduler(optimizer, params)

    if best_model_train:
        if not(best_model_view):
            params = best_trial.params
            model = Model(params).to(DEVICE)

        train_loader, valid_loader = MNIST_loaders(DEVICE, args)
        
        # Train model.
        print("\n============================= Training Model =============================")
        for epoch in range(args.epochs):
            print("epoch: ", epoch)
            model.train()
            correct = 0
            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = data.to(DEVICE), target.to(DEVICE)

                optimizer.zero_grad()
                output = model(data)
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
                
                loss = F.nll_loss(output, target)
                loss.backward()
                optimizer.step()
            
            if params["lr_scheduler"]:
                scheduler.step()
                
            accuracy = correct / min(len(train_loader.dataset), args.batch_size * 10)

            trial.report(accuracy, epoch)

        #plt.plot(history.history['accuracy'],color = 'blue', label = 'accuracy')
        #plt.plot(history.history['val_accuracy'],color = 'red', label = 'val')
        #plt.title('Model accuracy')
        #plt.ylabel('Accuracy')
        #plt.xlabel('Epoch')
        #plt.legend
        
        print("\n============================== Saving Model ==============================")
        torch.save(model, "FF.best_model.pytorch")
        print("Saved model to disk")
        
        # evaluate the model
        print("\n============================ Evaluating Model ============================")
        model.eval()
        correct = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                data, target = data.to(DEVICE), target.to(DEVICE)
                output = model(data)
                # Get the index of the max log-probability.
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

        accuracy = correct / min(len(valid_loader.dataset), args.batch_size * 10)

        print("Evaluation Accuracy:")
        trial.report(accuracy, epoch)


def main(args):
    # Add stream handler of stdout to show the messages
    optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))
    study_name = "FF-Sandbox-Elaborate"  # Unique identifier of the study.
    storage_name = "sqlite:///{}.db".format(study_name)
    
    if os.path.exists("{}.db".format(study_name)):
        optuna.delete_study(study_name=study_name, storage=storage_name)
        
    study = optuna.create_study(
        direction="maximize", pruner=optuna.pruners.MedianPruner(n_startup_trials=2),
        study_name=study_name, storage=storage_name
    )

    #study.optimize(objective, n_trials=150, timeout=600)
    #study.optimize(objective, n_trials=150)
    #study.optimize(objective, n_trials=50)
    #study.optimize(objective, n_trials=20)
    study.optimize(lambda trial: objective(trial, args), n_trials=2)
    #study.optimize(objective, n_trials=300)
    #study.optimize(objective, n_trials=400, args)

    show_result(study, best_model_view=True, best_model_train=True)

if __name__ == "__main__":
        # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=128, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--no-mps', action='store_true', default=False,
                        help='disables macOS GPU training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    #parser.add_argument('--log-interval', type=int, default=10, metavar='N',
    #                    help='how many batches to wait before logging training status')
    parser.add_argument('--log-interval', type=int, default=1, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    #args = parser.parse_args()
    args, unknown = parser.parse_known_args()
    print("unknown=", unknown)
    # Until here

    main(args)

unknown= ['--f=/home/ralf/.local/share/jupyter/runtime/kernel-v3d15f8a0bd67b7609977f319af35feb3ac7cabe7b.json']


[I 2025-01-12 19:07:40,747] A new study created in RDB with name: FF-Sandbox-Elaborate


A new study created in RDB with name: FF-Sandbox-Elaborate
A new study created in RDB with name: FF-Sandbox-Elaborate
A new study created in RDB with name: FF-Sandbox-Elaborate
A new study created in RDB with name: FF-Sandbox-Elaborate
A new study created in RDB with name: FF-Sandbox-Elaborate
A new study created in RDB with name: FF-Sandbox-Elaborate
A new study created in RDB with name: FF-Sandbox-Elaborate
A new study created in RDB with name: FF-Sandbox-Elaborate
A new study created in RDB with name: FF-Sandbox-Elaborate


[W 2025-01-12 19:07:41,131] Trial 0 failed with parameters: {'input_dim_v': 28, 'input_dim_h': 28, 'input_chn': 1, 'classes': 10, 'opt': 'Adam', 'lr_init': 0.001, 'lr_scheduler': True, 'lr_decay': 'exp_decay', 'level': 3, 'filter_ff 0': 48, 'kernel_ff 0': 5, 'stride_ff 0': 2, 'filter_ff 1': 64, 'kernel_ff 1': 3, 'stride_ff 1': 2, 'filter_ff 2': 8, 'kernel_ff 2': 5, 'stride_ff 2': 2} because of the following error: RuntimeError('mat1 and mat2 shapes cannot be multiplied (48x3 and 11320x10)').
Traceback (most recent call last):
  File "/home/ralf/.conda/envs/proj_odl_pytorch_env/lib/python3.10/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_3214671/2995609934.py", line 128, in <lambda>
    study.optimize(lambda trial: objective(trial, args), n_trials=2)
  File "/tmp/ipykernel_3214671/1588416694.py", line 20, in objective
    summary(model, (1,params["input_dim_v"],params["input_dim_h"]))
  File "/home/ralf/.conda/

in_size: ( 28 , 28 , 1 )
  out_size (cnn): ( 13 , 13 , 48 )
in_size: ( 13 , 13 , 48 )
  out_size (cnn): ( 7 , 7 , 64 )
in_size: ( 7 , 7 , 64 )
  out_size (cnn): ( 3 , 3 , 8 )
Trial 0 failed with parameters: {'input_dim_v': 28, 'input_dim_h': 28, 'input_chn': 1, 'classes': 10, 'opt': 'Adam', 'lr_init': 0.001, 'lr_scheduler': True, 'lr_decay': 'exp_decay', 'level': 3, 'filter_ff 0': 48, 'kernel_ff 0': 5, 'stride_ff 0': 2, 'filter_ff 1': 64, 'kernel_ff 1': 3, 'stride_ff 1': 2, 'filter_ff 2': 8, 'kernel_ff 2': 5, 'stride_ff 2': 2} because of the following error: RuntimeError('mat1 and mat2 shapes cannot be multiplied (48x3 and 11320x10)').
Traceback (most recent call last):
  File "/home/ralf/.conda/envs/proj_odl_pytorch_env/lib/python3.10/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_3214671/2995609934.py", line 128, in <lambda>
    study.optimize(lambda trial: objective(trial, args), n_trials=2)
  File "/tmp/ipy

[W 2025-01-12 19:07:41,141] Trial 0 failed with value None.


Trial 0 failed with value None.
Trial 0 failed with value None.
Trial 0 failed with value None.
Trial 0 failed with value None.
Trial 0 failed with value None.
Trial 0 failed with value None.
Trial 0 failed with value None.
Trial 0 failed with value None.
Trial 0 failed with value None.


RuntimeError: mat1 and mat2 shapes cannot be multiplied (48x3 and 11320x10)

In [None]:
#os.system("optuna-dashboard sqlite:///DCNN-Elaborate.db --port 8081 --host 0.0.0.0 &")