In [1]:
from distutils.command import config

def random_search(problem, function_evaluations=10, **kwargs):
    """Evaluate randomly sampled configurations of ``problem``.

    For each evaluation a configuration dict is built by walking the
    hyperparameter table returned by ``GET_RANGES(problem)`` in its own
    iteration order, so that conditions on a hyperparameter can look at the
    values already drawn for earlier ones.

    Sampling rules, as encoded in each hyperparameter entry:
      * ``'sample' == 0`` (uniform):
          - ``'type' == 0``: one element of ``'range'`` is chosen at random;
          - ``'type' == 1``: a float is drawn uniformly between the two
            range endpoints;
          - any other type: an integer is drawn from the closed interval
            between the endpoints.
      * any other ``'sample'`` value (log): a value is drawn uniformly
        between the logs of the endpoints, exponentiated back, and rounded
        when ``'type' == 2``.

    Args:
        problem: Problem identifier forwarded to ``GET_RANGES`` and
            ``GET_CONFIG_PERFORMANCE``.
        function_evaluations: Number of configurations to sample and evaluate.
        **kwargs: Accepted for call compatibility; unused.

    Returns:
        tuple: ``(history, configs)`` where ``configs`` is the list of sampled
        configuration dicts and ``history`` holds the value
        ``GET_CONFIG_PERFORMANCE`` returned for each of them, in order.
    """
    history = []
    configs = []

    RANGES = GET_RANGES(problem)

    for _ in range(function_evaluations):
        config = {}

        for name in RANGES:
            spec = RANGES[name]

            # If there is a condition and it does not hold, skip this
            # hyperparameter (``condition`` is truthy when it must be skipped).
            if condition(RANGES, config, name):
                continue

            # Uniformly sampled categorical: pick one of the listed options.
            # Handled before the endpoints are read so that an option list
            # with a single element does not raise IndexError.
            if spec['sample'] == 0 and spec['type'] == 0:
                config[name] = np.random.choice(spec['range'])
                continue

            a = spec['range'][0]
            b = spec['range'][1]

            if spec['sample'] == 0:
                if spec['type'] == 1:
                    value = np.random.uniform(a, b)
                else:
                    # randint excludes the upper bound, hence b + 1.
                    value = np.random.randint(a, b + 1)
            else:
                # Sample in log space, then exponentiate the value back.
                value = np.exp(np.random.uniform(np.log(a), np.log(b)))
                if spec['type'] == 2:
                    value = round(value)

            config[name] = value

        if problem != 'interactive':
            config['nodes_per_layer'] = return_node_list(config)
        configs.append(config)
        history.append(GET_CONFIG_PERFORMANCE(config, problem))

    return history, configs

# Function for checking if hyperparameter has a condition and whether it holds
def condition(ranges, config, i):
    """Tell whether hyperparameter ``i`` must be skipped for ``config``.

    Note the polarity: the result is True when the hyperparameter should NOT
    be sampled.

    Args:
        ranges: Mapping from hyperparameter name to its description dict; a
            description may carry a callable under the ``'condition'`` key.
        config: The partially built configuration passed to that callable.
        i: Name of the hyperparameter to check.

    Returns:
        bool: True when ``ranges[i]`` has a ``'condition'`` callable and its
        result for ``config`` compares equal to ``False``; False otherwise
        (including when there is no condition at all).
    """
    spec = ranges[i]
    if 'condition' not in spec:
        return False
    # The comparison with False (rather than ``not ...``) is deliberate: a
    # callable returning None is not treated as a failed condition.
    return bool(spec['condition'](config) == False)

In [22]:
from utils import GET_CONFIG_PERFORMANCE, GET_RANGES, SampleType, ParamType,normal_dist # make sure to make use of ParamType and SampleType in your code
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.stats import norm
from scipy import stats
import seaborn as sns
from scipy.stats import truncnorm
import math


def ugly_condition_check(x_star, i):
    """Tell whether hyperparameter ``i`` should be skipped given ``x_star``.

    A hyperparameter is skipped when ``x_star`` already holds an ``'nlayers'``
    value and the integer formed by the trailing decimal digits of the name
    ``i`` is greater than that value (presumably the number is a layer index;
    the naming scheme is defined outside this function).

    The whole trailing number is parsed, so a name ending in ``12`` is
    compared as 12 rather than as its last digit 2.

    Args:
        x_star: Dict of hyperparameter values chosen so far.
        i: Hyperparameter name.

    Returns:
        bool: True when the hyperparameter must be skipped, False otherwise.
    """
    if 'nlayers' not in x_star:
        return False

    # Everything after the last non-digit character is the numeric suffix.
    suffix = i[len(i.rstrip('0123456789')):]
    if not suffix:
        return False

    return bool(int(suffix) > x_star['nlayers'])


def tpe2(problem, function_evaluations=120, random_warmup=30, gamma=1, n=10, **kwargs):
    """Optimise ``problem`` with a Tree-structured-Parzen-Estimator style loop.

    The search starts with ``random_warmup`` random-search evaluations. Each
    following iteration splits all evaluated configurations into a "good" and
    a "bad" group (``good_bad``), then builds a new configuration one
    hyperparameter at a time:

      * type 0 (categorical) hyperparameters are sampled uniformly from
        their options;
      * for every other hyperparameter, ``n`` candidates are drawn from a
        truncated normal centred on one randomly chosen observed value, each
        candidate is scored by the ratio of the good-group density to the
        bad-group density (``get_EI``), and the best-scoring candidate is
        kept. Log-sampled hyperparameters (``'sample' == 1``) are handled in
        log space and exponentiated back; type 2 values are rounded.

    Note that with the default ``gamma=1`` the split in ``good_bad`` puts
    every observation in the good group, so the bad-group density is never
    used.

    Args:
        problem: Problem identifier forwarded to ``GET_RANGES``,
            ``random_search`` and ``GET_CONFIG_PERFORMANCE``.
        function_evaluations: Number of model-guided evaluations performed
            after the warm-up.
        random_warmup: Number of initial random-search evaluations.
        gamma: Fraction of the sorted observations assigned to the good group.
        n: Number of candidates drawn per hyperparameter and iteration.
        **kwargs: Accepted for call compatibility; unused.

    Returns:
        pandas.Series: The row (hyperparameters plus ``'loss'``) with the
        smallest ``'loss'`` among all evaluated configurations.
    """
    RANGES = GET_RANGES(problem)

    # Initial warm-up using random search
    loss, configs = random_search(problem, random_warmup)
    hyperparameters = to_df(configs, loss)

    for _ in range(function_evaluations):

        good_df, bad_df = good_bad(gamma, hyperparameters)
        x_star = {}

        for i in good_df.columns.difference(['loss', 'nodes_per_layer']):
            # Skip hyperparameters that are inactive for the values chosen
            # so far in this iteration.
            if ugly_condition_check(x_star, i):
                continue

            # If categorical, sample uniform
            if RANGES[i]['type'] == 0:
                x_star[i] = sample_uniform(RANGES[i])
                continue

            x = good_df[i].dropna().values.tolist()
            y = bad_df[i].dropna().values.tolist()
            full = x + y

            a = RANGES[i]['range'][0]
            b = RANGES[i]['range'][1]

            log_scale = RANGES[i]['sample'] == 1
            if log_scale:
                x = np.log(x)
                y = np.log(y)
                full = np.log(full)
                a, b = np.log(a), np.log(b)

            # Sort because we need the sd to the furthest neighbour
            x.sort()
            y.sort()
            full.sort()

            # Candidates come from a kernel centred on any observed value.
            std_full = scales(full, a, b)
            samples = sample_truncnorm(a, b, full, std_full, n)

            # Bandwidths stay 0 for an empty group; get_EI never uses them
            # in that case.
            max_sd, max_sd_y = 0, 0
            if len(x) > 0:
                max_sd = scales(x, a, b)
            if len(y) > 0:
                max_sd_y = scales(y, a, b)

            EI = get_EI(samples, x, y, a, b, max_sd, max_sd_y)

            value = samples[np.argmax(EI)]
            if log_scale:
                value = np.exp(value)
            if RANGES[i]['type'] == 2:
                value = round(value)
            x_star[i] = value

        if problem != 'interactive':
            x_star['nodes_per_layer'] = return_node_list(x_star)

        x_star['loss'] = GET_CONFIG_PERFORMANCE(x_star, problem)
        # DataFrame.append was removed in pandas 2.0; concatenate a one-row
        # frame instead. Hyperparameters missing from x_star become NaN.
        hyperparameters = pd.concat(
            [hyperparameters, pd.DataFrame([x_star])], ignore_index=True
        )

    # idxmin returns an index label, so look the row up by label.
    best_hp = hyperparameters.loc[hyperparameters['loss'].idxmin()]
    return best_hp

def get_EI(samples, x, y, a, b, max_sd, max_sd_y):
    """Score every candidate by the ratio of good to bad density.

    Args:
        samples: Candidate values to score.
        x: Observations of the good group.
        y: Observations of the bad group.
        a, b: Bounds forwarded to ``get_pdf``.
        max_sd: Per-observation bandwidths for ``x``.
        max_sd_y: Per-observation bandwidths for ``y``.

    Returns:
        list: One score per candidate, in order. The score is
        ``l / g`` where ``l`` and ``g`` are the ``get_pdf`` densities under
        the good and bad group; an empty group contributes a density of 0,
        and when ``g`` is not positive the score is ``l`` alone.
    """
    has_good = len(x) > 0
    has_bad = len(y) > 0

    scores = []
    for candidate in samples:
        good_density = get_pdf(candidate, x, a, b, max_sd) if has_good else 0
        bad_density = get_pdf(candidate, y, a, b, max_sd_y) if has_bad else 0

        # Avoid dividing by a zero density: fall back to the numerator.
        if bad_density > 0:
            scores.append(good_density / bad_density)
        else:
            scores.append(good_density)
    return scores

def return_node_list(x):
    """Collect the values of all entries whose key contains 'nodes_in_layer'.

    Args:
        x: Mapping of hyperparameter names to values.

    Returns:
        list: The matching values, in the mapping's key order.
    """
    return [x[key] for key in x.keys() if 'nodes_in_layer' in key]
        
def to_df(configs, loss):
    """Turn a list of configuration dicts and their losses into a DataFrame.

    Args:
        configs: List of configuration dicts.
        loss: List of loss values, one per configuration.

    Returns:
        pandas.DataFrame: One row per configuration with one column per
        configuration key, followed by a ``'loss'`` column.
    """
    paired = pd.DataFrame({"configs": configs, "loss": loss})

    # Expand each configuration dict into its own set of columns.
    expanded = paired["configs"].apply(pd.Series)
    expanded['loss'] = paired["loss"]
    return expanded
            
# Function for dividing samples into good and bad dataframes    
def good_bad(gamma, hyper_parameters):
    """Split the observations into a good and a bad group by loss.

    Args:
        gamma: Fraction of the rows (after sorting by ascending ``'loss'``)
            that goes into the good group; the row count is truncated to an
            integer.
        hyper_parameters: DataFrame with a ``'loss'`` column.

    Returns:
        tuple: ``(good_df, bad_df)`` — the first ``int(gamma * n_rows)`` rows
        of the loss-sorted, re-indexed frame and the remaining rows.
    """
    ranked = hyper_parameters.sort_values(by=["loss"]).reset_index(drop=True)
    n_good = int(gamma * len(ranked))
    return ranked.iloc[:n_good], ranked.iloc[n_good:]

def sample_truncnorm(a, b, x, sd, n):
    """Draw ``n`` samples from a truncated normal centred on a random point.

    One index into ``x`` is picked uniformly at random; the samples come from
    a normal with that point as mean and the matching entry of ``sd`` as
    standard deviation, truncated to ``[a, b]``.

    Args:
        a, b: Lower and upper truncation bounds (in the same units as ``x``).
        x: Observed values usable as centres.
        sd: Standard deviations, indexed like ``x``.
        n: Number of samples to draw.

    Returns:
        numpy.ndarray: The ``n`` sampled values.
    """
    centre_idx = np.random.choice(range(len(x)))
    loc, scale = x[centre_idx], sd[centre_idx]

    # scipy expects the truncation bounds in standard-deviation units
    # relative to loc.
    lower_std = (a - loc) / scale
    upper_std = (b - loc) / scale
    return stats.truncnorm.rvs(lower_std, upper_std, loc=loc, scale=scale, size=n)

def get_pdf(x_i, x, a, b, sd):
    """Evaluate an equally weighted mixture of truncated normals at ``x_i``.

    Every observation ``x[k]`` contributes one normal kernel with mean
    ``x[k]`` and standard deviation ``sd[k]``, truncated to ``[a, b]``.

    Args:
        x_i: Point at which the density is evaluated.
        x: Kernel centres.
        a, b: Lower and upper truncation bounds, in the same units as ``x``.
        sd: Kernel standard deviations, indexed like ``x``.

    Returns:
        float: The mean of the kernel densities at ``x_i``; ``0.0`` when
        ``x`` is empty.
    """
    n = len(x)
    if n == 0:
        return 0.0

    total = 0
    for k in range(n):
        mean = x[k]
        sigma = sd[k]
        # scipy wants the bounds standardised per kernel. Use fresh names:
        # overwriting a and b here would corrupt the bounds of every
        # following kernel.
        lower_std = (a - mean) / sigma
        upper_std = (b - mean) / sigma
        total += stats.truncnorm.pdf(x_i, lower_std, upper_std, loc=mean, scale=sigma)

    return total / n

def scales(x, a, b):
    """Compute one kernel bandwidth per observation.

    The bandwidth of an observation is the larger of the gaps to its left and
    right neighbour (the first and last observation only have one neighbour),
    clipped from below at ``epsilon = (b - a) / min(100, len(x) + 2)`` and
    from above at ``b - a``.

    Args:
        x: Observations sorted in ascending order.
        a, b: Lower and upper bound of the hyperparameter range.

    Returns:
        list: ``len(x)`` floats; entry ``k`` is the bandwidth for ``x[k]``.
        A single observation gets ``epsilon``; empty input gives ``[]``.
    """
    n_points = len(x)
    if n_points == 0:
        return []

    width = b - a
    epsilon = width / min(100, n_points + 2)

    # gaps[k] is the distance between x[k] and x[k + 1]; it is empty for a
    # single observation.
    gaps = np.diff(np.asarray(x, dtype=float))
    # Pad with 0 so the end points just use their only neighbour.
    left_gap = np.concatenate(([0.0], gaps))
    right_gap = np.concatenate((gaps, [0.0]))

    sigmas = np.clip(np.maximum(left_gap, right_gap), epsilon, width)
    return sigmas.tolist()

def sample_uniform(hyperparameter):
    """Pick one entry of ``hyperparameter['range']`` uniformly at random.

    Args:
        hyperparameter: Hyperparameter description with a ``'range'``
            sequence of options.

    Returns:
        The chosen option, as returned by ``np.random.choice``.
    """
    return np.random.choice(hyperparameter['range'])

In [23]:
# Run the optimiser on the 'interactive' problem with its default settings;
# the returned best row is shown as the cell output.
tpe2(problem='interactive')

{'hyper1': -11.27072067213108, 'hyper2': 24.66278494863046}
{'hyper1': 27.32204195569517, 'hyper2': -12.075825093706367}
{'hyper1': -2.8916881964227343, 'hyper2': -3.9619414933890766}
{'hyper1': 20.493776618846688, 'hyper2': -16.086472598477812}
{'hyper1': -22.263579021246578, 'hyper2': -27.749229675733844}
{'hyper1': 14.764399801695937, 'hyper2': 24.74287331545854}
{'hyper1': 37.98080299455944, 'hyper2': 17.93708213945372}
{'hyper1': -1.395226085709183, 'hyper2': -8.69654567949091}
{'hyper1': 6.088328373834962, 'hyper2': 28.531608954614015}
{'hyper1': -11.59285089953719, 'hyper2': -0.17130193641611413}
{'hyper1': -1.8344344886361625, 'hyper2': 6.0755932931441095}
{'hyper1': -27.165561184803344, 'hyper2': 2.9435574339122965}
{'hyper1': 20.476628725876942, 'hyper2': 31.22097805025605}
{'hyper1': -4.980677145967405, 'hyper2': -0.7241957701426713}
{'hyper1': 38.61551060323657, 'hyper2': 24.339997237730657}
{'hyper1': 13.309480128317084, 'hyper2': -31.165097690949022}
{'hyper1': 15.0543392

hyper1       -1.834434
hyper2        6.075593
loss     -10313.926922
Name: 10, dtype: float64

In [None]:
from scipy.stats import multivariate_normal

# NOTE(review): scratch call, never executed in this notebook (no execution
# count). The argument is a list holding an empty string, which is not a
# numeric mean vector, so this presumably raises when run — confirm the
# intent or remove the cell.
multivariate_normal([''])

In [None]:
def normal_dist(x, mean, sd):
    """Evaluate a Gaussian-shaped bump scaled by ``pi * sd``.

    Computes ``(pi * sd) * exp(-0.5 * ((x - mean) / sd) ** 2)``. This is not
    a normalised probability density: its value at ``x == mean`` is
    ``pi * sd``.

    Args:
        x: Point or numpy array of points to evaluate.
        mean: Centre of the bump.
        sd: Width of the bump; also scales its height.

    Returns:
        The bump value(s), with the same shape as ``x``.
    """
    z = (x - mean) / sd
    amplitude = np.pi * sd
    return amplitude * np.exp(-0.5 * z**2)

In [None]:
# Scratch computation of two loss terms built on normal_dist (whichever
# definition is currently bound in the kernel: the one in this notebook or
# the one imported from utils).
# NOTE(review): 0.00942477796076938 is numerically pi * 0.003, i.e. the value
# this notebook's normal_dist returns at x == mean for sd=0.003, and
# 106.1032953945969 looks like its reciprocal — confirm the intended scaling.
ln_loss = -(normal_dist(0.061932, mean=0.01, sd=0.003) + 0.00942477796076938)*106.1032953945969
# The term below is summed over the three values in the array and divided by 3.
# NOTE(review): 0.0015915833701941327 looks like 1 / 628.305132314888; the
# origin of 628.305132314888 is not shown here (close to, but not exactly,
# pi * 200) — confirm.
nodes_loss = np.sum((-normal_dist(np.array([283, 45, 84]), mean=256, sd=200)+628.305132314888)*1.2*0.0015915833701941327)/3

In [None]:
# Show the sum of the two loss terms as the cell output.
ln_loss + nodes_loss