In [25]:
import pandas as pd
import numpy as np
import copy
import statsmodels.api as sm

In [209]:
def get_coords_from_bounds(bounds):
    """Return the corner coordinates that delimit a region of the search space.

    Parameters
    ----------
    bounds : list of (spec, name) pairs, one per hyperparameter.  ``spec`` is
        * a tuple  -> continuous range; each element of the tuple is a corner value
        * a set    -> semi-continuous values; only the smallest and largest
                      value are used (a single value if the set has one element)
        * a dict   -> discrete values; every entry of ``spec['values']`` is used

    Returns
    -------
    list of lists: every combination of the per-hyperparameter corner values,
    in the order of ``bounds``, with the first hyperparameter varying slowest.

    Raises
    ------
    TypeError if a spec is not a tuple, set or dict.
    """

    def _corner_values(spec):
        """ Helper: the values one hyperparameter contributes to the corners """
        if isinstance(spec, tuple):
            return list(spec)
        if isinstance(spec, set):
            # sets have no defined iteration order, so take min/max explicitly
            if len(spec) == 1:
                return list(spec)
            return [min(spec), max(spec)]
        if isinstance(spec, dict):
            return list(spec['values'])
        raise TypeError('unsupported bound specification: %r' % (spec,))

    # Seed with the first hyperparameter, then extend by one hyperparameter at a time
    boundary_coordinates = [[value] for value in _corner_values(bounds[0][0])]

    for values in bounds[1:]:
        corner_values = _corner_values(values[0])
        boundary_coordinates = [
            init_coord + [value]
            for init_coord in boundary_coordinates
            for value in corner_values
        ]

    return boundary_coordinates



def get_centre(bounds, categorical):
    """Helper that gets the centre components of a region (one per hyperparameter).

    Parameters
    ----------
    bounds : list of (spec, name) pairs; ``spec`` is a tuple (continuous),
        a set (semi-continuous) or a dict with a 'values' entry (discrete).
    categorical : dict mapping hyperparameter name -> bool.  Not modified.

    Returns
    -------
    (centre_components, new_categorical)
        centre_components : list with one entry per hyperparameter
            * tuple spec -> sum of the tuple's elements divided by 2
            * set spec with more than two values -> the middle value in sorted order
            * set spec with at most two values -> a sorted tuple of those values
              (a tuple entry is later unpacked into one centre per value)
            * dict spec -> ``spec['values']`` unchanged
        new_categorical : copy of ``categorical`` where every set spec with at
            most two values has been flagged True, because no values remain
            in between to subdivide further.

    Raises
    ------
    TypeError if a spec is not a tuple, set or dict.
    """

    centre_components = list()
    tmp_cat = copy.deepcopy(categorical)

    for bound in bounds:
        spec, name = bound[0], bound[1]

        if isinstance(spec, tuple):  # tuple: continuous values
            # take the mean value
            centre_components.append(sum(spec) / 2)

        elif isinstance(spec, set):  # set: semi-continuous values
            # sets are unordered, so sort before picking positions
            ordered = sorted(spec)
            if len(ordered) <= 2:
                # pass the values on as a tuple, which is treated as discrete options
                centre_components.append(tuple(ordered))

                # nothing left between the current bounds: treat as categorical from now on
                tmp_cat[name] = True
            else:
                # take the middle
                centre_components.append(ordered[len(ordered) // 2])

        elif isinstance(spec, dict):  # dict: discrete values
            centre_components.append(spec['values'])

        else:
            raise TypeError('unsupported bound specification: %r' % (spec,))

    # returns 1. components that can be unpacked into multiple centres; 2. new categorical labels
    return centre_components, tmp_cat



def unpack_centre(centre_components):
  """ Helper that expands centre components into the full list of centres.

  A component that is a tuple stands for several alternative values and
  produces one centre per alternative; any other component is a single
  fixed value.  Returns a list of tuples, one per centre.
  """

  partial_centres = [[]]
  for component in centre_components:
      # wrap a fixed value so both cases can be handled uniformly
      options = component if type(component) is tuple else (component,)

      # the newest component varies slowest: all prefixes for option 1, then option 2, ...
      partial_centres = [
          copy.deepcopy(prefix) + [option]
          for option in options
          for prefix in partial_centres
      ]

  return [tuple(partial) for partial in partial_centres]


  
def get_categorical(new_cat, boundaries):
  """ Helper that lists every combination of the categorical features' values (for use in OLS).

  Only hyperparameters flagged exactly True in new_cat take part; their
  distinct values are read from boundaries[name] and used in sorted order.
  Returns a list of lists, one value per categorical hyperparameter.
  """

  combos = [[]]
  for name, flag in new_cat.items():
    if flag is not True:
      continue

    levels = sorted(set(boundaries[name]))

    # the newest feature varies slowest: all prefixes for level 1, then level 2, ...
    combos = [
        copy.deepcopy(prefix) + [level]
        for level in levels
        for prefix in combos
    ]

  return combos


def get_new_bounds(bounds, centre, categorical):
    """ Function to split a region into new bounds around its centre.

    First derives the per-hyperparameter range components from the bounds
    and the centre, then combines those components into the list of new bounds.
    """

    return make_bounds(get_range_components(bounds, centre, categorical))


def get_range_components(bounds, centre, categorical):
    """Helper that gets the range components used to build the new bounds.

    Parameters
    ----------
    bounds : list of (spec, name) pairs; ``spec`` is a tuple (continuous),
        a set (semi-continuous) or a dict with a 'values' entry (discrete).
    centre : sequence with one centre value per hyperparameter, aligned with ``bounds``.
    categorical : dict mapping hyperparameter name -> bool.

    Returns
    -------
    list of (ranges, name) pairs, aligned with ``bounds``:
        * tuple spec -> ((low, centre), (centre, high))
        * set spec, categorical is False -> (lower_set, upper_set): the values
          of PARAMETER_BOUNDS[name] lying in [min, centre] and [centre, max]
        * set spec otherwise -> the set itself
        * dict spec -> set of ``spec['values']``

    Reads the module-level PARAMETER_BOUNDS for non-categorical set specs.

    Raises
    ------
    TypeError if a spec is not a tuple, set or dict.
    """

    range_components = list()
    for i, bound in enumerate(bounds):
        spec, name = bound[0], bound[1]

        if isinstance(spec, tuple):
            # halve the continuous interval at the centre
            ranges = ((spec[0], centre[i]), (centre[i], spec[1]))

        elif isinstance(spec, set):
            if categorical[name] is False:
                # sets are unordered, so take the extremes explicitly
                low, high = min(spec), max(spec)
                mid = centre[i]

                # both halves are drawn from the originally allowed values and share the centre
                allowed_values = PARAMETER_BOUNDS[name]
                lower_range = {value for value in allowed_values if low <= value <= mid}
                upper_range = {value for value in allowed_values if mid <= value <= high}

                ranges = (lower_range, upper_range)
            else:
                ranges = spec

        elif isinstance(spec, dict):
            ranges = set(spec['values'])

        else:
            raise TypeError('unsupported bound specification: %r' % (spec,))

        range_components.append((ranges, name))

    return range_components



def make_bounds(range_components):
    """Helper that makes the new bounds from range components.

    Parameters
    ----------
    range_components : list of (ranges, name) pairs, one per hyperparameter.
        * ranges is a tuple -> each element becomes one alternative spec
        * ranges is a set   -> each value becomes a single-element set spec

    Returns
    -------
    list of bounds; each bound is a list of (spec, name) pairs covering every
    hyperparameter, and the list holds every combination of the alternatives
    (first hyperparameter varying slowest).  An empty input gives an empty list.

    Raises
    ------
    TypeError if a ``ranges`` entry is neither a tuple nor a set.
    """

    def _alternatives(ranges):
        """ Helper: the alternative specs one hyperparameter contributes """
        if isinstance(ranges, tuple):
            return list(ranges)
        if isinstance(ranges, set):
            return [{value} for value in ranges]
        raise TypeError('unsupported range component: %r' % (ranges,))

    if not range_components:
        return []

    bounds = [[]]
    for component in range_components:
        ranges, name = component[0], component[1]
        alternatives = _alternatives(ranges)

        # copy the prefix so bounds built from it do not share their earlier entries
        bounds = [
            copy.deepcopy(bound) + [(alternative, name)]
            for bound in bounds
            for alternative in alternatives
        ]

    return bounds



def rebuild_bounds_to_original_format(tmp_boundary, new_cat, bounds):
    """ Helper that turns the DataFrame form of a region back into the (spec, name) bound format.

    Every column of tmp_boundary except 'score' yields one (spec, name) pair:
      * flagged in new_cat          -> {'values': <tuple of the distinct column values>}
      * PARAMETER_BOUNDS[col] a set -> the set of original values between the column's min and max
      * otherwise (continuous)      -> (min, max) of the column
    The bounds argument is not used.
    """

    features = tmp_boundary.drop(['score'], axis = 1)

    rebuilt = []
    for col in features.columns:
        observed = set(features[col])

        # already categorical: keep the observed values as discrete options
        if new_cat[col] == True:
            rebuilt.append(({'values': tuple(observed)}, col))
            continue

        if type(PARAMETER_BOUNDS[col]) is set:
            highest = max(observed)
            lowest = min(observed)
            # keep only the originally allowed values inside the observed span
            kept = {
                orig_val
                for orig_val in PARAMETER_BOUNDS[col]
                if lowest <= orig_val <= highest
            }
            rebuilt.append((kept, col))
        else:
            # continuous values
            rebuilt.append(((min(observed), max(observed)), col))

    return rebuilt

In [210]:
# Search-space definition: only the bounds are given here.
#   tuple -> continuous range, set -> semi-continuous values,
#   dict with 'values' -> discrete values
PARAMETER_BOUNDS = {
    'A': (0, 10),
    'B': (-4, 2),
    'C': {0, 1, 2, 3, 4},
    'D': {'values': ('a', 'b')},  # automate?
}

# Alternative orderings of the hyperparameters (kept for reference):
#
# parameter_bounds = {
# 'C': {0, 1, 2},
# 'D': {'values': ('a', 'b')},
# 'A': (0, 10),
# 'B': (0, 100)}
#
# parameter_bounds = {
# 'D': {'values': ('a', 'b')},
# 'A': (0, 10),
# 'B': (0, 100),
# 'C': {0, 1, 2}}

# (spec, name) pairs and the names alone, both in PARAMETER_BOUNDS order
original_bounds = [(spec, name) for name, spec in PARAMETER_BOUNDS.items()]
hyperparameter_names = list(PARAMETER_BOUNDS)

# which hyperparameters start out as categorical  (automate?)
categorical = dict(A=False, B=False, C=False, D=True)

# '10^' means the searched value is an exponent: the tuned value is 10**value
transform = dict(A=None, B='10^', C=None, D=None)

In [212]:

#def tune():


# Driver loop: repeatedly subdivide the search space.
# For every region it (1) collects the region's corner coordinates, (2) fits one
# OLS model per centre on the corners, (3) predicts the centre score and
# (4) splits the region into new bounds around that centre.
# Scores are placeholders (0) until the tuning class is wired in.

# start by putting original bounds into a list; this list is the object that will control whether algorithm has terminated
bounds_list = [original_bounds]
X = 0  # round counter

while bounds_list: # gets reset every time, so algo will keep running if there are bounds to operate on
  # hard cap on the number of rounds (the split below is currently unconditional)
  if X > 3:
    break
  X += 1
  old_bounds_list = copy.deepcopy(bounds_list)
  bounds_list = list()

  print('\n\n', X)

  for bounds in old_bounds_list: # now run algorithm on every bound

    # get the coordinates that define the bounds
    coords_to_tune = get_coords_from_bounds(bounds)

    # one single-row DataFrame per corner, collected in lists and concatenated once:
    #   tuning_frames   -> transformed values (what would be passed to the tuner)
    #   boundary_frames -> untransformed values (training data for the OLS fit)
    tuning_frames = []
    boundary_frames = []
    for coord in coords_to_tune:

      combo_dict = {}
      combo_OLS_dict = {}
      for i in range(len(hyperparameter_names)):

        combo_OLS_dict[hyperparameter_names[i]] = [coord[i]]

        # TODO: add cases

        # add cases for transforming
        if transform[hyperparameter_names[i]] == '10^':
          combo_dict[hyperparameter_names[i]] = [10**coord[i]]

        # no transform: use the value as is
        else:
          combo_dict[hyperparameter_names[i]] = [coord[i]]

      # get_score(combo) JIAXING TUNING CLASS
      combo_dict['score'] = [0]
      combo_OLS_dict['score'] = [0]
      tmp = pd.DataFrame(combo_dict)
      tuning_frames.append(tmp) # TODO: TMP tuning result

      tmp_boundary = pd.DataFrame(combo_OLS_dict)
      boundary_frames.append(tmp_boundary)

    # pd.concat is used because DataFrame.append was removed in pandas 2.0
    tuning_results = pd.concat(tuning_frames) if tuning_frames else pd.DataFrame()
    boundaries = pd.concat(boundary_frames) if boundary_frames else pd.DataFrame()

    # get the components that make up the centre (as well as new categories); and then unpack them into centres
    centre_components, new_cat = get_centre(bounds, categorical) # TODO: add in - change to iterate over bounds with an index

    centres = unpack_centre(centre_components)

    # get the categorical features' values into a list for use in OLS preparation
    categorical_value_list = get_categorical(new_cat, boundaries)

    for i in range(len(centres)): # run through each different centre

      # create a dataframe version of centre (so we could put it into OLS)
      centre_OLS_df = pd.DataFrame({hyperparameter_names[j]:[centres[i][j]] for j in range(len(centres[i]))})

      # copy the boundary dataframes - to turn into the correct training data for OLS (one lm model for each centre)
      tmp_boundary = copy.deepcopy(boundaries)       # keeps the categorical columns (used to rebuild bounds)
      tmp_boundary_drop = copy.deepcopy(boundaries)  # categorical columns removed (used for the OLS fit)

      # restrict the corners to the categorical combination paired with this centre
      n_cat = 0
      for j in range(len(new_cat)):
        if new_cat[hyperparameter_names[j]] == True:

          tmp_boundary = tmp_boundary[tmp_boundary[hyperparameter_names[j]] == categorical_value_list[i][n_cat]]
          tmp_boundary_drop = tmp_boundary_drop[tmp_boundary_drop[hyperparameter_names[j]] == categorical_value_list[i][n_cat]]
          tmp_boundary_drop = tmp_boundary_drop.drop([hyperparameter_names[j]], axis = 1)
          centre_OLS_df = centre_OLS_df.drop([hyperparameter_names[j]], axis=1)
          n_cat += 1

      tmp_boundary_X = tmp_boundary_drop.drop(['score'], axis = 1)
      tmp_boundary_y = tmp_boundary_drop['score']

      OLS = sm.OLS(tmp_boundary_y, tmp_boundary_X).fit()
      pred_centre_score = OLS.predict(centre_OLS_df)[0]

      # get_score(combo_dict)
      # actual_centre_score = score
      actual_centre_score = 0

      combo_dict = {}
      for j in range(len(hyperparameter_names)):

        # TODO: add cases

        # add cases for transforming
        if transform[hyperparameter_names[j]] == '10^':
          combo_dict[hyperparameter_names[j]] = [10**centres[i][j]]
        # no transform: use the value as is
        else:
          combo_dict[hyperparameter_names[j]] = [centres[i][j]]

      # get_score(combo) JIAXING TUNING CLASS
      combo_dict['score'] = [0] # actual score
      tmp = pd.DataFrame(combo_dict)
      tuning_results = pd.concat([tuning_results, tmp]) # TODO: TMP tuning result

      # if actual_centre_score > pred_centre_score + 0.005 or actual_centre_score < pred_centre_score - 0.005:
      if True:
        # convert the DataFrame form back into the original bound format, then split around the centre
        bounds_original_format = rebuild_bounds_to_original_format(tmp_boundary, new_cat, bounds)
        new_bounds_list = get_new_bounds(bounds_original_format, centres[i], new_cat)
        print('new_bounds_list', new_bounds_list)
        print('\n')
        bounds_list.extend(new_bounds_list)


new_bounds_list [[((0, 5.0), 'A'), ((-4, -1.0), 'B'), ({0, 1, 2}, 'C'), ({'a'}, 'D')], [((0, 5.0), 'A'), ((-4, -1.0), 'B'), ({2, 3, 4}, 'C'), ({'a'}, 'D')], [((0, 5.0), 'A'), ((-1.0, 2), 'B'), ({0, 1, 2}, 'C'), ({'a'}, 'D')], [((0, 5.0), 'A'), ((-1.0, 2), 'B'), ({2, 3, 4}, 'C'), ({'a'}, 'D')], [((5.0, 10), 'A'), ((-4, -1.0), 'B'), ({0, 1, 2}, 'C'), ({'a'}, 'D')], [((5.0, 10), 'A'), ((-4, -1.0), 'B'), ({2, 3, 4}, 'C'), ({'a'}, 'D')], [((5.0, 10), 'A'), ((-1.0, 2), 'B'), ({0, 1, 2}, 'C'), ({'a'}, 'D')], [((5.0, 10), 'A'), ((-1.0, 2), 'B'), ({2, 3, 4}, 'C'), ({'a'}, 'D')]]


new_bounds_list [[((0, 5.0), 'A'), ((-4, -1.0), 'B'), ({0, 1, 2}, 'C'), ({'b'}, 'D')], [((0, 5.0), 'A'), ((-4, -1.0), 'B'), ({2, 3, 4}, 'C'), ({'b'}, 'D')], [((0, 5.0), 'A'), ((-1.0, 2), 'B'), ({0, 1, 2}, 'C'), ({'b'}, 'D')], [((0, 5.0), 'A'), ((-1.0, 2), 'B'), ({2, 3, 4}, 'C'), ({'b'}, 'D')], [((5.0, 10), 'A'), ((-4, -1.0), 'B'), ({0, 1, 2}, 'C'), ({'b'}, 'D')], [((5.0, 10), 'A'), ((-4, -1.0), 'B'), ({2, 3, 4}, 'C'),