In [179]:
# https://rosettacode.org/wiki/Binary_search#Python

import numpy as np
from line_profiler import LineProfiler
from pysr import PySRRegressor
import random
import os
import sys
import math
import pickle

## Binary Search Algorithm

In [180]:
def binary_search(l, value): # logN
  """Return an index at which ``value`` occurs in ``l``, or -1 if absent.

  The inclusive index window [low, high] is halved on every iteration, so
  the lookup is only reliable when ``l`` is sorted in ascending order.

  Args:
    l: indexable sequence supporting ``len`` and ``<`` / ``>`` comparisons.
    value: the item to look for.

  Returns:
    The index of a matching element, or -1 when the window becomes empty.
  """
  # NOTE: this notebook wraps the function with LineProfiler and sums the
  # per-line hit counts; changing how statements are split across lines may
  # change those measured totals.
  low = 0
  high = len(l)-1
  while low <= high: 
    mid = (low+high)//2
    # value lies left of mid: discard the upper half
    if l[mid] > value: high = mid-1
    # value lies right of mid: discard the lower half
    elif l[mid] < value: low = mid+1
    else: return mid
  return -1

## Frequency Count Method

In [181]:
# Build (n, total line hits) samples by line-profiling binary_search on
# sorted lists of growing size.
X_y = []
max_n = 50
for n in range(2, max_n + 1):  # list sizes from 2 up to max_n items
  # A fresh profiler per size, so hit counts do not accumulate across runs.
  lprofiler = LineProfiler()
  lp_wrapper = lprofiler(binary_search)

  # n random integers drawn from range(10000), sorted for the search.
  # (Named `arr` only: the original bound the builtin name `input` here.)
  arr = sorted(random.choices(range(10000), k=n))

  # Search for a value larger than every element, so the lookup always misses.
  lp_wrapper(arr, arr[-1]+1)

  stats = lprofiler.get_stats()
  # Each timing entry's second field is taken as that line's hit count.
  hits = [entry[1] for timing in stats.timings.values() for entry in timing]

  X_y.append([n, sum(hits)])

dados = np.array(X_y)

# Column 0 is the list size n, column 1 the summed hit count.
X = dados[:, 0]
y = dados[:, 1]
# Column vector of sizes (one feature per row) for the regressor.
X_reshaped = X.reshape(-1, 1)

In [182]:
resultados_com_menor_loss = []
repeat = 5
registros = []
unary_operators_list = ["log", "square", "cube", "sqrt", "round", "exp", "abs"]

original_stdout = sys.stdout

# Redirect Python-level stdout to the null device while fitting.
with open(os.devnull, 'w') as devnull:
  sys.stdout = devnull
  try:
    for i in range(repeat):

      # first combination
      # (an explicit binary_operators=["*", "+", "^"] was tried and is disabled)
      reg1 = PySRRegressor(
        unary_operators=unary_operators_list
      )

      fit1 = reg1.fit(X_reshaped, y)
      best_program1 = fit1.get_best()

      # Store the fields of the best equation as a plain list.
      registros.append(list(best_program1))

      # Two further combinations, binary_operators=["*", "^"] (reg2) and
      # binary_operators=["+", "^"] (reg3), used the same fit/get_best/append
      # steps and are currently disabled.
  finally:
    # Always restore stdout; otherwise a failed fit would leave it pointing
    # at the closed devnull handle and break every later print.
    sys.stdout = original_stdout

[ Info: Started!
0.0%┣                                              ┫ 0/600 [00:00<00:-3, -0s/it]Expressions evaluated per second: [.....]. Head worker occupation: 0.0%         Press 'q' and then <enter> to stop execution early.                             Hall of Fame:                                                                   ---------------------------------------------------------------------------------------------------                                                             Complexity  Loss       Score     Equation                                       1           1.096e+02  1.594e+01  y = x₀                                        3           1.079e+02  8.017e-03  y = (-0.42948 + x₀)                           5           1.069e+02  4.375e-03  y = ((-0.42948 - 0.28055) + x₀)               10          1.062e+02  1.449e-03  y = ((-0.76719 + -0.23912) + log(exp(x₀) - exp(-0.68086)))                                                                    12          8.551e+01  

In [183]:
# Target pickle file for the saved results.
file_pickle = 'dados_v3.pck'

# Same list object as `registros` (an alias, not a copy).
registros_ = registros

# Show one record per line.
for i in registros_:
  print(i)

[6, 1.2914093, 1.3637021394642477, 'log(square(cube(x0) / 0.0616016))', log(263.521476527432*x0**6), PySRFunction(X=>log(263.521476527432*x0**6))]
[6, 1.2914089, 2.5034278126835927, 'log(square(cube(x0 * 2.5320659)))', log(263.542115096131*x0**6), PySRFunction(X=>log(263.542115096131*x0**6))]
[6, 1.2914093, 0.923160249638048, '(log(square(cube(x0))) - -5.5740113)', log(x0**6) + 5.5740113, PySRFunction(X=>log(x0**6) + 5.5740113)]
[6, 1.1680324, 1.0235738516092419, '((log(x0) * 5.549138) + 6.9403343)', 5.549138*log(x0) + 6.9403343, PySRFunction(X=>5.549138*log(x0) + 6.9403343)]
[8, 0.32362172, 1.2916010386025742, '(round(sqrt(sqrt(x0)) * -2.7543201) * -3.9911413)', -3.9911413*ceiling(-2.7543201*x0**(1/4) - 0.5), PySRFunction(X=>-3.9911413*ceiling(-2.7543201*x0**(1/4) - 0.5))]


In [184]:
# Attach a ranking weight w = loss * score / complexity to every record at
# index 6, then keep the record with the largest weight.
# NOTE(review): a larger loss increases w, so this ranking favours higher-loss
# equations; confirm that is the intended criterion.
for registro in registros_:
  complexity = registro[0]
  loss = registro[1]
  score = registro[2]
  w = (loss * score)/complexity
  # Slice assignment appends on the first run (records have 6 fields) and
  # overwrites on a re-run, so executing this cell twice does not keep
  # growing the records.
  registro[6:] = [0 if math.isnan(w) else w]

lista_melhor_valor = max(registros_, key=lambda x: x[6])

## Save result

In [185]:
def salvar_dados(dados, key, arquivo):
  """Append ``dados`` to the list stored under ``key`` in a pickle file.

  The object pickled in ``arquivo`` is treated as a dict (``.get`` and item
  assignment are used on it). If the file does not exist, an empty dict is
  used. The updated dict is written back to ``arquivo``.

  Args:
    dados: the value to record.
    key: dict key under which values are accumulated.
    arquivo: path of the pickle file to read and rewrite.

  Behaviour per existing value under ``key``:
    * a list      -> ``dados`` is appended to it;
    * anything else (missing, None, or a non-list) -> replaced by ``[dados]``.
  """
  if os.path.exists(arquivo):
    # NOTE: pickle.load can execute arbitrary code; only point this at files
    # produced by this notebook or another trusted source.
    with open(arquivo, 'rb') as f:
      dados_exist = pickle.load(f)
  else:
    dados_exist = {}

  valor_original = dados_exist.get(key)
  # A single isinstance check replaces the former `== None` comparison, which
  # raised for values with element-wise equality (e.g. NumPy arrays).
  if isinstance(valor_original, list):
    valor_original.append(dados)
  else:
    dados_exist[key] = [dados]

  with open(arquivo, 'wb') as f:
    pickle.dump(dados_exist, f)

# Persist the best record: fields 0-2 (read as complexity, loss and score by
# the scoring cell) followed by field 4.
caminho_arquivo = file_pickle
novos_dados = [*lista_melhor_valor[:3], lista_melhor_valor[4]]

salvar_dados(dados=novos_dados, key='binary_search_logx', arquivo=caminho_arquivo)

In [186]:
def carregar_dados(arquivo):
    """Return the object unpickled from the file at ``arquivo``."""
    with open(arquivo, 'rb') as handle:
        return pickle.load(handle)

# Reload the pickle file and list everything stored under
# 'binary_search_logx' as a small tree.
caminho_arquivo = file_pickle
dados_carregados = carregar_dados(caminho_arquivo)

print("Conteúdo do arquivo pickle:")
for k, v in dados_carregados.items():
  if k != 'binary_search_logx':
    continue  # other keys in the file are not shown
  print('\u25CF', k)
  for index, item in enumerate(v):
    # the final entry gets the closing branch glyph
    print('└─' if index == len(v)-1 else '├─', item)
  print('==========================')

Conteúdo do arquivo pickle:
● binary_search_logx
├─ [5, 1.2686663, 2.221314141492738, 10.9540097681169*x0**(1/4)]
├─ [5, 1.2686663, 2.221314226989192, 10.953988*x0**(1/4)]
├─ [5, 1.2686664, 2.221314062669807, 10.9540015519444*x0**(1/4)]
├─ [5, 1.2686663, 2.221314141492738, 10.9539924228566*x0**(1/4)]
├─ [5, 1.529159, 1.8543049601950847, log(x0)**2 + 13.966406]
├─ [6, 1.2914093, 1.37589542429269, log(x0**6) + 5.574101]
├─ [6, 1.2914089, 2.2426865508550637, log(263.542115096131*x0**6)]
├─ [5, 1.529159, 2.1947966974644983, log(x0)**2 + 13.966284]
├─ [6, 1.1680324, 1.0235737900855137, 5.549138*log(x0) + 6.9403343]
├─ [6, 1.1680324, 1.0235737900855137, log(x0**5.54914) + 6.9403296]
├─ [6, 1.1680324, 1.0235737285617819, log(x0**5.5494156) + 6.939477]
├─ [6, 1.1680325, 1.023573642947722, 5.5491385*log(x0) + 6.9403334]
├─ [5, 1.2575216, 0.6898708588950153, 10.73528*x0**0.25614613]
├─ [5, 1.2573884, 0.6898973409560284, 10.721785*x0**0.25653192]
├─ [5, 1.2574341, 0.6898882548276822, 10.728042*x0