In [69]:
# https://rosettacode.org/wiki/Sorting_algorithms/Heapsort#Python

import numpy as np
from line_profiler import LineProfiler
from pysr import PySRRegressor
import random
import os
import sys
import math
import pickle

## Heapsort Algorithm

In [70]:
def heapsort(lst): # N log N 
  ''' Heapsort. Note: this function sorts in-place (it mutates the list). '''

  # in pseudo-code, heapify only called once, so inline it here
  for start in range(int((len(lst)-2)/2), -1, -1):
    siftdown(lst, start, len(lst)-1)

  for end in range(int(len(lst)-1), 0, -1):
    lst[end], lst[0] = lst[0], lst[end]
    siftdown(lst, 0, end - 1)

def siftdown(lst, start, end):  
  root = start
  while True:
    child = root * 2 + 1
    if child > end: break
    if child + 1 <= end and lst[child] < lst[child + 1]:
      child += 1
    if lst[root] < lst[child]:
      lst[root], lst[child] = lst[child], lst[root]
      root = child
    else:
      break

## Frequency Count Method

In [71]:
X_y = []
i=50
for n in range(2,i+1): # started with 2 itens until 50 itens in the list
  lprofiler = LineProfiler()
  lp_wrapper = lprofiler(heapsort)

  # create a list with random number between 1 to 10000
  # with n number in the list
  input = random.choices(range(10000), k=n)
  lp_wrapper(input)

  stats = lprofiler.get_stats()
  line_numbers = []
  hits = []

  for line in stats.timings.values():
    for i in line:
      line_numbers.append(i[0])
      hits.append(i[1])

  X_y.append([n, sum(hits)])

dados = np.array(X_y)

X = dados[:, 0]
y = dados[:, 1]
X_reshaped = X.reshape(-1, 1)

In [72]:
resultados_com_menor_loss = []
repeat = 10
registros = []
unary_operators_list = ["log", "square", "cube", "sqrt", "round", "exp", "abs"]

original_stdout = sys.stdout

with open(os.devnull, 'w') as devnull:
  sys.stdout = devnull

  for i in range(repeat):

    # first combination
    reg1 = PySRRegressor(
      #binary_operators=["*", "+"],
      unary_operators=unary_operators_list
    )

    fit1 = reg1.fit(X_reshaped, y)
    best_program1 = fit1.get_best()

    registro1 = []
    for index, value in enumerate(best_program1):
      registro1.append(value)
      
    registros.append(registro1)

    # second combination
    #reg2 = PySRRegressor(
    #  binary_operators=["*"],
    #  unary_operators=unary_operators_list
    #)
#
    #fit2 = reg2.fit(X_reshaped, y)
    #best_program2 = fit2.get_best()
#
    #registro2 = []
    #for index, value in enumerate(best_program2):
    #  registro2.append(value)
    #registros.append(registro2)
#
    ## third combinarion
    #reg3 = PySRRegressor(
    #  binary_operators=["+"],
    #  unary_operators=unary_operators_list
    #)
#
    #fit3 = reg3.fit(X_reshaped, y)
    #best_program3 = fit3.get_best()
#
    #registro3 = []
    #for index, value in enumerate(best_program3):
    #  registro3.append(value)
    #registros.append(registro3)
    
sys.stdout = original_stdout

[ Info: Started!
0.0%┣                                             ┫ 0/600 [00:00<00:-11, -0s/it]Expressions evaluated per second: [.....]. Head worker occupation: 0.0%         Press 'q' and then <enter> to stop execution early.                             Hall of Fame:                                                                   ---------------------------------------------------------------------------------------------------                                                             Complexity  Loss       Score     Equation                                       1           7.654e+03  1.594e+01  y = x₀                                        5           1.906e+03  3.476e-01  y = exp(sqrt(x₀ * 0.55306))                   7           7.762e+02  4.491e-01  y = sqrt(square(x₀) * (x₀ * 0.57979))         8           4.671e+02  5.078e-01  y = sqrt(round(square(x₀) * (x₀ * 0.311)))    9           4.671e+02  1.339e-04  y = abs(sqrt(abs(square(x₀) * (x₀ * 0.311)))) 11          3.290e+02  

In [73]:
registros_ = registros
file_pickle = 'dados_v3.pck'
for i in registros_: print(i)

[5, 0.24997818, 0.5537502122253826, '((x0 / 0.24996154) - 1.5031842)', 4.00061545468155*x0 - 1.5031842, PySRFunction(X=>4.00061545468155*x0 - 1.5031842)]
[5, 0.24989527, 0.5539151688752615, '((x0 * 4.000006) - 1.489994)', 4.000006*x0 - 1.489994, PySRFunction(X=>4.000006*x0 - 1.489994)]
[5, 0.24989545, 0.5539151589623709, '((x0 * 4.0000005) + -1.4898049)', 4.0000005*x0 - 1.4898049, PySRFunction(X=>4.0000005*x0 - 1.4898049)]
[5, 0.24989524, 0.5539152289004108, '((4.000006 * x0) - 1.489954)', 4.000006*x0 - 1.489954, PySRFunction(X=>4.000006*x0 - 1.489954)]
[5, 0.24989569, 0.5539146787617815, '((x0 - 0.37244725) * 3.9999998)', 3.9999998*x0 - 1.48978892551055, PySRFunction(X=>3.9999998*x0 - 1.48978892551055)]
[5, 0.24989542, 0.5539148687496228, '(-1.4898 + (x0 * 4.0000005))', 4.0000005*x0 - 1.4898, PySRFunction(X=>4.0000005*x0 - 1.4898)]
[14, 0.1705123, 0.37272027157852444, '(round(abs((x0 - (cube(-0.23864785) * x0)) * -4.432793) - 2.1747878) * 0.8923905)', 0.8923905*ceiling(4.4930420259042

In [74]:
for i in registros_:
  loss = i[1]
  score = i[2]
  complexity = i[0]
  w = (loss * score)/complexity
  if math.isnan(w):
    i.append(0)
  else:
    i.append(w)

lista_melhor_valor = max(registros_, key=lambda x: x[6])

## Save result

In [75]:
def salvar_dados(dados, key, arquivo):
  if os.path.exists(arquivo):
    with open(arquivo, 'rb') as f:
      dados_exist = pickle.load(f)
  else:
    dados_exist = {}
    
  valor_original = dados_exist.get(key)
  if valor_original == None:
    dados_exist.update({key: [dados]})
  else:
    if isinstance(valor_original, list):
      valor_original.append(dados)
    else:
      dados_exist.update({key: [dados]})

  with open(arquivo, 'wb') as f:
    pickle.dump(dados_exist, f)
  
caminho_arquivo = file_pickle
novos_dados = lista_melhor_valor[0:3] + [lista_melhor_valor[4]]

salvar_dados(novos_dados, 'heapsort_NlogN', caminho_arquivo)

In [76]:
def carregar_dados(arquivo):
    # Carrega os dados do arquivo pickle
    with open(arquivo, 'rb') as f:
        dados = pickle.load(f)
    return dados

caminho_arquivo = file_pickle
dados_carregados = carregar_dados(caminho_arquivo)

print("Conteúdo do arquivo pickle:")
for k, v in dados_carregados.items():
  if k == 'heapsort_NlogN':
    print('\u25CF', k)
    for index, item in enumerate(v):
      if index == len(v)-1:
        print('└─', item)
      else:
        print('├─', item)
    print('==========================')

Conteúdo do arquivo pickle:
● heapsort_NlogN
├─ [5, 0.24989547, 0.5539147687077833, 3.9999816*x0 - 1.48920982960327]
├─ [5, 0.24989581, 0.5539140884238056, 4.0*x0 - 1.48979688]
├─ [5, 0.25059167, 1.1050490982198005, 3.99930731997218*x0 - 1.4962873]
├─ [5, 0.24989547, 0.5539147687077833, 3.9999988*x0 - 1.4897596]
├─ [5, 0.24989502, 1.107831800748472, 3.9999964*x0 - 1.489875]
├─ [5, 0.24989545, 0.5539152052201823, 3.9999988*x0 - 1.4897524]
├─ [5, 0.24989559, 0.5539149251031158, 3.9999995*x0 - 1.48977357377828]
└─ [5, 0.24997818, 0.5537502122253826, 4.00061545468155*x0 - 1.5031842]
