In [10]:
# https://rosettacode.org/wiki/Levenshtein_distance#Python

import numpy as np
from line_profiler import LineProfiler
from pysr import PySRRegressor
import random
import string
import os
import sys
import math
import pickle

## Levenshtein Distance Algorithm

In [11]:
def levenshteinDistance(str1, str2): # M*N
  """Return the Levenshtein (edit) distance between `str1` and `str2`.

  The distance is the minimum number of single-character insertions,
  deletions and substitutions needed to turn `str1` into `str2`.

  Args:
    str1: First string (length m).
    str2: Second string (length n).

  Returns:
    The edit distance as an int; it equals len(str2) when `str1` is empty
    and len(str1) when `str2` is empty.

  Note:
    This function is line-profiled elsewhere in this notebook, so the
    executable statements are deliberately kept one-per-line as in the
    original. The `return` statement adds one executed line per call.
  """
  m = len(str1)
  n = len(str2)
  d = [[i] for i in range(1, m + 1)]   # d matrix rows
  d.insert(0, list(range(0, n + 1)))   # d matrix columns
  for j in range(1, n + 1): # A 
    for i in range(1, m + 1): # B
      if str1[i - 1] == str2[j - 1]: # C     # Python (string) is 0-based
        substitutionCost = 0
      else: # D
        substitutionCost = 1
      # Row i currently holds j entries, so insert(j, ...) appends column j.
      d[i].insert(j, min(d[i - 1][j] + 1,
                          d[i][j - 1] + 1,
                          d[i - 1][j - 1] + substitutionCost))
  # Bottom-right cell of the matrix is the distance between the full strings.
  return d[m][n]

In [12]:
def generate_random_string(length):
    """Return a string of `length` random lowercase ASCII letters.

    One `random.choice` draw is made per character. A `length` of zero or
    less produces an empty string.
    """
    alphabet = string.ascii_lowercase
    letters = []
    for _ in range(length):
        letters.append(random.choice(alphabet))
    return ''.join(letters)

## Frequency Count Method

In [13]:
# Frequency-count sampling: run levenshteinDistance under the line profiler
# for growing input sizes and record the total number of line hits per run.
X_y = []   # not used in this cell; kept so the name stays defined
x1 = []    # length of the first string
x2 = []    # length of the second string (twice the first)
y = []     # total line hits reported for the profiled call
max_n = 50
for n in range(2, max_n + 1): # first-string lengths from 2 up to 50 inclusive
  # A new profiler (and wrapped function) is created for every input size.
  lprofiler = LineProfiler()
  lp_wrapper = lprofiler(levenshteinDistance)

  # create 2 random strings with different lengths (n and 2n)
  string1 = generate_random_string(n)
  string2 = generate_random_string(n*2)

  lp_wrapper(string1, string2)

  stats = lprofiler.get_stats()
  # Each timing entry is indexed as (line number, hits, ...); only the hit
  # count (index 1) is needed for the total.
  hits = [entry[1] for timings in stats.timings.values() for entry in timings]

  x1.append(n)
  x2.append(n*2)
  y.append(sum(hits))

# Feature matrix with one row per run: columns are (len(string1), len(string2)).
X_reshaped = np.column_stack((x1, x2))
y_np = np.array(y)


In [14]:
resultados_com_menor_loss = []
repeat = 5        # number of independent PySR fits to run
registros = []    # one list per fit, holding the fields of its best equation
unary_operators_list = ["log", "square", "cube", "sqrt", "round", "exp", "abs"]

original_stdout = sys.stdout

# Redirect Python-level stdout to the null device while fitting. The
# try/finally guarantees sys.stdout is restored even if a fit raises;
# otherwise it would be left pointing at the closed devnull handle and every
# later print() in the notebook would fail.
with open(os.devnull, 'w') as devnull:
  sys.stdout = devnull
  try:
    for i in range(repeat):

      # first combination
      reg1 = PySRRegressor(
        unary_operators=unary_operators_list
      )

      fit1 = reg1.fit(X_reshaped, y)
      best_program1 = fit1.get_best()

      # Copy the fields of the selected equation into a plain list.
      registro1 = list(best_program1)

      registros.append(registro1)
  finally:
    sys.stdout = original_stdout



[ Info: Started!
0.0%┣                                             ┫ 0/600 [00:00<00:-42, -0s/it]Expressions evaluated per second: [.....]. Head worker occupation: 0.0%         Press 'q' and then <enter> to stop execution early.                             Hall of Fame:                                                                   ---------------------------------------------------------------------------------------------------                                                             Complexity  Loss       Score     Equation                                       1           2.647e+08  1.594e+01  y = x₁                                        2           2.301e+08  1.401e-01  y = square(x₀)                                3           1.963e+08  1.588e-01  y = (x₀ * x₁)                                 6           9.494e+06  1.010e+00  y = ((square(-2.3917) * x₀) * x₁)             8           2.658e+06  6.365e-01  y = square((x₀ + (x₁ + -2.5348)) + x₀)        10          1.312e+06  

In [15]:
# Alias (not a copy) of the fit records: any change made through registros_
# is also visible through registros.
registros_ = registros
# Pickle file in which the selected results are persisted.
file_pickle = 'dados_v3.pck'

In [16]:
# Weight every fit record by w = (loss * score) / complexity and keep the
# record with the largest weight. Each record is modified in place: its
# weight is appended as the last element.
# NOTE(review): maximising w rewards a larger loss — confirm this is the
# intended selection criterion.
for registro in registros_:
  complexity, loss, score = registro[0], registro[1], registro[2]
  w = (loss * score)/complexity
  # A NaN weight is stored as 0 so it never wins the max() below.
  registro.append(0 if math.isnan(w) else w)

# The weight was just appended, so it is always the last element; indexing
# with -1 avoids hard-coding the number of fields in a record.
lista_melhor_valor = max(registros_, key=lambda x: x[-1])

Selection weight: `w = (loss * score) / complexity`


## Save result

In [17]:
def salvar_dados(dados, key, arquivo):
  """Append `dados` to the list stored under `key` in a pickled dict file.

  The file is read when it exists (otherwise an empty dict is started),
  updated in memory and then rewritten in full.

  Args:
    dados: Object to record; it must be picklable.
    key: Dictionary key under which the entry is stored.
    arquivo: Path of the pickle file to read and rewrite.

  If `key` is missing, or its current value is not a list, the value becomes
  `[dados]`; otherwise `dados` is appended to the existing list.
  """
  if os.path.exists(arquivo):
    # SECURITY: unpickling can execute arbitrary code; only use this with
    # files written by this notebook or another trusted source.
    with open(arquivo, 'rb') as f:
      dados_exist = pickle.load(f)
  else:
    dados_exist = {}

  valor_original = dados_exist.get(key)
  if isinstance(valor_original, list):
    valor_original.append(dados)
  else:
    # Covers both a missing key (None) and a non-list value. Checking the
    # type directly avoids `== None`, which misbehaves for objects that
    # overload `==` (e.g. NumPy arrays).
    dados_exist[key] = [dados]

  with open(arquivo, 'wb') as f:
    pickle.dump(dados_exist, f)

# Persist the selected record: its first three fields (read as complexity,
# loss and score by the scoring cell) plus the field at index 4
# (presumably the symbolic expression — verify against the fit output).
caminho_arquivo = file_pickle
novos_dados = [*lista_melhor_valor[:3], lista_melhor_valor[4]]

salvar_dados(novos_dados, 'levenshtein_distance_MN', caminho_arquivo)

In [18]:
def carregar_dados(arquivo):
    """Load and return the object stored in the pickle file `arquivo`."""
    with open(arquivo, 'rb') as handle:
        return pickle.load(handle)

# Reload the pickle file and print its contents as a small tree:
# one bullet per key, one branch per stored entry.
caminho_arquivo = file_pickle
dados_carregados = carregar_dados(caminho_arquivo)

print("Conteúdo do arquivo pickle:")
for k, v in dados_carregados.items():
  print('\u25CF', k)
  for index, item in enumerate(v):
    # The final entry closes the branch; earlier entries use a tee marker.
    print('└─' if index == len(v)-1 else '├─', item)
  print('==========================')

Conteúdo do arquivo pickle:
● binary_search_logx
├─ [6, 1.2914093, 1.37589542429269, log(x0**6) + 5.574131]
└─ [5, 1.5291592, 2.19479656667366, log(x0)**2 + 13.966276]
● boyer_moore_M_plus_N
├─ [9, 1729031400.0, 0.1972007458935316, 752018330.701125*(0.00603869219560025*x0 + 0.00603869219560025*x1 - 1)**4 + 601415.75]
└─ [5, 15754614000.0, 0.3055240605113849, 3621132.2 - 28185.828*x1]
● bubblesort_N^2
├─ [4, 5910.4966, 3.4645076652491285, 1.5253483*x0**2]
└─ [4, 8542.308, 3.303255717418494, 1.5552218*x0**2]
● fibonacci_interative_N
├─ [3, 0.20818244, 3.412549040667958, 2.058653*x0]
└─ [3, 0.20818244, 3.412549040667958, 2.058653*x0]
● fibonacci_recursive_2^N
├─ [15, 111113150.0, 0.37865996607381686, x0**4*(0.0022606994*x0**2 - 0.714783)]
└─ [15, 77371830.0, 0.38657145558895856, x0**2*(x0**2*(0.16452532*x0 - 3.8766947) + 297.9323)]
● heapsort_NlogN
├─ [9, 0.24989581, 1.1851428541960574, 4*x0 - 1.4897935]
└─ [9, 0.24989575, 1.185128414350167, 4*x0 - 1.4897884]
● insertionsort_N^2
├─ [4, 12