In [1]:
import pickle
import pandas as pd
import numpy as np

In [2]:
# The implementation of Levenshtein Distance was guided by this article:
# https://blog.paperspace.com/implementing-levenshtein-distance-word-autocomplete-autocorrect/

def Lev_Dist(string1, string2):
    """Return the Levenshtein (edit) distance between two strings.

    A table of shape (len(string1) + 1, len(string2) + 1) is filled so that
    cell (r, c) holds the distance between the first r characters of
    ``string1`` and the first c characters of ``string2``.

    Parameters
    ----------
    string1, string2 : str
        The two strings to compare.

    Returns
    -------
    numpy integer scalar
        The number of single-character insertions, deletions and
        substitutions needed to turn one string into the other.
    """
    n_rows = len(string1) + 1
    n_cols = len(string2) + 1
    table = np.zeros((n_rows, n_cols), dtype="int")

    # Base cases: building a prefix of length k from the empty string
    # costs exactly k insertions, so the first column and the first row
    # simply count upwards from zero.
    table[:, 0] = np.arange(n_rows)
    table[0, :] = np.arange(n_cols)

    # Each remaining cell depends only on its upper, left and upper-left
    # neighbours, so a row-by-row sweep always has them available.
    for r, char1 in enumerate(string1, start=1):
        for c, char2 in enumerate(string2, start=1):
            if char1 == char2:
                # Matching characters cost nothing extra: carry the
                # diagonal value over unchanged.
                table[r, c] = table[r - 1, c - 1]
                continue
            above = table[r - 1, c]
            left = table[r, c - 1]
            diagonal = table[r - 1, c - 1]
            # Characters differ: one edit on top of the cheapest neighbour.
            table[r, c] = 1 + min(left, above, diagonal)

    return table[n_rows - 1, n_cols - 1]

def Lev_Dist_Qualities_Name(beer_name, qualities_names):
    """Find the entry of ``qualities_names`` closest to ``beer_name``.

    Closeness is measured with ``Lev_Dist``. When several candidates share
    the smallest distance, the one appearing first in ``qualities_names``
    wins. The search stops as soon as an exact match (distance 0) is found,
    since no later candidate can beat it.

    Parameters
    ----------
    beer_name : str
        The name to look up.
    qualities_names : sequence of str
        Candidate names; must support ``len()`` and integer indexing and
        contain at least one element.

    Returns
    -------
    tuple
        ``(min_dist, best_name)`` where ``best_name`` is the closest
        candidate and ``min_dist`` its distance to ``beer_name``.

    Raises
    ------
    IndexError
        If ``qualities_names`` is an empty list (there is no element 0).
    """
    index = 0
    min_dist = Lev_Dist(beer_name, qualities_names[0])
    for i in range(1, len(qualities_names)):
        if min_dist == 0:
            # Exact match already found. Stop here and report the entry at
            # `index` -- NOT the entry at `i`, which is merely the next
            # candidate in the list and was never compared.
            break
        dist = Lev_Dist(beer_name, qualities_names[i])
        if dist < min_dist:  # strict '<' keeps the earliest of tied candidates
            index = i
            min_dist = dist
    return (min_dist, qualities_names[index])

In [3]:
# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load pickles that were produced by a trusted step of this project.

# `with` guarantees each handle is closed even if unpickling raises.
with open("l3", "rb") as l3_fh:
    l3 = pickle.load(l3_fh)

with open("qual_names", "rb") as qual_names_fh:
    qual_names = pickle.load(qual_names_fh)

# Map every entry of `l3` to its closest entry in `qual_names`, stored as the
# (distance, name) tuple returned by Lev_Dist_Qualities_Name.
# Presumably `l3` holds beer names (strings) -- confirm against the step
# that wrote the "l3" pickle.
l3_possible_matches = {}
for beer in l3:
    l3_possible_matches[beer] = Lev_Dist_Qualities_Name(beer, qual_names)

In [4]:
# Persist the name -> (distance, closest name) mapping for later steps.
# `with` ensures the file is flushed and closed even if pickling fails.
with open("l3_dict", "wb") as l3_dict_fh:
    pickle.dump(l3_possible_matches, l3_dict_fh)