In [10]:
# Import necessary libraries
import time

# Load the data
# The place names include non-ASCII characters (accented letters), so the file
# is decoded explicitly as UTF-8 rather than with the platform default
# encoding, which differs between operating systems.
with open('../data/openstreetmap/place_names_reduced.txt', 'r', encoding='utf-8') as file:
    # One place name per line; splitlines() drops the trailing newline characters.
    place_names = file.read().splitlines()

# Display the first few entries
print("Sample data:", place_names[:10])

# Basic Levenshtein Distance Implementation
def levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between ``s1`` and ``s2``.

    The distance is computed with the classic dynamic-programming table,
    keeping only the previous and the current row in memory.

    Args:
        s1: First sequence (typically a string).
        s2: Second sequence (typically a string).

    Returns:
        The minimum number of single-element insertions, deletions and
        substitutions needed to turn one sequence into the other.
    """
    # Iterate over the longer sequence in the outer loop so that each row is
    # only as wide as the shorter sequence.
    if len(s1) < len(s2):
        longer, shorter = s2, s1
    else:
        longer, shorter = s1, s2

    # Against an empty sequence every element of the other one is an edit.
    if len(shorter) == 0:
        return len(longer)

    # Row 0 of the table: distance from the empty prefix to each prefix of
    # the shorter sequence.
    prev = list(range(len(shorter) + 1))
    for row_number, outer_item in enumerate(longer, start=1):
        # Column 0: distance from a prefix of the longer sequence to "".
        curr = [row_number]
        for col, inner_item in enumerate(shorter):
            delete_cost = prev[col + 1] + 1
            insert_cost = curr[col] + 1
            replace_cost = prev[col] + (0 if outer_item == inner_item else 1)
            curr.append(min(delete_cost, insert_cost, replace_cost))
        prev = curr

    # Bottom-right cell of the table.
    return prev[-1]

# Bit-Parallel Levenshtein Distance (Myers / Hyyrö bit-vector algorithm)
def bit_parallel_levenshtein(s1, s2):
    """Return the Levenshtein distance between ``s1`` and ``s2``.

    One column of the dynamic-programming table is encoded as two bit
    vectors holding the +1 / -1 differences between vertically adjacent
    cells. Each character of the longer string then advances a whole column
    with a handful of integer operations. Python integers are unbounded, so
    the pattern may be longer than a machine word.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        The minimum number of single-character insertions, deletions and
        substitutions needed to turn one string into the other.
    """
    # Use the shorter string as the pattern so the bit vectors stay narrow.
    if len(s1) < len(s2):
        s1, s2 = s2, s1

    pattern_len = len(s2)
    if pattern_len == 0:
        return len(s1)

    # Python ints have no fixed width: `~x` is negative and additions carry
    # past the pattern length, so every vector is masked to pattern_len bits.
    mask = (1 << pattern_len) - 1
    last_bit = 1 << (pattern_len - 1)

    # peq[c] has bit i set when pattern character i equals c.
    peq = {}
    for i, char in enumerate(s2):
        peq[char] = peq.get(char, 0) | (1 << i)

    # Column 0 of the table is 0, 1, ..., pattern_len: every vertical
    # difference is +1 and the bottom cell equals pattern_len.
    vp = mask
    vn = 0
    distance = pattern_len

    for char in s1:
        eq = peq.get(char, 0)

        # d0: cells whose diagonal difference is zero.
        d0 = ((((eq & vp) + vp) ^ vp) | eq | vn) & mask
        # hp / hn: cells whose horizontal difference is +1 / -1.
        hp = (vn | ~(d0 | vp)) & mask
        hn = d0 & vp

        # The horizontal difference on the last pattern row is the change of
        # the bottom cell, i.e. of the running distance.
        if hp & last_bit:
            distance += 1
        elif hn & last_bit:
            distance -= 1

        # Shift down one row; the 1 shifted into hp encodes the top boundary
        # row 0, 1, 2, ... of a global (whole-string) alignment.
        hp = ((hp << 1) | 1) & mask
        hn = (hn << 1) & mask
        vp = (hn | ~(d0 | hp)) & mask
        vn = hp & d0

    return distance

# Function to evaluate performance of a given method
def evaluate_performance(method, data):
    """Time ``method`` over every ordered pair of distinct entries in ``data``.

    Args:
        method: Callable taking two items of ``data``; its return value is
            discarded.
        data: Sequence of items. Pairs are skipped by position, not by
            value, so duplicate entries are still compared with each other.

    Returns:
        Elapsed time in seconds (float) for the whole all-pairs run.
    """
    # perf_counter() is monotonic and high-resolution, so the interval cannot
    # be skewed by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    for i, left in enumerate(data):
        for j, right in enumerate(data):
            if i != j:
                method(left, right)
    return time.perf_counter() - start_time

# Time the dynamic-programming implementation over every ordered pair of
# distinct entries in place_names and report the total.
basic_time = evaluate_performance(levenshtein_distance, place_names)
print(f"Basic Levenshtein Distance: {basic_time:.4f} seconds")

# Run the same all-pairs workload with the bit-parallel implementation.
bit_parallel_time = evaluate_performance(bit_parallel_levenshtein, place_names)
print(f"Bit-Parallel Levenshtein Distance: {bit_parallel_time:.4f} seconds")

# Compare performance: print both totals together for side-by-side reading.
# NOTE(review): only run time is compared here; nothing in this cell checks
# that the two implementations return the same distances. Confirm that before
# trusting the speed-up.
print("\nPerformance Comparison:")
print(f"Basic Method: {basic_time:.4f} seconds")
print(f"Bit-Parallel Method: {bit_parallel_time:.4f} seconds")

Sample data: ['Perla Marina', 'Bohio Cabarete', 'Estación de Policia', 'Mazorra', 'Calle 100', 'Primer Anillo', 'Puente Calvario', 'Unidad 2100', 'Rotonda Cojímar', 'Vía Blanca - Rotonda Cojímar']
Basic Levenshtein Distance: 22.2261 seconds
Bit-Parallel Levenshtein Distance: 4.5742 seconds

Performance Comparison:
Basic Method: 22.2261 seconds
Bit-Parallel Method: 4.5742 seconds
