In [1]:
from LDistanceModel import ldistance

import json

# Two sample words to compare
str1, str2 = "kitten", "sitting"

# Run the Levenshtein routine on the pair. The output displayed for this cell
# shows the result as a list of edit-operation tuples, e.g.
# ('Substitute', 'k', 's').
distance = ldistance(str1, str2)

# Keep the result under a second name for use by later cells
data = distance

# Show the result as a Python repr, then as its JSON serialisation
print("Levenshtein Distance:", distance)
print("Json:", json.dumps(distance))

Levenshtein Distance: [('Substitute', 'k', 's'), ('Nothing', 'i'), ('Nothing', 't'), ('Nothing', 't'), ('Substitute', 'e', 'i'), ('Nothing', 'n'), ('Insert', 'g')]
Json: [["Substitute", "k", "s"], ["Nothing", "i"], ["Nothing", "t"], ["Nothing", "t"], ["Substitute", "e", "i"], ["Nothing", "n"], ["Insert", "g"]]


In [2]:
# Collect every distinct character that occurs in either word.
# Characters are inserted word by word, left to right.
str1, str2 = "kitten", "sitting"
unique_letters = {letter for word in (str1, str2) for letter in word}

unique_letters

{'e', 'g', 'i', 'k', 'n', 's', 't'}

In [3]:
# Length of each embedding vector; passed to SetEmbedding when `embedding` is built below
embedding_dim = 4

class SetEmbedding:
    """Trainable embedding lookup keyed by the members of a collection.

    Every distinct member is given an integer id and mapped to one row of an
    ``nn.Embedding`` table. Row 0 is reserved as the padding row
    (``padding_idx=0``): PyTorch initialises that row to zeros and excludes it
    from gradient updates, so real members are numbered from 1 and the table
    has ``len(members) + 1`` rows. Members are numbered in sorted order (by
    their string form) so the id assignment does not depend on set iteration
    order.
    """

    def __init__(self, set_of_things, embedding_dim):
        """Build the id table and the embedding layer.

        Args:
            set_of_things: Iterable of hashable items to embed (for example a
                set of characters). Duplicates are ignored.
            embedding_dim: Length of each embedding vector.
        """
        import torch
        import torch.nn as nn

        self.embedding_dim = embedding_dim

        # Deduplicate and fix an order so ids are stable between runs.
        members = sorted(set(set_of_things), key=str)

        # One extra row: index 0 is the padding row and must not be shared
        # with a real member, otherwise that member would be embedded as an
        # untrainable all-zero vector.
        self.embedding = nn.Embedding(len(members) + 1, embedding_dim, padding_idx=0)

        # Each id is stored as a 1-element tensor, ready to index the layer.
        self.ids = {thing: torch.tensor([idx]) for idx, thing in enumerate(members, start=1)}

    def __call__(self, character):
        """Return the embedding of ``character`` as a ``(1, embedding_dim)`` tensor.

        Raises:
            KeyError: If ``character`` was not part of the original collection.
        """
        # This is a dummy embedding; replace with your actual embedding function
        return self.embedding(self.ids[character])

# Build the lookup for the letters gathered in `unique_letters`,
# using vectors of length `embedding_dim`
embedding = SetEmbedding(unique_letters, embedding_dim)

# Look up one sample character to inspect its vector
embedding('g')

tensor([[-1.0033,  0.9368, -1.5642,  0.5740]], grad_fn=<EmbeddingBackward0>)

In [4]:
# Display the edit script that the first cell stored in `data`
data

[('Substitute', 'k', 's'),
 ('Nothing', 'i'),
 ('Nothing', 't'),
 ('Nothing', 't'),
 ('Substitute', 'e', 'i'),
 ('Nothing', 'n'),
 ('Insert', 'g')]

In [5]:
from LDistanceModel import operation_encoder

# Encode the edit script using the character embedding built earlier.
# NOTE(review): in the output shown below each row has 6 values, which looks
# like 2 operation-flag columns followed by the 4-dimensional character
# embedding - confirm against LDistanceModel.operation_encoder.
operation_encoder(data, embedding)

tensor([[ 0.5000,  0.0000, -0.5727, -0.5032, -1.2241,  1.0530],
        [ 0.0000,  0.5000,  1.4785, -1.2178, -0.1114, -0.9373],
        [ 0.0000,  0.0000, -1.6342, -1.8713,  1.3648, -1.5790],
        [ 0.0000,  0.0000,  0.5344, -0.7722, -1.1146, -1.4288],
        [ 0.0000,  0.0000,  0.5344, -0.7722, -1.1146, -1.4288],
        [ 0.5000,  0.0000,  0.6549,  0.6113,  0.0090, -0.5296],
        [ 0.0000,  0.5000, -1.6342, -1.8713,  1.3648, -1.5790],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  1.0000, -1.0033,  0.9368, -1.5642,  0.5740]],
       grad_fn=<CopySlices>)

In [6]:
from LDistanceModel import create_batch_with_masks

# Word pairs to batch together
string_pairs = [
    ("hello", "hallo"),
    ("cat", "bat"),
    ("python", "java"),
    ("same", "same"),
    ("a", "abc"),
]

# Every distinct character appearing in any word of any pair,
# inserted pair by pair, word by word, left to right
all_characters_in_pairs = {
    char
    for first_and_second in string_pairs
    for text in first_and_second
    for char in text
}

# Embedding lookup over those characters with vectors of length 10
string_pair_embedder = SetEmbedding(all_characters_in_pairs, 10)

create_batch_with_masks(string_pairs, string_pair_embedder)

(tensor([[[ 0.0000,  0.0000, -0.4816, -0.5635, -1.0333, -1.3106,  1.4897,
            1.4682,  0.7213, -0.1024,  1.5713, -1.3906],
          [ 0.5000,  0.0000,  0.8788,  1.0067,  0.7228, -0.3137, -0.0437,
           -0.3619,  0.2688,  1.9943,  0.7123, -0.8152],
          [ 0.0000,  0.5000, -0.3906,  3.2407,  0.3253, -1.1309,  0.3228,
            1.2674, -1.0280,  0.6302, -0.4874,  0.3016],
          [ 0.0000,  0.0000,  2.7115,  0.8182, -0.8084, -1.2800,  0.6548,
            0.1492, -0.1591, -0.5944,  0.6402, -0.8973],
          [ 0.0000,  0.0000,  2.7115,  0.8182, -0.8084, -1.2800,  0.6548,
            0.1492, -0.1591, -0.5944,  0.6402, -0.8973],
          [ 0.0000,  0.0000,  0.1882,  1.3087,  0.3298,  0.7031, -1.0170,
           -1.2989,  1.6178, -1.5466, -1.1881,  0.0997],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
            0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         

In [None]:
import graphviz

# Create the graphviz Digraph object; add_nodes_edges_reverse_skip_nothing
# below writes its nodes and edges into this module-level `dot`
dot = graphviz.Digraph()

# Function to add nodes and edges to the graph in reverse order, skipping nodes with operation 'Nothing'
def add_nodes_edges_reverse_skip_nothing(data, parent=None):
    """Add a chain of edit steps to the module-level ``dot`` graph.

    ``data`` is a nested record read through the keys ``'operation'``,
    ``'distance'`` and ``'history'``, where ``'history'`` holds the next
    record of the chain (or a falsy value at its end). A record whose
    operation starts with ``'Nothing'`` produces no node. Every other record
    becomes a node labelled with its distance and operation, and an edge is
    drawn from it to the most recently drawn node (initially ``parent``).

    The chain is walked with a loop rather than recursion, so a long history
    cannot exceed Python's recursion limit.

    Args:
        data: First record of the chain.
        parent: Id of an existing node that the first drawn node should
            point to, or ``None`` to start without an edge.

    Raises:
        KeyError: If a record lacks one of the keys read from it.
    """
    step = data
    downstream = parent  # id of the node the next drawn node points to

    while True:
        if step['operation'][0] != 'Nothing':
            # id() of the record object serves as a unique node name.
            node_id = str(id(step))
            label = f"Distance: {step['distance']}\nOperation: {step['operation']}"
            dot.node(node_id, label)

            if downstream:
                dot.edge(node_id, downstream)

            # Later (older) records attach to this node.
            downstream = node_id

        step = step['history']
        if not step:
            return

# Add nodes and edges starting from the root
# NOTE(review): the first cell binds `data` to a list of operation tuples, but
# the traversal indexes its argument with the string keys 'operation',
# 'distance' and 'history'. As written this call would fail on that list. The
# cell has no execution count, so it may predate a change in what ldistance
# returns - confirm which structure is expected here before running.
add_nodes_edges_reverse_skip_nothing(data)

# Save the graph's DOT source to a file
dot.save('graph_reverse_skip_nothing_new.dot')

# Render the graph to a PNG image file
dot.render('graph_reverse_skip_nothing_new', format='png')

# Report completion (reached only if the steps above did not raise)
print("Graph has been successfully created and saved as graph_reverse_skip_nothing_new.png.")