Consider the example 2 from Q1:
You start with the sequence ABABAECCEC, or in general any sequence made from A,B, C, and E. You can transform this sequence using the following equalities: AC = E,AB = BC, BB = E, CC = E, and E x = x for any x . For example, ABBC can be transformed into AEC, and then AC, and then E. Your goal is to produce the sequence E.
Define an admissible heuristic for the problem and write an A* algorithm to solve the problem.

In [2]:
import re 
from itertools import combinations

In [3]:
import heapq
from itertools import combinations


def heuristic(str1, str2):
    """
    :param str1:
    :param str2:
    :return: difference in length of two strings
    """
    return abs(len(str1) - len(str2))
    # equalities_set = set(equality_dict.keys())
    # return len(neighbor) - sum([word in neighbor for word in equalities_set])
    # return len(neighbor) - [n for n in neighbor].count('E')


def get_path(predecessors, start, goal):
    result = [goal]
    current = goal
    while result[-1] != start:
        result.append(predecessors[result[-1]])
    return result


def get_neighbors(current_cell, equality_dict):
    dna_string = current_cell[1]
    equalities_set = set(equality_dict.keys())
    word_pair_set = set(["".join(comb) for comb in combinations(dna_string, 2) if "".join(comb) in dna_string])
    possible_steps = equalities_set.intersection(word_pair_set)
    return [dna_string.replace(word, equality_dict[word]) for word in possible_steps]


def a_star(start, goal):
    equality_dict = {'AC': 'E', 'AB': 'BC', 'BB': 'E', 'CC': 'E', 'EA': 'A', 'EB': 'B', 'EC': 'C', 'EE': 'E','BC':'AB'}
    pq = []
    heapq.heappush(pq, (0,start))
    predecessors = {start: None}
    # Distance from start to current cell
    g_value = {start: 0}

    while pq:
        current_cell = heapq.heappop(pq)
        if current_cell[1] == goal:
            return get_path(predecessors, start, goal)
        neighbors = get_neighbors(current_cell, equality_dict)
        for neighbor in neighbors:
            if neighbor not in g_value:
                new_cost = g_value[current_cell[1]] + 1
                g_value[neighbor] = new_cost
                f_value = new_cost + heuristic(goal, neighbor)
                heapq.heappush(pq, (f_value,neighbor))
                predecessors[neighbor] = current_cell[1]
    return None
  

In [4]:
  print(a_star("ABABAECCEC","E"))

['E', 'AC', 'AEC', 'ABBC', 'ABBEC', 'BCBEC', 'BCBCCC', 'ABABCC', 'ABABECC', 'ABABACCC', 'ABABAECCEC']
