In [2]:
import heapq

def split_text(text, delimiter='.'):
    """Split *text* into trimmed, non-empty chunks.

    Args:
        text: The string to break apart.
        delimiter: Separator between chunks. Defaults to ``'.'`` so existing
            single-argument callers behave exactly as before. It is passed
            straight to ``str.split``, which raises ``ValueError`` for an
            empty separator.

    Returns:
        A list of the chunks in their original order, each stripped of
        surrounding whitespace, with chunks that are empty after stripping
        removed.
    """
    # Strip each chunk once, lazily, then keep only the non-empty results.
    trimmed = (chunk.strip() for chunk in text.split(delimiter))
    return [chunk for chunk in trimmed if chunk]

def align_with_astar(text_A, text_B):
    """Align the sentences of two texts with an A* search.

    Both texts are split with ``split_text``. A search state ``(row, col)``
    means that ``row`` sentences of the first text and ``col`` sentences of
    the second have been consumed. Three moves are possible from a state:

    * pair the next sentence of each text (cost 0 when they are equal
      ignoring case via ``str.lower``, otherwise 1);
    * emit the next sentence of the first text against a gap (cost 1);
    * emit the next sentence of the second text against a gap (cost 1).

    Returns:
        A list of ``(left, right, cost)`` tuples describing the alignment,
        where ``"-"`` stands for a gap. ``None`` is returned only if the
        frontier empties before the goal state is popped.
    """
    sentences_a = split_text(text_A)
    sentences_b = split_text(text_B)
    goal = (len(sentences_a), len(sentences_b))

    def remaining_estimate(row, col):
        # Difference between the numbers of sentences still unconsumed on
        # each side; at least that many cost-1 gap moves are still required.
        return abs((goal[0] - row) - (goal[1] - col))

    # Heap entries are (priority, cost so far, state, alignment so far).
    # A single-element list already satisfies the heap invariant.
    open_heap = [(0, 0, (0, 0), [])]
    closed = set()

    while open_heap:
        _priority, cost_so_far, cell, path = heapq.heappop(open_heap)

        # A state is expanded only the first time it is popped.
        if cell in closed:
            continue
        closed.add(cell)

        if cell == goal:
            return path

        row, col = cell
        has_a = row < goal[0]
        has_b = col < goal[1]

        # Collect (successor state, alignment record) pairs for legal moves.
        moves = []
        if has_a and has_b:
            mismatch = int(sentences_a[row].lower() != sentences_b[col].lower())
            moves.append(
                ((row + 1, col + 1), (sentences_a[row], sentences_b[col], mismatch))
            )
        if has_a:
            moves.append(((row + 1, col), (sentences_a[row], "-", 1)))
        if has_b:
            moves.append(((row, col + 1), ("-", sentences_b[col], 1)))

        for next_cell, record in moves:
            new_cost = cost_so_far + record[2]
            heapq.heappush(
                open_heap,
                (
                    new_cost + remaining_estimate(*next_cell),
                    new_cost,
                    next_cell,
                    path + [record],
                ),
            )

    return None

def similarity_score(alignment):
    """Return the percentage of alignment entries whose cost is zero.

    Args:
        alignment: A sequence of ``(left, right, cost)`` triples. A falsy
            value (for example ``None`` or an empty list) scores ``0.0``.

    Returns:
        A float: the share of zero-cost entries, scaled to 0-100.
    """
    if alignment:
        pair_count = len(alignment)
        zero_cost = 0
        for _left, _right, step_cost in alignment:
            if step_cost == 0:
                zero_cost += 1
        return (zero_cost / pair_count) * 100
    return 0.0


# Sample documents: the two texts differ only in their second sentence.
text_A = "Artificial intelligence is a branch of computer science. It deals with machine learning. A* search is widely used."
text_B = "Artificial intelligence is a branch of computer science. It studies machine learning. A* search is widely used."

alignment_result = align_with_astar(text_A, text_B)

# Print one row per aligned pair; each side is left-justified to 60 columns.
print("Sentence Alignment (Doc A vs Doc B):")
for left, right, cost in alignment_result:
    if cost == 0:
        status = "Match"
    else:
        status = "Mismatch"
    print(f"{left:<60} | {right:<60} | {status}")

# Share of zero-cost pairs, shown as a percentage with two decimals.
final_score = similarity_score(alignment_result)
print(f"\nOverall Similarity: {final_score:.2f}%")

Sentence Alignment (Doc A vs Doc B):
Artificial intelligence is a branch of computer science      | Artificial intelligence is a branch of computer science      | Match
It deals with machine learning                               | It studies machine learning                                  | Mismatch
A* search is widely used                                     | A* search is widely used                                     | Match

Overall Similarity: 66.67%
