# **Plagiarism Detection Using A-Star Search**

In [None]:
def levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between ``s1`` and ``s2``.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one sequence into the other.
    Only two rows of the dynamic-programming table are kept at any time.

    Args:
        s1: First sequence (typically a string).
        s2: Second sequence (typically a string).

    Returns:
        The edit distance as a non-negative integer.
    """
    # Put the shorter sequence on the inner axis so each row stays small.
    if len(s1) < len(s2):
        longer, shorter = s2, s1
    else:
        longer, shorter = s1, s2

    # Turning anything into an empty sequence costs one deletion per element.
    if len(shorter) == 0:
        return len(longer)

    # Row 0: cost of building each prefix of `shorter` from nothing.
    prev_row = list(range(len(shorter) + 1))
    for row_number, item_long in enumerate(longer, start=1):
        # Column 0: cost of deleting the first `row_number` items of `longer`.
        cur_row = [row_number]
        for col, item_short in enumerate(shorter):
            via_insert = prev_row[col + 1] + 1
            via_delete = cur_row[col] + 1
            # A mismatch costs 1, a match costs 0 (bool used as an int).
            via_substitute = prev_row[col] + (item_long != item_short)
            cur_row.append(min(via_insert, via_delete, via_substitute))
        prev_row = cur_row

    return prev_row[-1]

def tokenize_document(doc):
    """Split ``doc`` into sentences on ``'.'`` and drop empty ones.

    Each fragment is stripped of surrounding whitespace *before* the
    emptiness check, so whitespace-only fragments (for example the tail of
    ``"One. Two. "`` or the gap in ``"One. . Two."``) are discarded instead
    of being returned as empty strings.

    Args:
        doc: The document text.

    Returns:
        A list of non-empty, whitespace-stripped sentence strings, in the
        order they appear in ``doc``.
    """
    stripped_fragments = (fragment.strip() for fragment in doc.split('.'))
    return [fragment for fragment in stripped_fragments if fragment]

def a_star(sentences1, sentences2):
    """Pair up the sentences of two documents by position.

    Sentence ``k`` of the first document is aligned with sentence ``k`` of
    the second. Once the shorter document runs out, the remaining sentences
    of the longer one are paired with ``-1`` to mark "no counterpart".
    Note that, despite the name, no search is performed here: the alignment
    depends only on the two lengths.

    Args:
        sentences1: Sentences of Document 1.
        sentences2: Sentences of Document 2.

    Returns:
        A list of ``(index_in_doc1, index_in_doc2)`` tuples, where ``-1``
        stands for a missing sentence on that side.
    """
    count1 = len(sentences1)
    count2 = len(sentences2)
    shared = min(count1, count2)

    # Positions present in both documents are aligned one-to-one.
    alignment = [(k, k) for k in range(shared)]
    # At most one of the two tails below is non-empty.
    alignment.extend((k, -1) for k in range(shared, count1))  # only in Document 1
    alignment.extend((-1, k) for k in range(shared, count2))  # only in Document 2
    return alignment

def run_test_case(doc1, doc2):
    """Align two documents sentence by sentence and measure each pair.

    The documents are tokenized into sentences, aligned with ``a_star``,
    and the sentences plus the alignment are printed. For every aligned
    pair the Levenshtein distance between the two sentences is computed.

    Args:
        doc1: Text of Document 1.
        doc2: Text of Document 2.

    Returns:
        A tuple ``(alignment_path, edit_distances)``. ``edit_distances`` has
        one entry per alignment step: the edit distance for aligned pairs,
        or ``None`` when one side has no corresponding sentence.
    """
    sentences1 = tokenize_document(doc1)
    sentences2 = tokenize_document(doc2)
    alignment_path = a_star(sentences1, sentences2)

    print("Sentences from Document 1 :", sentences1)
    print("Sentences from Document 2 :", sentences2)
    print("Alignment Path :", alignment_path)

    def pair_distance(idx1, idx2):
        # -1 on either side marks a sentence without a counterpart.
        if idx1 == -1 or idx2 == -1:
            return None
        return levenshtein_distance(sentences1[idx1], sentences2[idx2])

    edit_distances = [pair_distance(idx1, idx2) for idx1, idx2 in alignment_path]
    return alignment_path, edit_distances

# Demo scenarios as (heading to print, Document 1, Document 2).
# Every heading after the first starts with a newline so that consecutive
# runs are separated by a blank line in the output.
test_cases = [
    # Test Case 1 : Identical Documents
    (
        "Test Case 1 - Identical Documents",
        "Airlines has to be tested. Very Quickly.",
        "Airlines has to be tested. Very Quickly.",
    ),
    # Test Case 2 : Slightly Modified Document
    (
        "\nTest Case 2 - Slightly Modified Documents",
        "Airline offers exceptional service to travelers. Passengers can enjoy in flight entertainment.",
        "Airline provides outstanding service to its customers. Travelers can enjoy free meals.",
    ),
    # Test Case 3 : Completely Different Documents
    (
        "\nTest Case 3 - Completely Different Documents",
        "We offer premium services including Business class options and first class. Majorly High Prices.",
        "Travelers have access to comfortable seating. Not Affordable Majorly.",
    ),
    # Test Case 4 : Partial Overlap
    (
        "\nTest Case 4 - Partial Overlap",
        "Offering state-of-the-art technology and services. Explore new destinations.",
        "Offers various tour packages that include flights and guided tours. Expert travel planners.",
    ),
]

# Run each scenario and report the per-sentence edit distances.
for heading, doc1, doc2 in test_cases:
    print(heading)
    output = run_test_case(doc1, doc2)
    print("Edit Distances :", output[1])

Test Case 1 - Identical Documents
Sentences from Document 1 : ['Airlines has to be tested', 'Very Quickly']
Sentences from Document 2 : ['Airlines has to be tested', 'Very Quickly']
Alignment Path : [(0, 0), (1, 1)]
Edit Distances : [0, 0]

Test Case 2 - Slightly Modified Documents
Sentences from Document 1 : ['Airline offers exceptional service to travelers', 'Passengers can enjoy in flight entertainment']
Sentences from Document 2 : ['Airline provides outstanding service to its customers', 'Travelers can enjoy free meals']
Alignment Path : [(0, 0), (1, 1)]
Edit Distances : [26, 25]

Test Case 3 - Completely Different Documents
Sentences from Document 1 : ['We offer premium services including Business class options and first class', 'Majorly High Prices']
Sentences from Document 2 : ['Travelers have access to comfortable seating', 'Not Affordable Majorly']
Alignment Path : [(0, 0), (1, 1)]
Edit Distances : [59, 21]

Test Case 4 - Partial Overlap
Sentences from Document 1 : ['Offering 