In [None]:
# ==============================
# 0. Install
# ==============================
!pip install sentence-transformers rouge-score scikit-learn
!pip install openpyxl




In [None]:
# ==============================
# 1. Imports
# ==============================
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util
from rouge_score import rouge_scorer
import matplotlib.pyplot as plt

# Shared sentence-embedding model, created once at module level and used by
# compute_sbert_similarity to encode texts.
sbert_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Shared ROUGE scorer restricted to the 'rougeL' metric with stemming enabled;
# used by compute_rouge_similarity.
rouge_scorer_instance = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

In [None]:
# ==============================
# 2. Preprocess
# ==============================
def preprocess(text):
    """Return ``text`` with leading and trailing whitespace removed."""
    trimmed = text.strip()
    return trimmed

# ==============================
# 3. SBERT Similarity (3 texts)
# ==============================
def compute_sbert_similarity(text1, text2, text3):
    """Compute pairwise SBERT cosine similarities between three texts.

    Each text is passed through ``preprocess`` and embedded with the
    module-level ``sbert_model``.

    Args:
        text1: First text.
        text2: Second text.
        text3: Third text.

    Returns:
        dict: Nine floats keyed ``'s{i}{j}'`` (i, j in 1..3), in row-major
        order, where ``s{i}{j}`` is the cosine similarity between the
        embeddings of text i and text j.
    """
    cleaned_texts = [preprocess(t) for t in (text1, text2, text3)]

    # Encode all texts in one batch and get the whole pairwise matrix from a
    # single cos_sim call instead of nine separate calls on single vectors.
    # Values may differ from per-text encoding only at float rounding level.
    embeddings = sbert_model.encode(cleaned_texts, convert_to_tensor=True)
    sim_matrix = util.cos_sim(embeddings, embeddings).tolist()

    count = len(cleaned_texts)
    return {
        f's{i + 1}{j + 1}': sim_matrix[i][j]
        for i in range(count)
        for j in range(count)
    }

# ==============================
# 4. ROUGE-L Similarity (3 texts)
# ==============================
def compute_rouge_similarity(text1, text2, text3):
    """Compute pairwise ROUGE-L F-measures between three texts.

    Every ordered pair (i, j) is scored with the module-level
    ``rouge_scorer_instance``; text i is passed as the first argument to
    ``score`` and text j as the second. Texts are run through ``preprocess``
    first.

    Returns:
        dict: Nine floats keyed ``'r{i}{j}'`` (i, j in 1..3), in row-major
        order, each the ``fmeasure`` of the ``'rougeL'`` result.
    """
    cleaned_texts = [preprocess(t) for t in (text1, text2, text3)]

    rouge_scores = {}
    for row, first in enumerate(cleaned_texts, start=1):
        for col, second in enumerate(cleaned_texts, start=1):
            result = rouge_scorer_instance.score(first, second)
            rouge_scores[f'r{row}{col}'] = result['rougeL'].fmeasure
    return rouge_scores

# ==============================
# 5. Combined Similarity (3 texts)
# ==============================
def compute_combined_score(text1, text2, text3, *, sbert_weight=0.85, rouge_weight=0.15):
    """Compute SBERT, ROUGE-L and weighted combined scores for three texts.

    The combined score of each ordered pair (i, j) is
    ``sbert_weight * s{i}{j} + rouge_weight * r{i}{j}``.

    Args:
        text1: First text.
        text2: Second text.
        text3: Third text.
        sbert_weight: Weight applied to the SBERT similarity (default 0.85).
        rouge_weight: Weight applied to the ROUGE-L F-measure (default 0.15).

    Returns:
        dict: All entries of the SBERT dict (``'s{i}{j}'``), followed by all
        entries of the ROUGE dict (``'r{i}{j}'``), followed by the combined
        scores (``'c{i}{j}'``), for i, j in 1..3.
    """
    sbert_dict = compute_sbert_similarity(text1, text2, text3)
    rouge_dict = compute_rouge_similarity(text1, text2, text3)

    combined_dict = {}
    for sbert_key, sbert_value in sbert_dict.items():
        # Keys look like 's12'; the trailing pair index is shared with the
        # matching ROUGE ('r12') and combined ('c12') entries.
        pair = sbert_key[1:]
        combined_dict[f'c{pair}'] = (
            sbert_weight * sbert_value + rouge_weight * rouge_dict[f'r{pair}']
        )

    return {**sbert_dict, **rouge_dict, **combined_dict}

# ==============================
# 6. Main Evaluation Function
# ==============================
def evaluate_description_pairs(text1, text2, text3):
    """
    Score every ordered (i -> j) pair of the three texts and tabulate the
    results.

    Returns a tuple of three 3x3 DataFrames, in this order: SBERT, ROUGE-L,
    Combined. Rows and columns are labelled Text1..Text3 and every value is
    rounded to 3 decimal places.
    """
    scores = compute_combined_score(text1, text2, text3)
    positions = range(1, 4)

    def _as_frame(prefix):
        # Collect the 3x3 grid of rounded scores stored under `prefix`.
        grid = [
            [round(scores[f'{prefix}{row}{col}'], 3) for col in positions]
            for row in positions
        ]
        row_labels = [f"Text{row}" for row in positions]
        col_labels = [f"Text{col}" for col in positions]
        return pd.DataFrame(grid, index=row_labels, columns=col_labels)

    return _as_frame('s'), _as_frame('r'), _as_frame('c')
# ==============================
# 7. Example Usage
# ==============================
if __name__ == "__main__":

    # Three descriptions of a trie implementation to compare with each other.
    txt1 = """The associated files porvide an implementation of the trie data structure. The structure provides fast  from strings to values. The code features functionaly for creating and destroying tries, inserting and  removing values from the trie, looking up values and finding the number of entries.  trie.c implements these functions by allocating trie nodes as needed along each path of characters in  the key, incrementing a use_count so the code knows when a node is no longer referenced and can be freed.  This mechanism makes insertions, lookups, and removals work in a straightforward way, each character in the  key selects the correct branch of the tree, with ample space in each node’s next array for all possible byte  values (0–255). The trie keeps track of values (void pointers) directly within each node,  along with a sentinel constant (TRIE_NULL) to mark unused entries."""
    txt2 = """The provided code implements a trie (prefix tree) data structure in C, which allows fast mapping of strings (or binary keys) to associated values. The trie supports insertion, lookup, and deletion of key-value pairs efficiently.  The implementation defines a Trie structure that holds a root node, and each node (TrieNode) maintains a value, a usage count, and an array of 256 pointers to child nodes, representing all possible byte values.  Key Functionalities: Creation & Deletion:  trie_new() initializes a new trie. trie_free() recursively deallocates all nodes. Insertion:  trie_insert() inserts a string key and its associated value. trie_insert_binary() does the same for binary keys. If a key already exists, its value is updated. Lookup:  trie_lookup() retrieves the value associated with a string key. trie_lookup_binary() does the same for binary keys. Deletion:  trie_remove() removes a key and reclaims memory if no other keys share its nodes. trie_remove_binary() is its counterpart for binary keys. Utility:  trie_num_entries() returns the total number of stored entries. The code ensures memory efficiency by freeing unused nodes when keys are deleted. It also includes rollback mechanisms to handle failed memory allocations during insertion. This trie implementation is useful for applications requiring fast prefix-based lookups, such as dictionaries, auto-completion, or routing tables."""
    txt3 = """This C code implements a trie (prefix tree) data structure, providing a flexible way to store and retrieve key-value pairs where keys are strings or binary sequences. The core of the implementation revolves around dynamically allocated nodes, each representing a character in the key and capable of holding a value (TrieValue). Each node can branch into 256 directions (one for each possible byte value), enabling fast lookup, insertion, and deletion.  The code defines functions to create (trie_new) and free (trie_free) the trie. Insertion functions (trie_insert and trie_insert_binary) add keys and associated values to the trie, either as null-terminated strings or binary data. If memory allocation fails during insertion, a rollback mechanism undoes any partial additions. Lookup functions retrieve stored values (trie_lookup, trie_lookup_binary), while removal functions (trie_remove, trie_remove_binary) delete entries and clean up unused nodes based on reference counting (use_count).  The design ensures memory efficiency and safety by recursively freeing nodes and tracking usage to avoid memory leaks. It supports general-purpose usage by treating values as opaque pointers (void *), allowing flexibility in the kind of data stored. Overall, the code offers a robust, low-level trie implementation suitable for fast associative data storage with both textual and binary keys."""

    # Build the SBERT, ROUGE-L and Combined similarity tables.
    df_sbert, df_rouge, df_combined = evaluate_description_pairs(txt1, txt2, txt3)

    # Print each table under its own banner.
    reports = (
        ("=== SBERT Similarity Matrix ===", df_sbert),
        ("\n=== ROUGE-L Similarity Matrix ===", df_rouge),
        ("\n=== Combined Similarity Matrix ===", df_combined),
    )
    for banner, matrix in reports:
        print(banner)
        print(matrix)

    # Write all three tables to one workbook, one sheet per metric.
    workbook_sheets = (
        ('SBERT', df_sbert),
        ('ROUGE-L', df_rouge),
        ('Combined', df_combined),
    )
    with pd.ExcelWriter("trie.xlsx", engine='openpyxl') as writer:
        for sheet_label, matrix in workbook_sheets:
            matrix.to_excel(writer, sheet_name=sheet_label, index=True)


=== SBERT Similarity Matrix ===
       Text1  Text2  Text3
Text1  1.000  0.835  0.862
Text2  0.835  1.000  0.918
Text3  0.862  0.918  1.000

=== ROUGE-L Similarity Matrix ===
       Text1  Text2  Text3
Text1  1.000  0.199  0.184
Text2  0.199  1.000  0.307
Text3  0.184  0.307  1.000

=== Combined Similarity Matrix ===
       Text1  Text2  Text3
Text1   1.00  0.740  0.760
Text2   0.74  1.000  0.826
Text3   0.76  0.826  1.000

All similarity matrices saved to all_similarities.xlsx with separate sheets.
