<a href="https://colab.research.google.com/github/MehrdadJalali-KIT/LLM-ELN/blob/main/Evaluation_LLM_ELN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from functools import lru_cache

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.tokenize import word_tokenize
import spacy

# Define the three sets of answers that are compared pairwise further down.
# Each answer is a triple-quoted literal, so the leading newline and the
# trailing blank lines are part of the text handed to the metrics.
# NOTE(review): presumably each answer comes from a different source (the
# notebook name suggests an LLM/ELN evaluation) — confirm provenance.
# NOTE(review): this literal appears to contain invisible Unicode formatting
# marks in front of some words; depending on the tokenizer they may stay
# attached to tokens and lower the word-overlap scores — confirm and consider
# normalizing the text before scoring.
answer1 = """
The main findings from the CO adsorption experiment on ‎HKUST-1 as conducted in the ‎detailed procedural ‎documentation are not explicitly ‎stated in the provided text ‎excerpts. The document primarily ‎focuses on the methodology of ‎the experiment, covering the ‎preparation of the sample and ‎the experimental setup but does ‎not include specific results or ‎analysis data.‎

The experiment involved:‎
‎1. Preparing traps with a heating ‎process to remove contaminants.‎
‎2. Running background checks ‎in an argon atmosphere.‎
‎3. Heating the HKUST-1 sample ‎and subsequently cooling it.‎
‎4. Exposing HKUST-1 to CO at ‎room temperature.‎
‎5. Flushing the system with ‎argon to remove CO after the ‎adsorption test.‎

To find specific results such as ‎the extent of CO adsorption, ‎changes in spectral data, or any ‎modifications in the structural or ‎chemical properties of HKUST-1 ‎post-exposure, you would need ‎to access the sections or data ‎analysis parts that discuss the ‎interpretation of the recorded ‎spectra and any observed peaks ‎or changes. These details might ‎be in a separate section that ‎directly addresses the ‎experimental observations and ‎conclusions. If such sections ‎aren't included in the provided ‎documents, it may be necessary ‎to look at related experimental ‎reports or publications by the ‎research team that detail the ‎findings of the CO adsorption ‎behavior on HKUST-1.‎


"""

# Second answer: a short, list-style response.
# NOTE(review): the text contains the misspelling "dependedq" and what look
# like invisible Unicode formatting marks. It is left exactly as-is because
# this string is the data being scored, not documentation.
answer2 = """
The findings are strongly ‎dependedq on the type of ‎experiment performed.‎

Can include: isotherms, ‎storage/adsorption capacity, ‎kinetics, surface ‎structure/modification, ‎diffusion, ‎activation/adsorption ‎energies and many more


"""

# Third answer: a single-paragraph response. As a triple-quoted literal it
# starts and ends with a newline, which is part of the scored text.
answer3 = """
The main findings from carbon monoxide (CO) adsorption experiments on HKUST-1 typically include insights into its adsorption capacity, kinetics, and thermodynamics. Researchers investigate parameters such as CO uptake at different pressures, temperature dependencies, and the MOF's stability during adsorption-desorption cycles. Additionally, studies may explore the influence of factors like MOF morphology, functionalization, and guest-host interactions on CO adsorption behavior. Overall, these experiments aim to elucidate the fundamental properties of CO adsorption on HKUST-1, providing valuable information for various applications such as gas storage, separation, and catalysis.
"""

# Cosine similarity between the TF-IDF vectors of two texts
def calculate_cosine_similarity(text1, text2):
    """Return the cosine similarity of *text1* and *text2* under TF-IDF.

    A fresh ``TfidfVectorizer`` is fitted on just these two texts, so the
    vocabulary and the IDF weights are derived from the pair alone.

    Args:
        text1: First document.
        text2: Second document.

    Returns:
        The single entry of the 1x1 similarity matrix computed between the
        first and the second row of the TF-IDF matrix.
    """
    pair_matrix = TfidfVectorizer().fit_transform([text1, text2])
    # Slice (rather than index) so each operand stays a 2-D, one-row matrix.
    first_row, second_row = pair_matrix[0:1], pair_matrix[1:2]
    similarity = cosine_similarity(first_row, second_row)
    return similarity[0][0]

# Function to calculate Jaccard similarity
def calculate_jaccard_similarity(text1, text2):
    """Return the Jaccard index of the token sets of *text1* and *text2*.

    Both texts are split with ``word_tokenize`` and compared as sets of
    distinct tokens. No case folding or other normalization is applied here,
    so tokens that differ only in case count as different.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        ``len(intersection) / len(union)`` of the two token sets. When both
        texts yield no tokens at all, the union is empty and 1.0 is returned
        (two empty sets are identical) instead of raising
        ``ZeroDivisionError``.
    """
    words_text1 = set(word_tokenize(text1))
    words_text2 = set(word_tokenize(text2))
    union = words_text1 | words_text2
    if not union:
        # Neither text produced a token; follow the usual convention
        # J(empty, empty) = 1 rather than dividing by zero.
        return 1.0
    return len(words_text1 & words_text2) / len(union)

# BLEU score of a candidate text against a single reference text
def calculate_bleu_score(reference, candidate):
    """Return the smoothed sentence-level BLEU of *candidate* vs *reference*.

    Both texts are lowercased before tokenization so the comparison is
    case-insensitive. The score is computed with ``sentence_bleu`` using
    ``SmoothingFunction().method1`` and a single reference.

    Args:
        reference: Text treated as the reference.
        candidate: Text whose tokens are scored against the reference.

    Returns:
        Whatever ``sentence_bleu`` returns for the tokenized pair.
    """
    ref_tokens, cand_tokens = (
        word_tokenize(text.lower()) for text in (reference, candidate)
    )
    smoothing = SmoothingFunction().method1
    return sentence_bleu(
        [ref_tokens],  # sentence_bleu expects a list of reference token lists
        cand_tokens,
        smoothing_function=smoothing,
    )

@lru_cache(maxsize=1)
def _load_spacy_model(name):
    """Load the spaCy pipeline called *name* once and reuse it afterwards."""
    return spacy.load(name)


# Function to calculate Semantic similarity
def calculate_semantic_similarity(text1, text2):
    """Return the spaCy document similarity between *text1* and *text2*.

    Both texts are processed with the ``en_core_web_md`` pipeline and compared
    via ``Doc.similarity``. The pipeline is loaded on the first call only and
    then reused, instead of being reloaded from disk on every call.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The value returned by ``doc1.similarity(doc2)``.

    Raises:
        Any exception raised by ``spacy.load`` (for example when the model is
        not installed) propagates unchanged; failed loads are not cached.
    """
    nlp = _load_spacy_model('en_core_web_md')
    doc1 = nlp(text1)
    doc2 = nlp(text2)
    return doc1.similarity(doc2)

# Score every answer pair with each metric; pair order is 1-2, 1-3, 2-3
_answer_pairs = [(answer1, answer2), (answer1, answer3), (answer2, answer3)]

cosine_sim_1_2, cosine_sim_1_3, cosine_sim_2_3 = [
    calculate_cosine_similarity(left, right) for left, right in _answer_pairs
]

jaccard_sim_1_2, jaccard_sim_1_3, jaccard_sim_2_3 = [
    calculate_jaccard_similarity(left, right) for left, right in _answer_pairs
]

# For BLEU the first answer of each pair is the reference, the second the candidate
bleu_score_1_to_2, bleu_score_1_to_3, bleu_score_2_to_3 = [
    calculate_bleu_score(left, right) for left, right in _answer_pairs
]

semantic_sim_1_2, semantic_sim_1_3, semantic_sim_2_3 = [
    calculate_semantic_similarity(left, right) for left, right in _answer_pairs
]

# Results count as complete when no score is None.
# NOTE(review): the helpers visible in this file do not appear to return None,
# so this is presumably always True as written — confirm the intended check.
_all_scores = (
    cosine_sim_1_2, cosine_sim_1_3, cosine_sim_2_3,
    jaccard_sim_1_2, jaccard_sim_1_3, jaccard_sim_2_3,
    bleu_score_1_to_2, bleu_score_1_to_3, bleu_score_2_to_3,
    semantic_sim_1_2, semantic_sim_1_3, semantic_sim_2_3,
)
completeness = all(score is not None for score in _all_scores)

# Report: one heading plus one line of four-decimal scores per metric
_report = [
    ("Cosine Similarity:",
     f"1-2: {cosine_sim_1_2:.4f}, 1-3: {cosine_sim_1_3:.4f}, 2-3: {cosine_sim_2_3:.4f}"),
    ("\nJaccard Similarity:",
     f"1-2: {jaccard_sim_1_2:.4f}, 1-3: {jaccard_sim_1_3:.4f}, 2-3: {jaccard_sim_2_3:.4f}"),
    ("\nBLEU Score:",
     f"1 to 2: {bleu_score_1_to_2:.4f}, 1 to 3: {bleu_score_1_to_3:.4f}, 2 to 3: {bleu_score_2_to_3:.4f}"),
    ("\nSemantic Similarity:",
     f"1-2: {semantic_sim_1_2:.4f}, 1-3: {semantic_sim_1_3:.4f}, 2-3: {semantic_sim_2_3:.4f}"),
]
for _heading, _scores_line in _report:
    print(_heading)
    print(_scores_line)

# Final status line
_status = 'Complete' if completeness else 'Incomplete'
print(f"\nCompleteness: {_status}")
