In [1]:
import networkx as nx
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Input triplets
triplets = [
    ("Diabetes", "causes", "Neuropathy"),
    ("Diabetes", "increases risk of", "Kidney Disease"),
    ("Diabetes", "treated with", "Metformin"),
    ("Diabetes", "managed by", "Lifestyle Changes"),
    ("Kidney Disease", "leads to", "Dialysis Requirement"),
    ("Kidney Disease", "caused by", "Hypertension"),
    ("Hypertension", "increases risk of", "Heart Disease"),
    ("Hypertension", "treated with", "ACE Inhibitors"),
    ("Heart Disease", "leads to", "Heart Failure"),
    ("Heart Disease", "caused by", "High Cholesterol"),
    ("High Cholesterol", "treated with", "Statins"),
    ("High Cholesterol", "linked to", "Obesity"),
    ("Obesity", "causes", "Diabetes"),
    ("Obesity", "managed by", "Exercise"),
    ("Exercise", "reduces risk of", "Hypertension"),
    ("Exercise", "recommended for", "Heart Health"),
    ("Smoking", "causes", "Lung Cancer"),
    ("Smoking", "increases risk of", "Heart Disease"),
    ("Lung Cancer", "treated with", "Chemotherapy"),
    ("Chemotherapy", "causes", "Fatigue"),
    ("Fatigue", "managed by", "Physical Therapy"),
    ("Physical Therapy", "recommended for", "Muscle Weakness"),
    ("Muscle Weakness", "associated with", "Vitamin D Deficiency"),
    ("Vitamin D Deficiency", "causes", "Osteoporosis"),
    ("Osteoporosis", "leads to", "Bone Fractures"),
    ("Bone Fractures", "treated with", "Calcium Supplements"),
    ("Calcium Supplements", "recommended for", "Osteoporosis"),
    ("Diabetes", "causes", "Retinopathy"),
    ("Retinopathy", "treated with", "Laser Therapy"),
    ("Laser Therapy", "used in", "Ophthalmology"),
    ("Ophthalmology", "treats", "Vision Disorders"),
    ("Kidney Disease", "linked to", "High Blood Pressure"),
    ("High Blood Pressure", "causes", "Stroke"),
    ("Stroke", "treated with", "Rehabilitation Therapy"),
    ("Rehabilitation Therapy", "improves", "Motor Function"),
    ("Motor Function", "affected by", "Nerve Damage"),
    ("Nerve Damage", "caused by", "Diabetes"),
    ("Heart Disease", "causes", "Chest Pain"),
    ("Chest Pain", "diagnosed by", "ECG"),
    ("ECG", "used in", "Cardiology"),
    ("Cardiology", "treats", "Heart Disorders"),
    ("Heart Disorders", "linked to", "Hypertension"),
    ("Hypertension", "monitored by", "Blood Pressure Monitors"),
    ("Blood Pressure Monitors", "recommended for", "Home Monitoring"),
    ("Obesity", "linked to", "Sleep Apnea"),
    ("Sleep Apnea", "managed by", "CPAP Machines"),
    ("CPAP Machines", "improve", "Breathing Patterns"),
    ("Breathing Patterns", "affected by", "Asthma"),
    ("Asthma", "triggered by", "Airborne Allergens"),
    ("Airborne Allergens", "include", "Pollen"),
    ("Pollen", "causes", "Seasonal Allergies"),
    ("Seasonal Allergies", "treated with", "Antihistamines"),
    ("Antihistamines", "reduce", "Allergic Reactions"),
    ("Allergic Reactions", "cause", "Skin Rashes"),
    ("Skin Rashes", "treated with", "Topical Creams"),
    ("Topical Creams", "recommended for", "Eczema"),
    ("Eczema", "causes", "Skin Irritation"),
    ("Skin Irritation", "managed by", "Moisturizers"),
    ("Moisturizers", "reduce", "Dry Skin"),
    ("Dry Skin", "linked to", "Dehydration"),
    ("Dehydration", "caused by", "Heat Stroke"),
    ("Heat Stroke", "managed by", "Rehydration Therapy"),
    ("Rehydration Therapy", "prevents", "Electrolyte Imbalance"),
    ("Electrolyte Imbalance", "causes", "Muscle Cramps"),
    ("Muscle Cramps", "treated with", "Magnesium Supplements"),
    ("Magnesium Supplements", "recommended for", "Muscle Health"),
    ("Muscle Health", "improved by", "Exercise"),
    ("Exercise", "recommended for", "Weight Management"),
    ("Weight Management", "reduces risk of", "Diabetes"),
    ("Diabetes", "causes", "Peripheral Neuropathy"),
    ("Peripheral Neuropathy", "treated with", "Pain Relievers"),
    ("Pain Relievers", "used for", "Chronic Pain"),
    ("Chronic Pain", "managed by", "Physical Therapy"),
    ("Physical Therapy", "improves", "Joint Mobility"),
    ("Joint Mobility", "reduced by", "Arthritis"),
    ("Arthritis", "treated with", "Anti-Inflammatory Drugs"),
    ("Anti-Inflammatory Drugs", "reduce", "Joint Swelling"),
    ("Joint Swelling", "causes", "Reduced Mobility"),
    ("Reduced Mobility", "managed by", "Assistive Devices"),
    ("Assistive Devices", "recommended for", "Elderly Patients"),
    ("Elderly Patients", "at risk of", "Osteoporosis"),
    ("Osteoporosis", "causes", "Fractures"),
    ("Fractures", "treated with", "Bone Grafts"),
    ("Bone Grafts", "used in", "Orthopedic Surgery"),
    ("Orthopedic Surgery", "required for", "Severe Fractures"),
    ("Severe Fractures", "lead to", "Reduced Quality of Life"),
    ("Reduced Quality of Life", "improved by", "Rehabilitation Therapy"),
    ("Rehabilitation Therapy", "supports", "Mental Health"),
    ("Mental Health", "affected by", "Chronic Illness"),
    ("Chronic Illness", "linked to", "Depression"),
    ("Depression", "treated with", "SSRIs"),
    ("SSRIs", "used for", "Anxiety Disorders"),
    ("Anxiety Disorders", "cause", "Sleep Disturbances"),
    ("Sleep Disturbances", "managed by", "Melatonin Supplements"),
    ("Melatonin Supplements", "improve", "Sleep Quality"),
    ("Sleep Quality", "linked to", "Mental Well-being"),
    ("Mental Well-being", "improved by", "Exercise"),
    ("Exercise", "benefits", "Cardiovascular Health"),
    ("Cardiovascular Health", "enhanced by", "Heart-Healthy Diet"),
    ("Heart-Healthy Diet", "reduces", "Cholesterol Levels"),
    ("Cholesterol Levels", "monitored by", "Blood Tests"),
    ("Blood Tests", "used to", "Diagnose Conditions"),
    ("Conditions", "include", "Diabetes"),
]


# Build the knowledge graph
def BuildKG(triplets):
    G = nx.DiGraph()
    RELdir = {}
    for head, relation, tail in triplets:
        G.add_edge(head, tail, label=relation)
        RELdir[(head, tail)] = relation
    return G, RELdir

# Hardcoded ground truths
def hardcoded_ground_truth():
    return [
        "Diabetes can cause complications such as nerve damage, kidney disease, eye damage (retinopathy), heart disease, and poor wound healing.",
        "Diabetes increases the risk of cardiovascular disease, stroke, kidney failure, blindness, neuropathy, and lower limb amputation.",
        "Diabetes is managed or treated using insulin therapy, oral medications, diet control, exercise, and lifestyle changes.",
        "Smoking causes lung cancer, chronic obstructive pulmonary disease (COPD), heart disease, stroke, and respiratory infections.",
        "Hypertension increases the risk of heart disease, stroke, kidney failure, aneurysms, and vision loss."
    ]

# Retrieve responses from knowledge graph
def retrieve_from_graph(query, G, RELdir):
    entity, relation_type = query.split(" ")[2], " ".join(query.split(" ")[3:-1])
    results = []
    for (head, tail), relation in RELdir.items():
        if head == entity and relation_type.lower() in relation.lower():
            results.append(f"{head} {relation} {tail}.")
    return results if results else ["No information found."]

# Semantic similarity evaluation
def evaluate_semantic_similarity(generated_responses, ground_truth):
    model = SentenceTransformer('all-MiniLM-L6-v2')
    scores = []
    for gen, truth in zip(generated_responses, ground_truth):
        gen_embedding = model.encode(gen, convert_to_tensor=True).cpu()
        truth_embedding = model.encode(truth, convert_to_tensor=True).cpu()
        similarity = cosine_similarity(gen_embedding.numpy().reshape(1, -1), truth_embedding.numpy().reshape(1, -1))
        scores.append(similarity[0][0])
    return scores

# Main Execution
if __name__ == "__main__":
    # Queries
    queries = [
        "What does Diabetes cause?",
        "What does Diabetes increase risk of?",
        "What does Diabetes treat?",
        "What does Smoking cause?",
        "What does Hypertension increase risk of?"
    ]

    # Build graph and relationships
    G, RELdir = BuildKG(triplets)

    # Generate responses from the graph
    generated_responses = []
    for query in queries:
        response = retrieve_from_graph(query, G, RELdir)
        generated_responses.append(response[0] if response else "No information found.")

    # Hardcoded ground truths
    ground_truth = hardcoded_ground_truth()

    # Evaluate semantic similarity
    scores = evaluate_semantic_similarity(generated_responses, ground_truth)

    # Print results
    for i, (query, generated, truth, score) in enumerate(zip(queries, generated_responses, ground_truth, scores)):
        print(f"Query {i+1}: {query}")
        print(f"Generated Response: {generated}")
        print(f"Ground Truth: {truth}")
        print(f"Semantic Similarity Score: {score:.4f}\n")


Query 1: What does Diabetes cause?
Generated Response: Diabetes causes Neuropathy.
Ground Truth: Diabetes can cause complications such as nerve damage, kidney disease, eye damage (retinopathy), heart disease, and poor wound healing.
Semantic Similarity Score: 0.6972

Query 2: What does Diabetes increase risk of?
Generated Response: No information found.
Ground Truth: Diabetes increases the risk of cardiovascular disease, stroke, kidney failure, blindness, neuropathy, and lower limb amputation.
Semantic Similarity Score: 0.0683

Query 3: What does Diabetes treat?
Generated Response: Diabetes causes Neuropathy.
Ground Truth: Diabetes is managed or treated using insulin therapy, oral medications, diet control, exercise, and lifestyle changes.
Semantic Similarity Score: 0.4538

Query 4: What does Smoking cause?
Generated Response: Smoking causes Lung Cancer.
Ground Truth: Smoking causes lung cancer, chronic obstructive pulmonary disease (COPD), heart disease, stroke, and respiratory infect

In [2]:
import networkx as nx
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
from sklearn.metrics.pairwise import cosine_similarity
import torch

# Input triplets
triplets = [
    ("Diabetes", "causes", "Neuropathy"),
    ("Diabetes", "increases risk of", "Kidney Disease"),
    ("Diabetes", "treated with", "Metformin"),
    ("Smoking", "causes", "Lung Cancer"),
    ("Hypertension", "increases risk of", "Heart Disease"),
]

# Build the knowledge graph
def BuildKG(triplets):
    G = nx.DiGraph()
    RELdir = {}
    for head, relation, tail in triplets:
        G.add_edge(head, tail, label=relation)
        RELdir[(head, tail)] = relation
    return G, RELdir

# Hardcoded ground truths
def hardcoded_ground_truth():
    return [
        "Diabetes can cause complications such as nerve damage, kidney disease, eye damage (retinopathy), heart disease, and poor wound healing.",
        "Diabetes increases the risk of cardiovascular disease, stroke, kidney failure, blindness, neuropathy, and lower limb amputation.",
        "Diabetes is managed or treated using insulin therapy, oral medications like metaformin, diet control, exercise, and lifestyle changes.",
        "Smoking causes lung cancer, chronic obstructive pulmonary disease (COPD), heart disease, stroke, and respiratory infections.",
        "Hypertension increases the risk of heart disease, stroke, kidney failure, aneurysms, and vision loss."
    ]

# Semantic Search: Build relation embeddings
def build_relation_embeddings(triplets, model):
    """Create embeddings for triplet relations using Sentence Transformers."""
    relations = [" ".join([head, relation, tail]) for head, relation, tail in triplets]
    relation_embeddings = model.encode(relations, convert_to_tensor=True)
    return relations, relation_embeddings

def retrieve_using_embeddings(query, relations, relation_embeddings, model, top_k=3):
    """Retrieve top-k matching relations using semantic similarity."""
    query_embedding = model.encode(query, convert_to_tensor=True)
    similarity_scores = cosine_similarity(query_embedding.cpu().numpy().reshape(1, -1),
                                          relation_embeddings.cpu().numpy())[0]
    top_indices = similarity_scores.argsort()[-top_k:][::-1]
    return [(relations[i], similarity_scores[i]) for i in top_indices]

# Query Expansion using GPT-Neo
def expand_query_with_gpt(query):
    """Expand the query using GPT-Neo."""
    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125M")
    model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-125M")

    prompt = f"Expand the query: '{query}' into more specific and related questions."
    inputs = tokenizer(prompt, return_tensors="pt", max_length=50, truncation=True)
    outputs = model.generate(**inputs, max_new_tokens=50, num_return_sequences=1)
    expanded_query = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return expanded_query.split(". ")

# Cosine Similarity Evaluation
def evaluate_similarity_with_ground_truth(model, generated_responses, ground_truth):
    """Evaluate cosine similarity between generated responses and ground truth answers."""
    scores = []
    for gen, truth in zip(generated_responses, ground_truth):
        gen_embedding = model.encode(gen, convert_to_tensor=True).cpu().numpy()
        truth_embedding = model.encode(truth, convert_to_tensor=True).cpu().numpy()
        similarity = cosine_similarity(gen_embedding.reshape(1, -1), truth_embedding.reshape(1, -1))
        scores.append(similarity[0][0])
    return scores

# Main Execution
if __name__ == "__main__":
    # Queries
    queries = [
        "What does Diabetes cause?",
        "What does Diabetes increase risk of?",
        "How is Diabetes treated?",
        "What does Smoking cause?",
        "What does Hypertension increase risk of?"
    ]

    # Build graph and relationships
    G, RELdir = BuildKG(triplets)

    # Initialize Sentence Transformer model
    model = SentenceTransformer('all-MiniLM-L6-v2')

    # Build relation embeddings
    relations, relation_embeddings = build_relation_embeddings(triplets, model)

    # Retrieve responses using semantic search
    generated_responses = []
    for query in queries:
        # Expand query using GPT-Neo
        print(f"\nOriginal Query: {query}")
        expanded_queries = expand_query_with_gpt(query)
        print(f"Expanded Queries: {expanded_queries}")

        # Use expanded queries to retrieve best match
        best_response = None
        best_score = -1
        for expanded_query in expanded_queries:
            top_matches = retrieve_using_embeddings(expanded_query, relations, relation_embeddings, model)
            if top_matches and top_matches[0][1] > best_score:
                best_response, best_score = top_matches[0]

        generated_responses.append(best_response if best_response else "No information found.")

    # Hardcoded ground truths
    ground_truth = hardcoded_ground_truth()

    # Evaluate semantic similarity with ground truths
    similarity_scores = evaluate_similarity_with_ground_truth(model, generated_responses, ground_truth)

    # Print Results
    print("\nFinal Results:")
    for i, (query, generated, truth, score) in enumerate(zip(queries, generated_responses, ground_truth, similarity_scores)):
        print(f"\nQuery {i+1}: {query}")
        print(f"Generated Response: {generated}")
        print(f"Ground Truth: {truth}")
        print(f"Cosine Similarity Score: {score:.4f}")



Original Query: What does Diabetes cause?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Expanded Queries: ["Expand the query: 'What does Diabetes cause?' into more specific and related questions.\n\nThe following is a sample of the query:\n\nSELECT * FROM `diet_data` WHERE `diet_id` = '1' AND `diet_type` = 'diabetes' AND `diet_"]

Original Query: What does Diabetes increase risk of?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Expanded Queries: ["Expand the query: 'What does Diabetes increase risk of?' into more specific and related questions.\n\nThe following is a list of the most common diabetes-related questions asked by the diabetes community:\n\nWhat is the risk of diabetes?\n\nWhat is the risk of diabetes?\n\nWhat is the risk of diabetes?\n\n"]

Original Query: How is Diabetes treated?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Expanded Queries: ["Expand the query: 'How is Diabetes treated?' into more specific and related questions.\n\nThe following is a sample of the data:\n\nThe data is from the Diabetes Treatment Database (DTD) and the data is from the Diabetes Treatment Database (DTD) and the data is from the Diabetes Treatment Database (DTD"]

Original Query: What does Smoking cause?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Expanded Queries: ["Expand the query: 'What does Smoking cause?' into more specific and related questions.\n\nA:\n\nI think you are looking for the following query:\nSELECT * FROM `tables` WHERE `tables`.`id` = '1' AND `tables`.`name` ='smoking'"]

Original Query: What does Hypertension increase risk of?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Expanded Queries: ["Expand the query: 'What does Hypertension increase risk of?' into more specific and related questions.\n\nThe following is a list of the most common questions asked by the health care industry.\n\n1", 'What is Hypertension?\n\n2', 'What is Hypertension?\n\n3', 'What is Hypertension?\n']

Final Results:

Query 1: What does Diabetes cause?
Generated Response: Diabetes causes Neuropathy
Ground Truth: Diabetes can cause complications such as nerve damage, kidney disease, eye damage (retinopathy), heart disease, and poor wound healing.
Cosine Similarity Score: 0.6830

Query 2: What does Diabetes increase risk of?
Generated Response: Diabetes increases risk of Kidney Disease
Ground Truth: Diabetes increases the risk of cardiovascular disease, stroke, kidney failure, blindness, neuropathy, and lower limb amputation.
Cosine Similarity Score: 0.7473

Query 3: How is Diabetes treated?
Generated Response: Diabetes treated with Metformin
Ground Truth: Diabetes is managed or tre

In [3]:
import networkx as nx
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Input triplets
triplets = [
    ("Diabetes", "causes", "Neuropathy"),
    ("Diabetes", "increases risk of", "Kidney Disease"),
    ("Diabetes", "treated with", "Metformin"),
    ("Diabetes", "managed by", "Lifestyle Changes"),
    ("Kidney Disease", "leads to", "Dialysis Requirement"),
    ("Kidney Disease", "caused by", "Hypertension"),
    ("Hypertension", "increases risk of", "Heart Disease"),
    ("Hypertension", "treated with", "ACE Inhibitors"),
    ("Heart Disease", "leads to", "Heart Failure"),
    ("Heart Disease", "caused by", "High Cholesterol"),
    ("High Cholesterol", "treated with", "Statins"),
    ("High Cholesterol", "linked to", "Obesity"),
    ("Obesity", "causes", "Diabetes"),
    ("Obesity", "managed by", "Exercise"),
    ("Exercise", "reduces risk of", "Hypertension"),
    ("Exercise", "recommended for", "Heart Health"),
    ("Smoking", "causes", "Lung Cancer"),
    ("Smoking", "increases risk of", "Heart Disease"),
    ("Lung Cancer", "treated with", "Chemotherapy"),
    ("Chemotherapy", "causes", "Fatigue"),
    ("Fatigue", "managed by", "Physical Therapy"),
    ("Physical Therapy", "recommended for", "Muscle Weakness"),
    ("Muscle Weakness", "associated with", "Vitamin D Deficiency"),
    ("Vitamin D Deficiency", "causes", "Osteoporosis"),
    ("Osteoporosis", "leads to", "Bone Fractures"),
    ("Bone Fractures", "treated with", "Calcium Supplements"),
    ("Calcium Supplements", "recommended for", "Osteoporosis"),
    ("Diabetes", "causes", "Retinopathy"),
    ("Retinopathy", "treated with", "Laser Therapy"),
    ("Laser Therapy", "used in", "Ophthalmology"),
    ("Ophthalmology", "treats", "Vision Disorders"),
    ("Kidney Disease", "linked to", "High Blood Pressure"),
    ("High Blood Pressure", "causes", "Stroke"),
    ("Stroke", "treated with", "Rehabilitation Therapy"),
    ("Rehabilitation Therapy", "improves", "Motor Function"),
    ("Motor Function", "affected by", "Nerve Damage"),
    ("Nerve Damage", "caused by", "Diabetes"),
    ("Heart Disease", "causes", "Chest Pain"),
    ("Chest Pain", "diagnosed by", "ECG"),
    ("ECG", "used in", "Cardiology"),
    ("Cardiology", "treats", "Heart Disorders"),
    ("Heart Disorders", "linked to", "Hypertension"),
    ("Hypertension", "monitored by", "Blood Pressure Monitors"),
    ("Blood Pressure Monitors", "recommended for", "Home Monitoring"),
    ("Obesity", "linked to", "Sleep Apnea"),
    ("Sleep Apnea", "managed by", "CPAP Machines"),
    ("CPAP Machines", "improve", "Breathing Patterns"),
    ("Breathing Patterns", "affected by", "Asthma"),
    ("Asthma", "triggered by", "Airborne Allergens"),
    ("Airborne Allergens", "include", "Pollen"),
    ("Pollen", "causes", "Seasonal Allergies"),
    ("Seasonal Allergies", "treated with", "Antihistamines"),
    ("Antihistamines", "reduce", "Allergic Reactions"),
    ("Allergic Reactions", "cause", "Skin Rashes"),
    ("Skin Rashes", "treated with", "Topical Creams"),
    ("Topical Creams", "recommended for", "Eczema"),
    ("Eczema", "causes", "Skin Irritation"),
    ("Skin Irritation", "managed by", "Moisturizers"),
    ("Moisturizers", "reduce", "Dry Skin"),
    ("Dry Skin", "linked to", "Dehydration"),
    ("Dehydration", "caused by", "Heat Stroke"),
    ("Heat Stroke", "managed by", "Rehydration Therapy"),
    ("Rehydration Therapy", "prevents", "Electrolyte Imbalance"),
    ("Electrolyte Imbalance", "causes", "Muscle Cramps"),
    ("Muscle Cramps", "treated with", "Magnesium Supplements"),
    ("Magnesium Supplements", "recommended for", "Muscle Health"),
    ("Muscle Health", "improved by", "Exercise"),
    ("Exercise", "recommended for", "Weight Management"),
    ("Weight Management", "reduces risk of", "Diabetes"),
    ("Diabetes", "causes", "Peripheral Neuropathy"),
    ("Peripheral Neuropathy", "treated with", "Pain Relievers"),
    ("Pain Relievers", "used for", "Chronic Pain"),
    ("Chronic Pain", "managed by", "Physical Therapy"),
    ("Physical Therapy", "improves", "Joint Mobility"),
    ("Joint Mobility", "reduced by", "Arthritis"),
    ("Arthritis", "treated with", "Anti-Inflammatory Drugs"),
    ("Anti-Inflammatory Drugs", "reduce", "Joint Swelling"),
    ("Joint Swelling", "causes", "Reduced Mobility"),
    ("Reduced Mobility", "managed by", "Assistive Devices"),
    ("Assistive Devices", "recommended for", "Elderly Patients"),
    ("Elderly Patients", "at risk of", "Osteoporosis"),
    ("Osteoporosis", "causes", "Fractures"),
    ("Fractures", "treated with", "Bone Grafts"),
    ("Bone Grafts", "used in", "Orthopedic Surgery"),
    ("Orthopedic Surgery", "required for", "Severe Fractures"),
    ("Severe Fractures", "lead to", "Reduced Quality of Life"),
    ("Reduced Quality of Life", "improved by", "Rehabilitation Therapy"),
    ("Rehabilitation Therapy", "supports", "Mental Health"),
    ("Mental Health", "affected by", "Chronic Illness"),
    ("Chronic Illness", "linked to", "Depression"),
    ("Depression", "treated with", "SSRIs"),
    ("SSRIs", "used for", "Anxiety Disorders"),
    ("Anxiety Disorders", "cause", "Sleep Disturbances"),
    ("Sleep Disturbances", "managed by", "Melatonin Supplements"),
    ("Melatonin Supplements", "improve", "Sleep Quality"),
    ("Sleep Quality", "linked to", "Mental Well-being"),
    ("Mental Well-being", "improved by", "Exercise"),
    ("Exercise", "benefits", "Cardiovascular Health"),
    ("Cardiovascular Health", "enhanced by", "Heart-Healthy Diet"),
    ("Heart-Healthy Diet", "reduces", "Cholesterol Levels"),
    ("Cholesterol Levels", "monitored by", "Blood Tests"),
    ("Blood Tests", "used to", "Diagnose Conditions"),
    ("Conditions", "include", "Diabetes"),
]


# Build the knowledge graph
def BuildKG(triplets):
    G = nx.DiGraph()
    RELdir = {}
    for head, relation, tail in triplets:
        G.add_edge(head, tail, label=relation)
        RELdir[(head, tail)] = relation
    return G, RELdir

# Hardcoded ground truths
def hardcoded_ground_truth():
    return [
        "Diabetes can cause complications such as nerve damage, kidney disease, eye damage (retinopathy), heart disease, and poor wound healing.",
        "Diabetes increases the risk of cardiovascular disease, stroke, kidney failure, blindness, neuropathy, and lower limb amputation.",
        "Diabetes is managed or treated using insulin therapy, oral medications, diet control, exercise, and lifestyle changes.",
        "Smoking causes lung cancer, chronic obstructive pulmonary disease (COPD), heart disease, stroke, and respiratory infections.",
        "Hypertension increases the risk of heart disease, stroke, kidney failure, aneurysms, and vision loss."
    ]

# Retrieve responses from knowledge graph
def retrieve_from_graph(query, G, RELdir):
    entity, relation_type = query.split(" ")[2], " ".join(query.split(" ")[3:-1])
    results = []
    for (head, tail), relation in RELdir.items():
        if head == entity and relation_type.lower() in relation.lower():
            results.append(f"{head} {relation} {tail}.")
    return results if results else ["No information found."]

# Semantic similarity evaluation
def evaluate_semantic_similarity(generated_responses, ground_truth):
    model = SentenceTransformer('all-MiniLM-L6-v2')
    scores = []
    for gen, truth in zip(generated_responses, ground_truth):
        gen_embedding = model.encode(gen, convert_to_tensor=True).cpu()
        truth_embedding = model.encode(truth, convert_to_tensor=True).cpu()
        similarity = cosine_similarity(gen_embedding.numpy().reshape(1, -1), truth_embedding.numpy().reshape(1, -1))
        scores.append(similarity[0][0])
    return scores

# Main Execution
if __name__ == "__main__":
    # Queries
    queries = [
        "What does Diabetes cause?",
        "What does Diabetes increase risk of?",
        "What does Diabetes treat?",
        "What does Smoking cause?",
        "What does Hypertension increase risk of?"
    ]

    # Build graph and relationships
    G, RELdir = BuildKG(triplets)

    # Generate responses from the graph
    generated_responses = []
        response = retrieve_from_graph(query, G, RELdir)
        generated_responses.append(response[0] if response else "No information found.")

    # Hardcoded ground truths
    ground_truth = hardcoded_ground_truth()

    # Evaluate semantic similarity
    scores = evaluate_semantic_similarity(generated_responses, ground_truth)

    # Print results
    for i, (query, generated, truth, score) in enumerate(zip(queries, generated_responses, ground_truth, scores)):
        print(f"Query {i+1}: {query}")
        print(f"Generated Response: {generated}")
        print(f"Ground Truth: {truth}")
        print(f"Semantic Similarity Score: {score:.4f}\n")


Query 1: What does Diabetes cause?
Generated Response: Diabetes causes Neuropathy.
Ground Truth: Diabetes can cause complications such as nerve damage, kidney disease, eye damage (retinopathy), heart disease, and poor wound healing.
Semantic Similarity Score: 0.6972

Query 2: What does Diabetes increase risk of?
Generated Response: No information found.
Ground Truth: Diabetes increases the risk of cardiovascular disease, stroke, kidney failure, blindness, neuropathy, and lower limb amputation.
Semantic Similarity Score: 0.0683

Query 3: What does Diabetes treat?
Generated Response: Diabetes causes Neuropathy.
Ground Truth: Diabetes is managed or treated using insulin therapy, oral medications, diet control, exercise, and lifestyle changes.
Semantic Similarity Score: 0.4538

Query 4: What does Smoking cause?
Generated Response: Smoking causes Lung Cancer.
Ground Truth: Smoking causes lung cancer, chronic obstructive pulmonary disease (COPD), heart disease, stroke, and respiratory infect