In [1]:
!git clone https://github.com/AziizBg/retriever

Cloning into 'retriever'...
remote: Enumerating objects: 236, done.[K
remote: Counting objects: 100% (236/236), done.[K
remote: Compressing objects: 100% (196/196), done.[K
remote: Total 236 (delta 63), reused 197 (delta 39), pack-reused 0 (from 0)[K
Receiving objects: 100% (236/236), 8.20 MiB | 9.85 MiB/s, done.
Resolving deltas: 100% (63/63), done.


In [2]:
%cd retriever

/content/retriever


In [6]:
!pip install -r requirements.txt

Looking in links: https://data.pyg.org/whl/torch-2.1.0+cpu.html


In [5]:
!git checkout synthetic_data

Branch 'synthetic_data' set up to track remote branch 'synthetic_data' from 'origin'.
Switched to a new branch 'synthetic_data'


In [22]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch
from torch_geometric.data import Data
import json
import gensim
from torch import nn
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer
from torch.utils.data import DataLoader
from pcst_fast import pcst_fast
import re
from datetime import datetime


# Load embedding modules

In [3]:
pretrained_repo = 'sentence-transformers/all-roberta-large-v1'
batch_size = 1024  # Adjust the batch size as needed

class Dataset(torch.utils.data.Dataset):
    def __init__(self, input_ids=None, attention_mask=None):
        super().__init__()
        self.data = {
            "input_ids": input_ids,
            "att_mask": attention_mask,
        }

    def __len__(self):
        return self.data["input_ids"].size(0)

    def __getitem__(self, index):
        if isinstance(index, torch.Tensor):
            index = index.item()
        batch_data = dict()
        for key in self.data.keys():
            if self.data[key] is not None:
                batch_data[key] = self.data[key][index]
        return batch_data

class Sentence_Transformer(nn.Module):

    def __init__(self, pretrained_repo):
        super(Sentence_Transformer, self).__init__()
        print(f"inherit model weights from {pretrained_repo}")
        self.bert_model = AutoModel.from_pretrained(pretrained_repo)

    def mean_pooling(self, model_output, attention_mask):
        token_embeddings = model_output[0]  # First element of model_output contains all token embeddings
        data_type = token_embeddings.dtype
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).to(data_type)
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

    def forward(self, input_ids, att_mask):
        bert_out = self.bert_model(input_ids=input_ids, attention_mask=att_mask)
        sentence_embeddings = self.mean_pooling(bert_out, att_mask)

        sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
        return sentence_embeddings

def load_sbert():

    model = Sentence_Transformer(pretrained_repo)
    tokenizer = AutoTokenizer.from_pretrained(pretrained_repo)

    # data parallel
    if torch.cuda.device_count() > 1:
        print(f'Using {torch.cuda.device_count()} GPUs')
        model = nn.DataParallel(model)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    return model, tokenizer, device


def sbert_text2embedding(model, tokenizer, device, text):
    if len(text) == 0:
        return torch.zeros((0, 1024))

    encoding = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    dataset = Dataset(input_ids=encoding.input_ids, attention_mask=encoding.attention_mask)

    # DataLoader
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    # Placeholder for storing the embeddings
    all_embeddings = []

    # Iterate through batches
    with torch.no_grad():

        for batch in dataloader:
            # Move batch to the appropriate device
            batch = {key: value.to(device) for key, value in batch.items()}

            # Forward pass
            embeddings = model(input_ids=batch["input_ids"], att_mask=batch["att_mask"])

            # Append the embeddings to the list
            all_embeddings.append(embeddings)

    # Concatenate the embeddings from all batches
    all_embeddings = torch.cat(all_embeddings, dim=0).cpu()

    return all_embeddings

load_model = {
    'sbert': load_sbert,
}

load_text2embedding = {
    'sbert': sbert_text2embedding,
}

# Preprocess the data

In [4]:
model_name = 'sbert'
path = 'dataset'
path_nodes = f'{path}/nodes'
path_edges = f'{path}/edges'
path_graphs = f'{path}/graphs'

In [10]:
from pymilvus import MilvusClient, DataType, FieldSchema, CollectionSchema

CLUSTER_ENDPOINT = (
    "https://in03-7a5f9d2a1aa84ef.serverless.gcp-us-west1.cloud.zilliz.com"
)
API_KEY = (
    "a73c79fb1924d05aeb410abc0d5669293cc33be37a123953be640725aa42198ef5c1e499cc07f231977c742ad6e6977c6eddec05"
)

milvus_client = MilvusClient(uri=CLUSTER_ENDPOINT, token=API_KEY)

COLLECTION_NAME = "graph_embeddings"
VECTOR_DIM      = 1024          # SBERT default; change if yours differs
METRIC_TYPE     = "COSINE"     # or "IP" for inner-product

# Drop the collection if it already exists (for a clean slate)
if COLLECTION_NAME in milvus_client.list_collections():
    milvus_client.drop_collection(COLLECTION_NAME)

# Define the schema
schema = CollectionSchema(
    fields=[
        FieldSchema(
            name="graph_id",
            dtype=DataType.INT64,
            is_primary=True,
            auto_id=False
        ),
        FieldSchema(
            name="embedding",
            dtype=DataType.FLOAT_VECTOR,
            dim=VECTOR_DIM
        ),
        FieldSchema(                 # optional metadata field
            name="graph_idx",
            dtype=DataType.INT64
        )
    ],
    description="Mean-pooled SBERT embeddings of knowledge graphs"
)

index_params = milvus_client.prepare_index_params()
index_params.add_index("embedding", index_type="IVF_FLAT", metric_type="COSINE", index_params={"nlist": 64})
milvus_client.create_collection(
    collection_name=COLLECTION_NAME,
    schema=schema,
    consistency_level="Strong",
    index_params=index_params
)

In [None]:
def preprocessing_step_one():
    # Load the graphs from the JSON file
    with open('graphs.json', 'r') as f:
        graphs = json.load(f)

    # Create directories if they don't exist
    os.makedirs(path_nodes, exist_ok=True)
    os.makedirs(path_edges, exist_ok=True)

    # Process each graph
    for i, triples in enumerate(tqdm(graphs)):
        node_map = {}   # Maps node label → node ID
        edges = []

        for h, r, t in triples:
            h = h.lower()
            t = t.lower()
            if h not in node_map:
                node_map[h] = len(node_map)
            if t not in node_map:
                node_map[t] = len(node_map)
            edges.append({'src': node_map[h], 'edge_attr': r, 'dst': node_map[t]})

        # Convert node map to DataFrame
        nodes_df = pd.DataFrame(
            [{'node_id': v, 'node_attr': k} for k, v in node_map.items()],
            columns=['node_id', 'node_attr']
        )

        # Convert edge list to DataFrame
        edges_df = pd.DataFrame(edges, columns=['src', 'edge_attr', 'dst'])

        # Save to CSV
        nodes_df.to_csv(f'{path_nodes}/{i}.csv', index=False)
        edges_df.to_csv(f'{path_edges}/{i}.csv', index=False)

preprocessing_step_one()

100%|██████████| 5/5 [00:00<00:00, 118.42it/s]


In [None]:
def preprocessing_step_two():
    print("Loading local knowledge base...")
    with open('graphs.json', 'r') as f:
        graphs = json.load(f)

    model, tokenizer, device = load_model[model_name]()
    text2embedding = load_text2embedding[model_name]

    print("Embedding and storing graphs in milvusDB...")
    os.makedirs(path_graphs, exist_ok=True)

    milvus_vectors = []

    for index in tqdm(range(len(graphs))):
        # --- Load nodes & edges ---
        nodes_path = f'{path_nodes}/{index}.csv'
        edges_path = f'{path_edges}/{index}.csv'
        if not os.path.exists(nodes_path) or not os.path.exists(edges_path):
            print(f'Skipping graph {index} (missing files)')
            continue

        nodes = pd.read_csv(nodes_path)
        edges = pd.read_csv(edges_path)
        nodes.node_attr.fillna("", inplace=True)

        if len(nodes) == 0:
            print(f'Empty graph at index {index}')
            continue

        # --- Embed node and edge attributes ---
        x = text2embedding(model, tokenizer, device, nodes.node_attr.tolist())
        edge_attr = text2embedding(model, tokenizer, device, edges.edge_attr.tolist())
        edge_index = torch.LongTensor([edges.src.tolist(), edges.dst.tolist()])

        # --- Save graph as torch_geometric.Data ---
        pyg_graph = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, num_nodes=len(nodes))
        torch.save(pyg_graph, f'{path_graphs}/{index}.pt')

        # --- Compute graph-level embedding (mean of node embeddings) ---
        graph_embedding = torch.mean(x, dim=0).cpu().tolist()

        # --- Store in Milvus format: [graph_id, embedding, index] ---
        milvus_vectors.append({"graph_id": index, "embedding": graph_embedding, "graph_idx": index})

    # --- Final batch insert into Milvus ---
    if milvus_vectors:
        milvus_client.insert(
            collection_name=COLLECTION_NAME,
            data=milvus_vectors,
            auto_id=False
        )
        milvus_client.flush(COLLECTION_NAME)
        print(f"Inserted {len(milvus_vectors)} graph embeddings into Milvus.")
    else:
        print("No graphs were inserted into Milvus.")

preprocessing_step_two()



Loading local knowledge base...
inherit model weights from sentence-transformers/all-roberta-large-v1


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/650 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/328 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Embedding and storing graphs in milvusDB...


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  nodes.node_attr.fillna("", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  nodes.node_attr.fillna("", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always beha

Inserted 5 graph embeddings into Milvus.


In [6]:
def retrieval_via_pcst(graph, q_emb, textual_nodes, textual_edges, topk=3, topk_e=3, cost_e=0.5):
    c = 0.01
    if len(textual_nodes) == 0 or len(textual_edges) == 0:
        desc = textual_nodes.to_csv(index=False) + '\n' + textual_edges.to_csv(index=False, columns=['src', 'edge_attr', 'dst'])
        graph = Data(x=graph.x, edge_index=graph.edge_index, edge_attr=graph.edge_attr, num_nodes=graph.num_nodes)
        return graph, desc

    root = -1  # unrooted
    num_clusters = 1
    pruning = 'gw'
    verbosity_level = 0
    if topk > 0:
        n_prizes = torch.nn.CosineSimilarity(dim=-1)(q_emb, graph.x)
        topk = min(topk, graph.num_nodes)
        _, topk_n_indices = torch.topk(n_prizes, topk, largest=True)

        n_prizes = torch.zeros_like(n_prizes)
        n_prizes[topk_n_indices] = torch.arange(topk, 0, -1).float()
    else:
        n_prizes = torch.zeros(graph.num_nodes)

    if topk_e > 0:
        e_prizes = torch.nn.CosineSimilarity(dim=-1)(q_emb, graph.edge_attr)
        topk_e = min(topk_e, e_prizes.unique().size(0))

        topk_e_values, _ = torch.topk(e_prizes.unique(), topk_e, largest=True)
        e_prizes[e_prizes < topk_e_values[-1]] = 0.0
        last_topk_e_value = topk_e
        for k in range(topk_e):
            indices = e_prizes == topk_e_values[k]
            value = min((topk_e-k)/sum(indices), last_topk_e_value)
            e_prizes[indices] = value
            last_topk_e_value = value*(1-c)
        # reduce the cost of the edges such that at least one edge is selected
        cost_e = min(cost_e, e_prizes.max().item()*(1-c/2))
    else:
        e_prizes = torch.zeros(graph.num_edges)

    costs = []
    edges = []
    vritual_n_prizes = []
    virtual_edges = []
    virtual_costs = []
    mapping_n = {}
    mapping_e = {}
    for i, (src, dst) in enumerate(graph.edge_index.T.numpy()):
        prize_e = e_prizes[i]
        if prize_e <= cost_e:
            mapping_e[len(edges)] = i
            edges.append((src, dst))
            costs.append(cost_e - prize_e)
        else:
            virtual_node_id = graph.num_nodes + len(vritual_n_prizes)
            mapping_n[virtual_node_id] = i
            virtual_edges.append((src, virtual_node_id))
            virtual_edges.append((virtual_node_id, dst))
            virtual_costs.append(0)
            virtual_costs.append(0)
            vritual_n_prizes.append(prize_e - cost_e)

    prizes = np.concatenate([n_prizes, np.array(vritual_n_prizes)])
    num_edges = len(edges)
    if len(virtual_costs) > 0:
        costs = np.array(costs+virtual_costs)
        edges = np.array(edges+virtual_edges)

    vertices, edges = pcst_fast(edges, prizes, costs, root, num_clusters, pruning, verbosity_level)

    selected_nodes = vertices[vertices < graph.num_nodes]
    selected_edges = [mapping_e[e] for e in edges if e < num_edges]
    virtual_vertices = vertices[vertices >= graph.num_nodes]
    if len(virtual_vertices) > 0:
        virtual_vertices = vertices[vertices >= graph.num_nodes]
        virtual_edges = [mapping_n[i] for i in virtual_vertices]
        selected_edges = np.array(selected_edges+virtual_edges)

    edge_index = graph.edge_index[:, selected_edges]
    selected_nodes = np.unique(np.concatenate([selected_nodes, edge_index[0].numpy(), edge_index[1].numpy()]))

    n = textual_nodes.iloc[selected_nodes]
    e = textual_edges.iloc[selected_edges]
    desc = n.to_csv(index=False)+'\n'+e.to_csv(index=False, columns=['src', 'edge_attr', 'dst'])

    mapping = {n: i for i, n in enumerate(selected_nodes.tolist())}

    x = graph.x[selected_nodes]
    edge_attr = graph.edge_attr[selected_edges]
    src = [mapping[i] for i in edge_index[0].tolist()]
    dst = [mapping[i] for i in edge_index[1].tolist()]
    edge_index = torch.LongTensor([src, dst])
    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, num_nodes=len(selected_nodes))

    return data, desc

In [7]:
def retreival(question, k=3):
    model, tokenizer, device = load_model[model_name]()
    text2embedding = load_text2embedding[model_name]
    # Encode question
    q_emb = text2embedding(model, tokenizer, device, [question])[0]

    # Ensure collection is loaded before search
    try:
        milvus_client.load_collection(COLLECTION_NAME)
    except Exception as e:
        print(f"Error loading collection: {e}")
        return [], []

    # Perform similarity search in Milvus
    search_results = milvus_client.search(
        collection_name=COLLECTION_NAME,
        data=[q_emb.tolist()],
        limit=k,
        search_params={"metric_type": METRIC_TYPE, "params": {}},
        output_fields=["graph_idx"]
    )

    # Extract graph indices from results
    hits = search_results[0]
    graph_indices = [hit["entity"]["graph_idx"] for hit in hits]

    # Collect subgraphs and descriptions
    sub_graphs = []
    descriptions = []

    for index in tqdm(graph_indices, desc="Retrieving subgraphs"):
        nodes_path = f'{path_nodes}/{index}.csv'
        edges_path = f'{path_edges}/{index}.csv'
        graph_path = f'{path_graphs}/{index}.pt'

        if not (os.path.exists(nodes_path) and os.path.exists(edges_path) and os.path.exists(graph_path)):
            print(f"Missing data for graph {index}")
            continue

        nodes = pd.read_csv(nodes_path)
        edges = pd.read_csv(edges_path)
        if len(nodes) == 0:
            print(f"Empty graph at index {index}")
            continue

        graph = torch.load(graph_path)

        # Apply your custom retrieval logic (must be defined elsewhere)
        subg, desc = retrieval_via_pcst(
            graph=graph,
            q_emb=q_emb,
            textual_nodes=nodes,
            textual_edges=edges,
            topk=3,
            topk_e=5,
            cost_e=0.5
        )

        sub_graphs.append(subg)
        descriptions.append(desc)

    return sub_graphs, descriptions

question = "How does air pollution impact the treatment or worsening of asthma and COPD symptoms?"
retreival(question, k=3)

inherit model weights from sentence-transformers/all-roberta-large-v1


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/650 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/328 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Error loading collection: name 'milvus_client' is not defined


([], [])

In [8]:
from openai import OpenAI

# Initialize OpenAI or NVIDIA client
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key="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"
)

def construct_rag_prompt(question, descriptions):
    """
    Constructs a prompt for RAG using the retrieved knowledge graph descriptions.
    """
    # Start with system message
    system_message = """You are a medical knowledge assistant that helps answer questions based on medical knowledge graphs.
Use only the provided knowledge graphs to answer the question. If the knowledge graphs don't contain enough information to fully answer the question,
say I don't know. Be precise and factual in your responses. Don't cite the node and graph ids in your responses for example (Knowledge Graph number). Format the responses for more clarity.
"""

    # Format the context from knowledge graphs
    context = "Here are the relevant medical knowledge graphs:\n\n"
    for i, desc in enumerate(descriptions, 1):
        # Split the description into nodes and edges
        parts = desc.split('\n\n')
        nodes = parts[0]
        edges = parts[1] if len(parts) > 1 else ""

        context += f"Knowledge Graph {i}:\n"
        context += f"Nodes:\n{nodes}\n"
        if edges:
            context += f"Relationships:\n{edges}\n"
        context += "\n"

    # Construct the final prompt
    prompt = f"{system_message}\n\n{context}\nQuestion: {question}\n\nAnswer:"

    return prompt



In [11]:
def generate_response(question):
  sub_graphs, descriptions = retreival(question, k=3)
  rag_prompt = construct_rag_prompt(question, descriptions)

  # Now you can use this prompt with your LLM
  completion = client.chat.completions.create(
      model="nvidia/llama-3.1-nemotron-ultra-253b-v1",
      messages=[
          {"role": "system", "content": "You are a medical knowledge assistant."},
          {"role": "user", "content": rag_prompt}
      ],
      temperature=0.6,
      top_p=0.95,
      max_tokens=4096,
      frequency_penalty=0,
      presence_penalty=0,
      stream=True
  )

  #   words to delete from the response:
  to_delete = [
    r"\(Knowledge Graph \d+\)",
    r"Knowledge Graph \d+"
  ]

  # Print the streaming response and save the response
  full_response = ""
  for chunk in completion:
    if chunk.choices[0].delta.content is not None:
      content = chunk.choices[0].delta.content
      for word in to_delete:
          content = re.sub(word, "", content)
      # print(content, end="")
      full_response += content


  return full_response


# Example usage:
questions = [
    "How does air pollution impact the treatment or worsening of asthma and COPD symptoms?",
    "How does air pollution impact the treatment or worsening of COPD symptoms?",
    "How does air pollution impact the treatment or worsening of asthma symptoms?",
    "What does asthma mean?",
    "What is the color of Zied's shoes?"
]

for question in questions:
    print(f"\n\nQuestion: {question}\n")
    # Get the response
    full_response = generate_response(question)
    print(f"Response: {full_response}\n")



Question: How does air pollution impact the treatment or worsening of asthma and COPD symptoms?

inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


Response: **Air Pollution's Impact on Asthma and COPD**

Air pollution can significantly affect both asthma and Chronic Obstructive Pulmonary Disease (COPD) by exacerbating symptoms and potentially impacting treatment efficacy. Here’s a breakdown of its effects:

1. **Exacerbation of Symptoms**:
   - **Asthma**: Air pollutants like particulate matter (PM), ozone (O3), nitrogen dioxide (NO2), and sulfur dioxide (SO2) can trigger asthma attacks, increase airway inflammation, and reduce lung function. These pollutants can irritate the airways, leading to wheezing, coughing, shortness of breath, and chest tightness.
   - **COPD**: Similarly, exposure to air pollutants can worsen COPD symptoms. Increased levels of PM, especially PM2.5, and gaseous pollutants can lead to increased respiratory symptoms, reduced lung function, and increased risk of hospitalizations due to COPD exacerbations.

2. **Impact on Treatment**:
   - While the provided knowledge graphs do not explicitly detail how air 

Retrieving subgraphs: 0it [00:00, ?it/s]


Response: **Impact of Air Pollution on COPD Treatment and Symptoms**

Air pollution can significantly affect individuals with Chronic Obstructive Pulmonary Disease (COPD) in several ways:

1. **Worsening of Symptoms**: Exposure to air pollutants, such as particulate matter (PM), nitrogen dioxide (NO2), and ozone (O3), can exacerbate COPD symptoms. These pollutants can induce inflammation and oxidative stress in the lungs, leading to increased breathlessness, coughing, and sputum production.

2. **Reduced Lung Function**: Air pollution can decrease lung function in COPD patients, making it harder for them to breathe. This reduction can be temporary, but repeated exposure can lead to more permanent declines in lung function.

3. **Increased Risk of Acute Exacerbations**: Air pollution is a known trigger for acute exacerbations of COPD, which are episodes of sudden worsening of symptoms that require additional medical treatment. These exacerbations can lead to hospitalizations and a decli

Retrieving subgraphs: 0it [00:00, ?it/s]


Response: **Impact of Air Pollution on Asthma**

Air pollution can significantly affect both the treatment and worsening of asthma symptoms. Here's a breakdown of the effects based on the provided knowledge:

1. **Worsening of Symptoms**:
	* Air pollution, particularly **particulate matter (PM)**, **nitrogen dioxide (NO2)**, and **ozone (O3)**, can irritate the airways, leading to increased inflammation and bronchoconstriction.
	* Exposure to these pollutants can trigger asthma attacks in susceptible individuals, making symptoms more severe and frequent.

2. **Treatment Challenges**:
	* The presence of air pollution may reduce the effectiveness of standard asthma treatments, such as **inhaled corticosteroids** and **bronchodilators**.
	* Individuals with asthma may require more frequent use of rescue medications or adjustments to their treatment plans to manage symptoms exacerbated by pollution.

3. **Preventive Measures**:
	* Monitoring air quality indices and avoiding outdoor activit

Retrieving subgraphs: 0it [00:00, ?it/s]


Response: **Asthma Definition**

Asthma is a chronic respiratory disease characterized by inflammation, airway obstruction, and spasm of the bronchial tubes, leading to recurring episodes of wheezing, breathlessness, chest tightness, and coughing, particularly at night or early in the morning.



Question: What is the color of Zied's shoes?

inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


Response: I don't know.



In [34]:
def evaluate_response(question, context, response, label=None, report=False):
    """
    Evaluates the LLM's response based on clarity, exactitude, and context adherence.

    Args:
        question (str): Original question
        context (str): Retrieved knowledge graph context
        response (str): LLM's response to evaluate
        label (str, optional): Ground truth label for comparison
        report (bool): Whether to save evaluation to file

    Returns:
        dict: Evaluation scores and feedback
    """

    if report:
        current_date = datetime.now().strftime("%Y-%m-%d")
        with open(f"evaluation_{current_date}.txt", "a") as f:
            f.write(f"Question: {question}\n")
            f.write(f"Context: {context}\n")
            f.write(f"Response: {response}\n")
            f.write(f"Label: {label}\n")


    evaluation_prompt = f"""You are an expert evaluator of medical knowledge responses. Evaluate the following response based on three criteria:

1. Clarity (0-5): How clear and well-structured is the response? 0 is the worst, 1 is the best.
2. Exactitude (0-5): How accurate and precise is the information provided? 0 is the worst, 1 is the best.
3. Context Adherence (0-5): How well does the response stick to the provided knowledge graphs? 0 is the worst, 1 is the best.
4. Relevance (0-5): How relevant is the retrieved Knowledge Graph Context to the question? 0 is the worst, 1 is the best.
5. Completeness (0-5): How complete and thorough is the response? 0 is the worst, 1 is the best.
6. Logical Flow (0-5): How coherent and well-structured is the response? 0 is the worst, 1 is the best.
7. Uncertainty Handling (0-5): How well does the response acknowledge limitations and uncertainties? 0 is the worst, 1 is the best.


Question: {question}

Knowledge Graph Context:
{context}

Response to Evaluate:
{response}

Provide your evaluation in the following format:
CLARITY: [score]/5 - [brief explanation]
EXACTITUDE: [score]/5 - [brief explanation]
CONTEXT ADHERENCE: [score]/5 - [brief explanation]
RELEVANCE: [score]/5 - [brief explanation]
COMPLETENESS: [score]/5 - [brief explanation]
LOGICAL FLOW: [score]/5 - [brief explanation]
UNCERTAINTY HANDLING: [score]/5 - [brief explanation]
OVERALL FEEDBACK: [average score] and 2-3 sentences summarizing the evaluation]
"""

    if label is not None:
        evaluation_prompt += f"Ground Truth Label: {label}\n"

    evaluation = client.chat.completions.create(
        model="nvidia/llama-3.1-nemotron-ultra-253b-v1",
        messages=[
            {"role": "system", "content": "You are an expert evaluator of medical knowledge responses."},
            {"role": "user", "content": evaluation_prompt}
        ],
        temperature=0.3,  # Lower temperature for more consistent evaluation
        top_p=0.95,
        max_tokens=1024,
        frequency_penalty=0,
        presence_penalty=0,
        stream=True
    )

    print("\n=== Response Evaluation ===\n")

    # Initialize variables to store the complete response
    full_evaluation = ""
    scores = {
        "clarity_score": None,
        "exactitude_score": None,
        "context_adherence_score": None,
        "relevance_score": None,
        "completeness_score": None,
        "logical_flow_score": None,
        "uncertainty_handling_score": None,
        "overall_feedback": None
    }

    for chunk in evaluation:
        if chunk.choices[0].delta.content is not None:
            content = chunk.choices[0].delta.content
            # print(content, end="")
            full_evaluation += content

    # Extract scores from the complete evaluation
    scores["clarity_score"] = re.findall(r"CLARITY: (\d+(?:\.\d+)?)", full_evaluation)
    scores["exactitude_score"] = re.findall(r"EXACTITUDE: (\d+(?:\.\d+)?)", full_evaluation)
    scores["context_adherence_score"] = re.findall(r"CONTEXT ADHERENCE: (\d+(?:\.\d+)?)", full_evaluation)
    scores["relevance_score"] = re.findall(r"RELEVANCE: (\d+(?:\.\d+)?)", full_evaluation)
    scores["completeness_score"] = re.findall(r"COMPLETENESS: (\d+(?:\.\d+)?)", full_evaluation)
    scores["logical_flow_score"] = re.findall(r"LOGICAL FLOW: (\d+(?:\.\d+)?)", full_evaluation)
    scores["uncertainty_handling_score"] = re.findall(r"UNCERTAINTY HANDLING: (\d+(?:\.\d+)?)", full_evaluation)
    scores["overall_feedback"] = re.findall(r"OVERALL FEEDBACK: (\d+(?:\.\d+)?)", full_evaluation)

    # Convert list matches to single values
    for key in scores:
        if scores[key]:
            scores[key] = scores[key][0] if isinstance(scores[key], list) else scores[key]

    print("\n=== Evaluation Complete ===\n")

    if report:
        with open(f"evaluation_{current_date}.txt", "a") as f:
            f.write("\nScores:\n")
            f.write(json.dumps(scores, indent=2))
            f.write("\n\n" + "="*50 + "\n\n")

    return scores


In [15]:
from scripts.load_qa_data import load_qa

all_questions, all_answers= load_qa()
# all_questions

loading file  generated_qa/all_covid_data_qa.json
loading file  generated_qa/all_asthma_data_qa.json
loading file  generated_qa/all_pneumonia_data_qa.json
loading file  generated_qa/all_copcd_data_qa.json
loading file  generated_qa/all_tuberculosis_data_qa.json


In [35]:
sum_score = 0
for i, question in enumerate(all_questions):
    response = generate_response(question)
    sub_graphs, descriptions = retreival(question, k=3)
    context = "\n".join(descriptions)
    scores = evaluate_response(question, context, response, all_answers[i], True )
    sum_score += float(scores["overall_feedback"])
    # print(sum_score)
average = sum_score / len(all_questions)
print(average)




inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '3', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.3'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.6'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '3', 'overall_feedback': '4.4'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.71'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.6'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.6'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.6'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '4', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.57'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '4', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '4', 'uncertainty_handling_score': '3', 'overall_feedback': '4.1'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '3', 'overall_feedback': '4.4'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.57'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.6'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.7'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.6'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '1', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.0'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.6'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.6'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.6'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '5'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.4'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.7'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '3', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '3', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.3'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '3', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.3'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '5'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '4', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '3', 'overall_feedback': '4.4'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '5'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '5'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '4', 'logical_flow_score': '5', 'uncertainty_handling_score': '5', 'overall_feedback': '4.86'}
inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]


inherit model weights from sentence-transformers/all-roberta-large-v1


Retrieving subgraphs: 0it [00:00, ?it/s]



=== Response Evaluation ===


=== Evaluation Complete ===

{'clarity_score': '5', 'exactitude_score': '5', 'context_adherence_score': '5', 'relevance_score': '5', 'completeness_score': '5', 'logical_flow_score': '5', 'uncertainty_handling_score': '4', 'overall_feedback': '4.86'}
4.703725490196081
