In [1]:
# Mount Google Drive at /content/drive (Colab-only helper); every CSV path used
# by this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install groq rouge-score bert-score

Collecting groq
  Downloading groq-0.13.1-py3-none-any.whl.metadata (14 kB)
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading groq-0.13.1-py3-none-any.whl (109 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.1/109.1 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=4b477949c830747349345adb1918c335ca032a5db8a2187fe0a9f30d6549d70f
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a

In [3]:
import os
import csv
from groq import Groq
import pandas as pd
from rouge_score import rouge_scorer
from bert_score import score
from IPython.display import display
import warnings

# RAG source

In [4]:
# Initialize the Groq client.
# SECURITY: the API key must never be written into the notebook. It is read from
# the GROQ_API_KEY environment variable, falling back to an interactive prompt.
# Any key that was ever embedded in this cell must be treated as leaked and revoked.
from getpass import getpass

groq_api_key = os.getenv("GROQ_API_KEY") or getpass("Enter your Groq API key: ")
if not groq_api_key:
    raise RuntimeError(
        "No Groq API key provided; set the GROQ_API_KEY environment variable."
    )
client = Groq(api_key=groq_api_key)

# Load the RAG source data (retrieval document)
reference_guidelines_file = "/content/drive/MyDrive/P2/LLM/LLL-Advice/sources/3in1_source.csv"
trusted_data = pd.read_csv(reference_guidelines_file)

# retrieve_context() reads the 'disease', 'source' and 'cause' columns; fail here
# with a clear message instead of a KeyError in the middle of generation.
missing_columns = {"disease", "source", "cause"} - set(trusted_data.columns)
if missing_columns:
    raise ValueError(
        f"{reference_guidelines_file} is missing required columns: {sorted(missing_columns)}"
    )

In [5]:
# Silence UserWarning messages that originate from the transformers package.
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module="transformers",
)

# Groq-hosted LLM with RAG from all 3 sources

In [6]:
def retrieve_context(disease, data=None):
    """
    Retrieve and combine causes for a given disease from all sources.

    Args:
        disease: Disease name to look up. Matching ignores case and
            leading/trailing whitespace.
        data: Optional DataFrame with 'disease', 'source' and 'cause' columns.
            Defaults to the module-level ``trusted_data`` frame.

    Returns:
        A single string of "<source>: <cause>" snippets joined by spaces, or
        None when no row with a usable cause matches the disease.
    """
    if data is None:
        data = trusted_data

    wanted = str(disease).strip().lower()
    normalized_names = data['disease'].str.strip().str.lower()
    matches = data[normalized_names == wanted]

    # Skip rows whose cause is missing; otherwise the literal text "nan" would
    # be injected into the prompt as if it were trusted information.
    snippets = [
        f"{source}: {cause}"
        for source, cause in zip(matches['source'], matches['cause'])
        if pd.notna(cause)
    ]
    return " ".join(snippets) if snippets else None


def llama_api(prompt_content, max_tokens=300, model="gemma2-9b-it"):
    """
    Send a single-turn user prompt to a Groq-hosted chat model and return the reply.

    The function name is historical: the default model is "gemma2-9b-it", not a
    Llama model. Pass ``model`` to target a different Groq model.

    Args:
        prompt_content: Text sent as the only user message.
        max_tokens: Upper bound on the number of generated tokens.
        model: Groq model identifier to query.

    Returns:
        The stripped reply text, or the sentinel string
        "Error: No response generated." when the request fails or the reply
        carries no text. Callers that persist the result should be aware that
        the sentinel is returned instead of raising.
    """
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt_content}],
            model=model,
            max_tokens=max_tokens
        )
        content = chat_completion.choices[0].message.content
    except Exception as e:
        # Best-effort by design: one failed request must not abort a batch run.
        print(f"Error calling Groq LLM API: {e}")
        return "Error: No response generated."

    if content is None:
        # Report the real problem rather than an AttributeError from .strip().
        print("Error calling Groq LLM API: response contained no text content")
        return "Error: No response generated."
    return content.strip()

In [7]:
# List of diseases to query. Each name must appear exactly once: a repeated entry
# triggers a redundant LLM call and a duplicate row in the results
# ('epilepsy' used to be listed twice).
diseases = [
 'diabetes', 'pneumonia', 'asthma', 'gout', 'malaria', 'migraine',
 'appendicitis', 'diarrhea', 'endometriosis', 'epilepsy', 'fever',
 'fibromyalgia', 'gallstones', 'GERD', 'heart attack', 'hemorrhoids',
 'hepatitis A', 'hepatitis B', 'hepatitis C', 'hernia', 'HIV', 'hives',
 'kidney stones', 'obesity', 'osteoarthritis', 'osteoporosis',
 "Parkinson's disease", 'psoriatic arthritis', 'cirrhosis', 'delirium',
 'heart failure', 'peptic ulcer', 'urinary tract infection (UTI)', 'melanoma',
 'gastritis', 'glaucoma', 'tachycardia', 'anemia', 'pancreatitis',
 'tuberculosis', 'chickenpox', 'mumps', 'tetanus', 'polio', 'lupus',
 'hemophilia', 'Chronic Fatigue Syndrome (CFS)',
]

In [8]:
# RAG Output file paths
# CSV that the generation cell writes (header: "Disease", "Cause") and that the
# evaluation step reads back with pandas.
output_file = "/content/drive/MyDrive/P2/LLM/LLL-Advice/RAG-Gemma.csv"

In [9]:
# Destination CSV for the per-disease evaluation table.
evaluation_results_file = "/content/drive/MyDrive/P2/LLM/LLL-Advice/Gemma_RAG-3in1-BERTScore.csv"

# Reference CSV for each trusted source, keyed by source name.
reference_files = dict(
    MayoClinic="/content/drive/MyDrive/P2/LLM/LLL-Advice/sources/MayoClinic.csv",
    Healthline="/content/drive/MyDrive/P2/LLM/LLL-Advice/sources/Healthline.csv",
    MedlinePlus="/content/drive/MyDrive/P2/LLM/LLL-Advice/sources/MedlinePlus.csv",
)

In [10]:
# Read every reference CSV into a DataFrame, keyed by its source name.
reference_data = {}
for source in reference_files:
    path = reference_files[source]
    reference_data[source] = pd.read_csv(path)

In [11]:
# Ask the LLM for the causes of every disease, grounding the prompt in retrieved
# context when available, and stream the answers into the output CSV.
with open(output_file, mode="w", newline="", encoding="utf-8") as csv_handle:
    cause_writer = csv.writer(csv_handle)
    cause_writer.writerow(["Disease", "Cause"])  # Header

    for disease in diseases:
        # Use the RAG prompt only when the trusted sources know this disease.
        context = retrieve_context(disease)
        prompt = (
            f"Here is some trusted information: {context}\n\nBased on this, what are the main causes of {disease}?"
            if context
            else f"What are the main causes of {disease}?"
        )

        # Query the model and normalise surrounding whitespace.
        cause = llama_api(prompt, max_tokens=300).strip()

        cause_writer.writerow([disease, cause])

print("RAG-enhanced responses generated and saved successfully.")

RAG-enhanced responses generated and saved successfully.


# Evaluation with ROUGE and BERTScore

In [12]:
# Evaluation with ROUGE and BERTScore
# Reload the generated answers from the CSV written by the generation step; the
# evaluation loop reads its "Disease" and "Cause" columns.
llm_responses = pd.read_csv(output_file)

In [13]:
import os

# SECURITY: access tokens must never be hard-coded in the notebook. Provide
# HF_TOKEN through the environment (or the Colab secrets panel) before running;
# any token that was ever embedded in this cell must be treated as leaked and
# revoked. Hugging Face authentication is optional for public models, so a
# missing token is reported but does not stop the run.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    print("HF_TOKEN is not set; continuing without Hugging Face authentication.")

In [14]:
# Per-source accumulators; each list receives one entry per evaluated response.

# BERTScore F1
bert_scores_healthline, bert_scores_medlineplus, bert_scores_mayoclinic = [], [], []

# ROUGE F-measures (ROUGE-1, ROUGE-2, ROUGE-L) per source
rouge1_healthline, rouge2_healthline, rougeL_healthline = [], [], []
rouge1_medlineplus, rouge2_medlineplus, rougeL_medlineplus = [], [], []
rouge1_mayoclinic, rouge2_mayoclinic, rougeL_mayoclinic = [], [], []

# Scorers are created lazily and reused across calls. Building a new BERTScore
# model for every comparison reloads the roberta-large weights each time, which
# dominates the runtime of the evaluation loop.
_ROUGE_SCORER = None
_BERT_SCORER = None


def evaluate_response(reference, candidate):
    """
    Score a candidate answer against a reference text with ROUGE and BERTScore.

    Args:
        reference: Reference text (string).
        candidate: Generated text to evaluate (string).

    Returns:
        A tuple ``(rouge_result, bert_f1)`` where ``rouge_result`` maps
        'rouge1', 'rouge2' and 'rougeL' to rouge-score results and ``bert_f1``
        is the BERTScore F1 as a float. Returns ``(None, None)`` when either
        input is not a string (e.g. NaN from an empty CSV cell) or scoring fails.
    """
    global _ROUGE_SCORER, _BERT_SCORER

    if not isinstance(reference, str) or not isinstance(candidate, str):
        print("Error in evaluation: reference and candidate must both be strings")
        return None, None

    try:
        if _ROUGE_SCORER is None:
            _ROUGE_SCORER = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
        if _BERT_SCORER is None:
            # Imported here so this cell is self-contained; BERTScorer(lang="en")
            # selects the same default English model as bert_score.score(lang="en").
            from bert_score import BERTScorer
            _BERT_SCORER = BERTScorer(lang="en")

        # ROUGE
        rouge_result = _ROUGE_SCORER.score(reference, candidate)

        # BERTScore (only F1 is reported)
        _, _, F1 = _BERT_SCORER.score([candidate], [reference], verbose=False)

        return rouge_result, F1.mean().item()
    except Exception as e:
        # Best-effort: one failed comparison must not abort the whole evaluation.
        print(f"Error in evaluation: {e}")
        return None, None

# Evaluate responses against each reference
def _lookup_reference(source_name, disease):
    """Return the first reference cause for `disease` in `source_name`, or None if absent."""
    source_frame = reference_data[source_name]
    # Match case-insensitively, consistent with retrieve_context(); an exact
    # match would silently skip names such as 'GERD' when casing differs.
    wanted = str(disease).strip().lower()
    hits = source_frame[source_frame["disease"].str.strip().str.lower() == wanted]
    if hits.empty:
        return None
    return hits.iloc[0]["cause"]


def _score_against(source_name, disease, candidate):
    """Return (rouge1, rouge2, rougeL, bert_f1) for one source; None marks an unavailable metric."""
    reference = _lookup_reference(source_name, disease)
    if reference is None:
        return None, None, None, None

    rouge_result, bert_f1 = evaluate_response(reference, candidate)
    if not rouge_result:
        return None, None, None, bert_f1
    return (
        rouge_result['rouge1'].fmeasure,
        rouge_result['rouge2'].fmeasure,
        rouge_result['rougeL'].fmeasure,
        bert_f1,
    )


# Accumulator lists per source, in the same order as _score_against() returns values.
_metric_lists = {
    "Healthline": (rouge1_healthline, rouge2_healthline, rougeL_healthline, bert_scores_healthline),
    "MedlinePlus": (rouge1_medlineplus, rouge2_medlineplus, rougeL_medlineplus, bert_scores_medlineplus),
    "MayoClinic": (rouge1_mayoclinic, rouge2_mayoclinic, rougeL_mayoclinic, bert_scores_mayoclinic),
}

for _, row in llm_responses.iterrows():
    disease = row["Disease"]
    candidate = row["Cause"]

    # Every list gets exactly one entry per response so the columns stay
    # aligned with the rows of llm_responses.
    for source_name, metric_lists in _metric_lists.items():
        scores = _score_against(source_name, disease, candidate)
        for metric_list, value in zip(metric_lists, scores):
            metric_list.append(value)

# Save evaluation results
evaluation_results = llm_responses.copy()
evaluation_results["BERTScore_Healthline"] = bert_scores_healthline
evaluation_results["BERTScore_MedlinePlus"] = bert_scores_medlineplus
evaluation_results["BERTScore_MayoClinic"] = bert_scores_mayoclinic

evaluation_results["ROUGE1_Healthline"] = rouge1_healthline
evaluation_results["ROUGE2_Healthline"] = rouge2_healthline
evaluation_results["ROUGEL_Healthline"] = rougeL_healthline

evaluation_results["ROUGE1_MedlinePlus"] = rouge1_medlineplus
evaluation_results["ROUGE2_MedlinePlus"] = rouge2_medlineplus
evaluation_results["ROUGEL_MedlinePlus"] = rougeL_medlineplus

evaluation_results["ROUGE1_MayoClinic"] = rouge1_mayoclinic
evaluation_results["ROUGE2_MayoClinic"] = rouge2_mayoclinic
evaluation_results["ROUGEL_MayoClinic"] = rougeL_mayoclinic

evaluation_results.to_csv(evaluation_results_file, index=False)

# Display the evaluation results
display(evaluation_results)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['ro

Unnamed: 0,Disease,Cause,BERTScore_Healthline,BERTScore_MedlinePlus,BERTScore_MayoClinic,ROUGE1_Healthline,ROUGE2_Healthline,ROUGEL_Healthline,ROUGE1_MedlinePlus,ROUGE2_MedlinePlus,ROUGEL_MedlinePlus,ROUGE1_MayoClinic,ROUGE2_MayoClinic,ROUGEL_MayoClinic
0,diabetes,Here's a breakdown of the main causes of diabe...,0.829567,0.806238,0.797601,0.508816,0.237975,0.392947,0.37551,0.106996,0.261224,0.338542,0.08377,0.135417
1,pneumonia,"According to the provided information, the mai...",0.837989,0.846802,0.820214,0.398714,0.20712,0.302251,0.375,0.139073,0.276316,0.231061,0.091255,0.140152
2,asthma,"Based on the information provided, the main ca...",0.863542,0.84435,0.859334,0.535519,0.159341,0.273224,0.422535,0.090652,0.214085,0.530259,0.197101,0.270893
3,gout,"The main causes of gout, according to the prov...",0.865035,0.836997,0.841161,0.536398,0.200772,0.268199,0.389381,0.11276,0.19469,0.467797,0.163823,0.257627
4,malaria,The main cause of malaria is infection with a ...,0.831889,0.809443,0.827908,0.445783,0.097561,0.228916,0.23662,0.07932,0.140845,0.345515,0.160535,0.212625
5,migraine,"According to the provided information, the mai...",0.833775,0.824729,0.852018,0.363636,0.098039,0.181818,0.323651,0.09205,0.141079,0.332468,0.099217,0.155844
6,appendicitis,"According to the provided information, the mai...",0.870986,0.844752,0.841365,0.504505,0.293578,0.36036,0.204082,0.0625,0.163265,0.34188,0.104348,0.205128
7,diarrhea,"The main causes of diarrhea, according to the ...",0.799915,0.815203,0.847087,0.328947,0.072848,0.177632,0.422222,0.149254,0.237037,0.447761,0.226981,0.328358
8,endometriosis,While the exact cause of endometriosis remains...,0.862615,0.839816,0.890821,0.495274,0.155598,0.238185,0.420833,0.112971,0.1875,0.590571,0.304239,0.431762
9,epilepsy,"Here are the main causes of epilepsy, based on...",0.821195,0.827714,0.852502,0.401294,0.078176,0.194175,0.140845,0.018957,0.103286,0.551148,0.192872,0.321503
