In [1]:
import requests
import json
from dotenv import load_dotenv
import os
import pandas as pd
import nltk
import tqdm
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from rouge_score import rouge_scorer
from sklearn.metrics import precision_score, recall_score

In [2]:
import nltk
# nltk.download("wordnet")
# nltk.download("punkt")

In [2]:
# Read the ground-truth workbook and keep only the No, Question and Answer columns.
df = pd.read_excel(
    'evaulation/dr_ground_truth.xlsx'
)[['No', 'Question', 'Answer']]

In [12]:
# Load variables from a local .env file so the API key can be read from the environment.
load_dotenv()
def query_deepseek(prompt, timeout=120):
    """Send ``prompt`` to DeepSeek through the OpenRouter chat-completions API.

    The prompt is sent as a single user message. Failures are reported through
    sentinel strings instead of exceptions, so one bad request does not abort a
    whole ``Series.apply`` run and discard the answers collected so far.

    Args:
        prompt: Text of the user message.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled connection.

    Returns:
        The content of the first choice's message on success, ``"HTTP ERROR"``
        when the server answers with an error status, or ``"GENERAL ERROR"``
        for any other failure (network error, timeout, non-JSON body, or a
        body without the expected keys).
    """
    response = None
    try:
        response = requests.post(
            url="https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {os.getenv('OPENROUTER_API_KEY2')}",
                "Content-Type": "application/json",
            },
            json={
                "model": "deepseek/deepseek-chat:free",
                "messages": [{"role": "user", "content": prompt}],
            },
            timeout=timeout,
        )
        response.raise_for_status()
        result = response.json()
        return result["choices"][0]["message"]["content"]
    except requests.exceptions.HTTPError:
        # Only raise_for_status() raises this, so a response object always exists here.
        print("HTTP error:", response.status_code, response.text)
        return "HTTP ERROR"
    except Exception as e:
        # Print only the raw text: decoding the body as JSON again inside the handler
        # would re-raise when the body is not valid JSON.
        if response is not None:
            print("Response content:", response.text)
        print("General error:", e)
        return "GENERAL ERROR"

In [4]:
# Drop rows that have no question text. Rebinding the name (instead of inplace=True)
# avoids mutating in place a frame that was itself selected from another frame.
df = df.dropna(subset=['Question'])
df

Unnamed: 0,No,Question,Answer
0,1.0,Pasien mengalami demam pada waktu malam. Sebel...,Demam Tifoid 70%
2,2.0,Pasien mengalami buang air besar cair lebih da...,Gastroenteritis (GE) Akut 70%
4,3.0,Pasien datang dengan keluhan demam selama 6 ha...,DBD 80%
5,4.0,Pasien menderita demam disertai munculnya brun...,Infeksi Bakteri/Virus (Sistemik) 50%
7,5.0,Pasien nyeri dada sejak 4 jam lalu seperti ter...,Angina Pekrotis 80%
9,6.0,Pasien mengalami sesak nafas sejak siang. #Has...,Asma 100%
12,7.0,Pasien mengalami sesak napas sejak sehari lalu...,Bronkitis Akut 100%
13,8.0,"Pasien mengalami sesak nafas sekitar 3 hari, t...",Bronkitis Akut 80%
15,9.0,Pasien nyeri ulu hati/perut bagian atas sejak ...,Dispepsia 100%
18,10.0,Pasien merasa nyeri di ulu hati sampai terasa ...,Dispepsia 80%


In [5]:
# Split df into consecutive batches of roughly BATCH_SIZE rows each.
BATCH_SIZE = 10
n_batches = max(1, int(np.ceil(len(df) / BATCH_SIZE)))
# Split the row *positions* rather than the frame itself: np.array_split applied
# directly to a DataFrame goes through the deprecated DataFrame.swapaxes and emits
# a FutureWarning. The resulting batches cover the same rows in the same order.
# .copy() gives every batch its own data, so columns can be added to a batch later
# without pandas warning about writing into a slice.
dfs = [
    df.iloc[positions].copy()
    for positions in np.array_split(np.arange(len(df)), n_batches)
]
dfs[1]

  return bound(*args, **kwds)


Unnamed: 0,No,Question,Answer
18,10.0,Pasien merasa nyeri di ulu hati sampai terasa ...,Dispepsia 80%
20,11.0,Pasien mengalami buang air besar cair 5 kali l...,Gastroenteritis Akut 70%
22,12.0,"Pasien nyeri pinggang kanan sejak 6 jam lalu, ...",Batu Ginjal 70%
26,13.0,Pasien mengalami nyeri perut sejak sehari lalu...,Peritonitis 50%
29,14.0,"Pasien mengalami nyeri perut, mulas, mual, mun...",Kolesistitis Akut 70%
31,15.0,Pasien mengalami nyeri hebat di perut bawah (b...,Appendisitis Akut 70%
33,16.0,Pasien muntah-muntah sejak 3 hari lalu setiap ...,Dispepsia 80%
35,17.0,"Pasien mengalami demam, menggigil, sulit tidur...",Infeksi Saluran pernapasan atas 70%
37,18.0,"Pasien demam tinggi hari ke 3, batuk berdahak,...",Infeksi Saluran Pernapasan atas 80%


In [13]:
i = 3
# Ask the model about every question in batch i and store the answers in a new
# "llm_response" column. assign() builds a fresh frame for the batch instead of
# writing a column into a frame that may be a slice of df (SettingWithCopyWarning).
dfs[i] = dfs[i].assign(llm_response=dfs[i]["Question"].apply(query_deepseek))

In [16]:
i = 4
# Ask the model about every question in batch i and store the answers in a new
# "llm_response" column. assign() builds a fresh frame for the batch instead of
# writing a column into a frame that may be a slice of df (SettingWithCopyWarning).
dfs[i] = dfs[i].assign(llm_response=dfs[i]["Question"].apply(query_deepseek))

In [18]:
# Stack batches 3 and 4 into one frame with a fresh 0..n-1 index, then write it to Excel.
merged_df = pd.concat(
    (dfs[3], dfs[4]),
    ignore_index=True,
)
merged_df.to_excel('sisa_non_rag_deepseek.xlsx')

In [9]:
# Print the first question of the first batch in full. .iloc[0] selects by position;
# a plain [0] would look up the index *label* 0, which is not guaranteed to exist
# once rows have been dropped from the frame.
print(dfs[0]['Question'].iloc[0])

Pasien mengalami demam pada waktu malam. Sebelum masuk rumah sakit, pasien merasa mual, muntah dan menggigil. Pasien didiagnosis penyakit apa? Gunakan jawaban dalam bahasa Indonesia.


In [10]:
# Per-row score accumulators: one independent, initially empty list per metric.
cosine_similarities, precisions, recalls, bleu_scores, meteor_scores = (
    [] for _ in range(5)
)
# ROUGE-L is not collected at the moment, so no accumulator is created for it.

In [11]:
# Columns read by the scoring loop, named once so they are easy to retarget.
GROUND_TRUTH_COL = "diagnosa_utama"
LLM_RESPONSE_COL = "llm_response"

# Fail fast with an actionable message instead of a bare KeyError on the first row.
# NOTE(review): the frame loaded earlier in this notebook is reduced to
# No/Question/Answer, so confirm which frame and columns hold the reference
# diagnosis and the model response before running this cell.
missing_cols = [c for c in (GROUND_TRUTH_COL, LLM_RESPONSE_COL) if c not in df.columns]
if missing_cols:
    raise KeyError(
        f"df is missing required column(s) {missing_cols}; available columns: {list(df.columns)}"
    )

# Empty the accumulators first so re-running this cell never appends duplicate scores.
metric_lists = (cosine_similarities, precisions, recalls, bleu_scores, meteor_scores)
for scores in metric_lists:
    scores.clear()

for _, row in df.iterrows():
    raw_truth = row[GROUND_TRUTH_COL]
    raw_output = row[LLM_RESPONSE_COL]

    # str(NaN) is the non-empty string "nan", so missing cells have to be detected
    # before casting; otherwise they would be scored as if "nan" were real text.
    ground_truth = "" if pd.isna(raw_truth) else str(raw_truth).strip().lower()
    llm_output = "" if pd.isna(raw_output) else str(raw_output).strip().lower()

    if not ground_truth or not llm_output:
        # -1 marks a row that could not be scored.
        for scores in metric_lists:
            scores.append(-1)
        continue

    # Cosine similarity between TF-IDF vectors fitted on just this pair of texts.
    tfidf_matrix = TfidfVectorizer().fit_transform([ground_truth, llm_output])
    cosine_similarities.append(cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0])

    # Tokenize once and reuse the tokens for every word-level metric below.
    truth_tokens = ground_truth.split()
    response_tokens = llm_output.split()
    truth_words = set(truth_tokens)
    response_words = set(response_tokens)

    # Set-based precision/recall over unique whitespace-separated words.
    # Both sets are non-empty here because both texts passed the emptiness check.
    true_positives = len(truth_words & response_words)
    precisions.append(true_positives / len(response_words))
    recalls.append(true_positives / len(truth_words))

    # NOTE(review): unsmoothed sentence_bleu collapses to values around 1e-232 when
    # higher-order n-grams never match, as seen in the recorded results; consider a
    # smoothing function, but confirm before changing reported numbers.
    bleu_scores.append(sentence_bleu([truth_tokens], response_tokens))

    meteor_scores.append(meteor_score([truth_tokens], response_tokens))

    # ROUGE-L is currently disabled:
    # rouge = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=True)
    # rouge_score_l = rouge.score(ground_truth, llm_output)["rougeL"].fmeasure
    # rouge_l_scores.append(rouge_score_l)

KeyError: 'diagnosa_utama'

In [None]:
# Attach every metric list to df as a column; each list is cut to len(df) entries first.
row_count = len(df)
for column_name, scores in (
    ("cosine_similarity", cosine_similarities),
    ("precision", precisions),
    ("recall", recalls),
    ("bleu_score", bleu_scores),
    ("meteor_score", meteor_scores),
):
    df[column_name] = scores[:row_count]
# No ROUGE-L column is produced while that metric stays disabled.

# Side-by-side view of the input text, the reference diagnosis, the model answer and the scores.
df[[
    "PARAPHRASE KELUHAN UTAMA",
    "diagnosa_utama",
    "llm_response",
    "bleu_score",
    "cosine_similarity",
    "precision",
    "recall",
    "meteor_score",
]]

Unnamed: 0,PARAPHRASE KELUHAN UTAMA,diagnosa_utama,llm_response,bleu_score,cosine_similarity,precision,recall,meteor_score
0,Pasien mengalami demam pada waktu malam. Sebel...,Demam tifoid,"Berdasarkan gejala yang Anda sebutkan, yaitu d...",4.545727e-232,0.193615,0.006579,0.5,0.018116
1,Pasien mengalami buang air besar cair lebih da...,Other gastroenteritis and colitis of infectiou...,"Berdasarkan gejala yang Anda jelaskan, kemungk...",5.296346e-232,0.018217,0.009346,0.125,0.022624
2,Pasien datang dengan keluhan demam selama 6 ha...,Dengue fever [classical dengue],"Berdasarkan gejala yang Anda deskripsikan, pas...",5.106573e-232,0.136049,0.007937,0.25,0.025253
3,Pasien menderita demam disertai munculnya brun...,Zoster [herpes zoster],Berdasarkan gejala dan hasil pemeriksaan yang ...,4.388531e-232,0.089488,0.005848,0.333333,0.015432
4,Pasien nyeri dada sejak 4 jam lalu seperti ter...,hipertensi urgensi,"Berdasarkan gejala yang Anda deskripsikan, yai...",0.0,0.0,0.0,0.0,0.0


In [None]:
# Print the model answer of the first row in full. .iloc[0] selects by position;
# a plain [0] would look up the index *label* 0, which is not guaranteed to exist
# once rows have been dropped from the frame.
print(df['llm_response'].iloc[0])

Berdasarkan gejala yang Anda sebutkan, yaitu demam yang terjadi pada malam hari, disertai mual, muntah, dan menggigil, ada beberapa kemungkinan diagnosis yang dapat dipertimbangkan. Namun, untuk diagnosis yang akurat, diperlukan pemeriksaan lebih lanjut oleh dokter, termasuk pemeriksaan fisik, tes laboratorium, dan riwayat medis lengkap.

Beberapa kemungkinan penyakit yang dapat menyebabkan gejala tersebut:

1. **Malaria**:  
   - Gejala khas malaria termasuk demam yang bersifat periodik (terjadi pada waktu tertentu, seperti malam hari), menggigil, mual, muntah, dan nyeri otot.  
   - Penyakit ini disebabkan oleh parasit *Plasmodium* yang ditularkan melalui gigitan nyamuk *Anopheles*.  

2. **Demam Berdarah Dengue (DBD)**:  
   - Gejalanya meliputi demam tinggi mendadak, sakit kepala parah, nyeri sendi dan otot, mual, muntah, dan terkadang ruam kulit.  
   - DBD disebabkan oleh virus dengue yang ditularkan melalui gigitan nyamuk *Aedes aegypti*.  

3. **Tifoid (Demam Tifoid)**:  
   - 