In [1]:
import os
import requests
import json
import numpy as np
from typing import List, Dict, Any, Optional
from openai import OpenAI
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
from dotenv import load_dotenv

In [2]:
from astrapy import DataAPIClient

In [3]:
load_dotenv()
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [4]:
def connect_to_astradb():
    ASTRA_DB_APPLICATION_TOKEN = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
    ASTRA_DB_API_ENDPOINT = os.getenv("ASTRA_DB_API_ENDPOINT")

    client = DataAPIClient(token=ASTRA_DB_APPLICATION_TOKEN)
    database = client.get_database(ASTRA_DB_API_ENDPOINT)

    collection = database.get_collection("chat_radiology")
    return collection

In [5]:
def generate_embedding(text: str) -> List[float]:
    """Generate embedding for the given text using OpenAI's embedding model"""
    response = openai_client.embeddings.create(
        input=text,
        model="text-embedding-3-small"
    )
    return response.data[0].embedding

In [6]:
def search_documents(query: str, collection) -> List[Dict[str, Any]]:
    embedding = generate_embedding(query)
    cursor = collection.find(
        None,  
        sort={
            "$vector": embedding
        },
        limit=5
    )
    documents = list(cursor)
    
    return documents

In [7]:
def format_context(documents: List[Dict[str, Any]]) -> str:
    
    context = ""
    for doc in documents:
        filename = doc.get("category", "dokumen tidak diketahui")
        context += f"Sumber: {filename}\n{doc.get('text', '')}\n\n"
    return context

In [8]:
def create_rag_prompt(context: str, query: str) -> str:
    
    system_prompt = f"""
    Anda adalah asisten medis yang membantu menjawab pertanyaan berdasarkan informasi berikut:
    START CONTEXT
    {context}
    END CONTEXT

    Instruksi:
    1. Jawablah pertanyaan secara panjang, terstruktur, dan sangat mendetail.
    2. Apabila informasi berupa gejala-gejala yang dialami pasien, berikan lima kemungkinan diagnosis berdasarkan informasi tersebut, dengan penjelasan singkat yang menghubungkan gejala dengan setiap diagnosis. Kemudian, berikan saran kepada dokter untuk investigasi gejala lebih lanjut berdasarkan informasi dari dalam dokumen.
    3. Jika gejalanya menyangkut tentang nyeri atau sakit di ulu hati atau perut bagian atas, selalu berikan dispepsia sebagai salah satu diagnosis kemungkinan.
    4. Jika gejala yang disampaikan berhubungan dengan demam, seperti demam tinggi atau demam lebih dari 2 hari, selalu berikan demam biasa sebagai salah satu diagnosis kemungkinan, dan sertakan informasi tentang demam sedikit lebih detail seperti cara obat yang diperlukan.
    5. Jika gejala yang disampaikan berhubungan dengan nafas sesak atau batuk, kemungkinan besar pasien terkena penyakit asthma. Jadi, selalu berikan asthma sebagai salah satu diagnosis kemungkinan.
    6. Jika pertanyaan berkaitan dengan suatu penyakit (misalnya "Pasien terkena TB Paru. Jelaskan..."), berikan penjelasan selengkap dan sedetail mungkin tentang penyakit tersebut, mengacu secara langsung pada informasi yang ada dalam konteks (dokumen). Sertakan informasi tentang etiologi, gejala khas, diagnosa, penatalaksanaan, dan referensi dokumen. Jika tersedia, sebutkan nama file asal informasi tersebut.
    7. Apabila informasi bukan berupa gejala tetapi pertanyaan umum, jawablah secara lengkap dan rujuk sumber informasi dari dokumen dengan menyebutkan nama file.
    8. Jika Anda tidak yakin dengan jawaban (informasi tidak terdapat dalam konteks) atau pertanyaan kurang spesifik, katakan Anda tidak yakin dengan jawaban Anda.
    9. Jangan menambahkan informasi di luar konteks yang diberikan.
    10. Pastikan jawaban akurat, berdasarkan pedoman klinis resmi atau dokumen yang tersedia.
    11. Berikan peringatan bahwa ini bukan pengganti nasihat medis profesional dan pasien harus berkonsultasi dengan dokter untuk diagnosis dan perawatan yang akurat.
    12. Di akhir jawaban, sampaikan bahwa jawaban diambil berdasarkan Keputusan Menteri Kesehatan Republik Indonesia Nomor HK.01.07/MENKES/1186/2022 tentang Panduan Praktik Klinis bagi Dokter di Fasilitas Pelayanan Kesehatan Tingkat Pertama.
    """
    
    return f"{system_prompt}\n\n{query}"

In [9]:
def call_deepseek_api(prompt: str, model: str = "deepseek/deepseek-chat:free") -> str:
    
    url = "https://openrouter.ai/api/v1/chat/completions"
    
    headers = {
        "Authorization": f"Bearer {os.environ.get('OPENROUTER_API_KEY')}",
        "HTTP-Referer": "https://radiologi.com",
        "X-Title": "Radiology GPT",
        "Content-Type": "application/json"
    }
    
    data = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": prompt
            }
        ]
    }
    
    response = requests.post(url, headers=headers, json=data)
    
    if response.status_code != 200:
        raise Exception(f"Error calling DeepSeek API: {response.text}")
    
    return response.json()["choices"][0]["message"]["content"]

In [10]:
def process_with_rag(query: str, use_deepseek_r1: bool = False) -> str:
    model = "deepseek/deepseek-r1:free" if use_deepseek_r1 else "deepseek/deepseek-chat:free"
    collection = connect_to_astradb()
    documents = search_documents(query, collection)
    
    context = format_context(documents)
    prompt = create_rag_prompt(context, query)
    response = call_deepseek_api(prompt, model)
    
    return response

In [13]:
query = """Pasien mengalami demam pada waktu malam. Sebelum masuk rumah sakit, 
pasien merasa mual, muntah dan menggigil. Pasien didiagnosis penyakit apa?"""
    
response = process_with_rag(query, True)
print(response)

**Diagnosis Berdasarkan Gejala Pasien:**

1. **Malaria**  
   - **Alasan:** Gejala demam yang hilang timbul (terutama pada malam hari), menggigil, mual, muntah, dan riwayat potensial tinggal di daerah endemis malaria sesuai dengan gambaran klinis malaria (Sumber: Penyakit Umum). Parasit *Plasmodium* menyebabkan demam periodik akibat siklus replikasi dalam eritrosit.  
   - **Pemeriksaan Penunjang:** Tes darah tepi (mencari parasit malaria), pemeriksaan *rapid diagnostic test* (RDT) malaria, dan hitung darah lengkap (trombositopenia mungkin terdeteksi).

2. **Demam Tifoid**  
   - **Alasan:** Demam yang mungkin berkepanjangan, disertai gejala gastrointestinal (mual, muntah). Demam tifoid sering disertai anoreksia, nyeri abdomen, dan komplikasi seperti gangguan kesadaran (Sumber: Digestive).  
   - **Pemeriksaan Penunjang:** Tes Widal, kultur darah, atau pemeriksaan PCR untuk *Salmonella typhi*.

3. **Leptospirosis**  
   - **Alasan:** Demam mendadak, menggigil, mialgia (nyeri otot betis

In [11]:
import pandas as pd
import nltk
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from bert_score import score

In [22]:
df = pd.read_excel('dr_ground_truth.xlsx')
# df = df.head(5)
df = df[['No','Question','Answer','Full Answer']]
df

Unnamed: 0,No,Question,Answer,Full Answer
0,1.0,Pasien mengalami demam pada waktu malam. Sebel...,Demam Tifoid 70%,"Berdasarkan gejala yang disampaikan, saya akan..."
1,,,Demam Berdarah Dengue 30%,
2,2.0,Pasien mengalami buang air besar cair lebih da...,Gastroenteritis (GE) Akut 70%,"Berdasarkan informasi klinis yang diberikan, s..."
3,,,Intoleransi Laktosa 30%,
4,3.0,Pasien datang dengan keluhan demam selama 6 ha...,DBD 80%,"Berdasarkan informasi yang diberikan, saya aka..."
...,...,...,...,...
93,44.0,Pasien sesak nafas sejak 4 jam sebelum masuk r...,Bronkitis Akut 50%,"Berdasarkan gejala yang disampaikan, berikut l..."
94,,,Infeksi saluran pernapasan 70%,
95,,,Gangguan kardiovaskular 50%,
96,45.0,Pasien sesak sejak 4 jam sebelum masuk rs dan ...,Asma Bronkial 70%,"Berdasarkan informasi yang diberikan, saya aka..."


In [None]:
df['No'] = df['No'].fillna(method='ffill')
df['Question'] = df['Question'].fillna(method='ffill')

grouped_df = df.groupby(['No', 'Question'], as_index=False).agg({
    'Answer': lambda x: ', '.join(x.dropna()),  # or list(x.dropna()) if you prefer a list
    'Full Answer': 'first'  # Only keep one LLM response per question
})
grouped_df


  df['No'] = df['No'].fillna(method='ffill')
  df['Question'] = df['Question'].fillna(method='ffill')


Unnamed: 0,No,Question,Answer,Full Answer
0,1.0,Pasien mengalami demam pada waktu malam. Sebel...,"Demam Tifoid 70%, Demam Berdarah Dengue 30%","Berdasarkan gejala yang disampaikan, saya akan..."
1,2.0,Pasien mengalami buang air besar cair lebih da...,"Gastroenteritis (GE) Akut 70%, Intoleransi Lak...","Berdasarkan informasi klinis yang diberikan, s..."
2,3.0,Pasien datang dengan keluhan demam selama 6 ha...,DBD 80%,"Berdasarkan informasi yang diberikan, saya aka..."
3,4.0,Pasien menderita demam disertai munculnya brun...,"Infeksi Bakteri/Virus (Sistemik) 50%, Infeksi ...","Berdasarkan informasi klinis yang diberikan, s..."
4,5.0,Pasien nyeri dada sejak 4 jam lalu seperti ter...,"Angina Pekrotis 80%, Infark Miokard Akut (Sera...",Berdasarkan informasi yang diberikan dalam kon...
5,6.0,Pasien mengalami sesak nafas sejak siang. #Has...,"Asma 100%, Bronkitis Akut 70%, Penyakit Paru O...",Berdasarkan informasi klinis yang diberikan da...
6,7.0,Pasien mengalami sesak napas sejak sehari lalu...,Bronkitis Akut 100%,Berdasarkan informasi gejala dan pemeriksaan f...
7,8.0,"Pasien mengalami sesak nafas sekitar 3 hari, t...","Bronkitis Akut 80%, Asma Bronkial 50%",Berdasarkan informasi yang tersedia dalam kont...
8,9.0,Pasien nyeri ulu hati/perut bagian atas sejak ...,"Dispepsia 100%, Pneumonia 50%, Infeksi Saluran...","Berdasarkan informasi gejala yang disampaikan,..."
9,10.0,Pasien merasa nyeri di ulu hati sampai terasa ...,"Dispepsia 80%, refluk Gastroesofagus (GERD) 90%","Berdasarkan gejala yang Anda sebutkan, berikut..."


In [21]:
df["llm_response"] = df["PARAPHRASE KELUHAN UTAMA"].apply(process_with_rag)

In [31]:
cosine_similarities = []
bleu_scores = []
meteor_scores = []
bertP_scores = []
bertR_scores = []
bertF1_scores = []

In [32]:
for index, row in grouped_df.iterrows():
    print("index ", index)
    ground_truth = str(row["Answer"]).lower()  
    llm_output = str(row["Full Answer"]).lower()  

    if not ground_truth or not llm_output:  
        cosine_similarities.append(-1)
        bertP_scores.append(-1)
        bertR_scores.append(-1)
        bertF1_scores.append(-1)
        bleu_scores.append(-1)
        meteor_scores.append(-1)
        continue

    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([ground_truth, llm_output])
    cosine_sim = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
    cosine_similarities.append(cosine_sim)

    bleu = sentence_bleu([ground_truth.split()], llm_output.split())
    bleu_scores.append(bleu)

    meteor = meteor_score([ground_truth.split()], llm_output.split())
    meteor_scores.append(meteor)

    P, R, F1 = score([llm_output], [ground_truth], lang="en", verbose=False)
    bertP_scores.append(P[0].item())
    bertR_scores.append(R[0].item())
    bertF1_scores.append(F1[0].item())

index  0


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  1


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  2


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  3


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  4


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  5


The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  6


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  7


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  8


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  9


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  10


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  11


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  12


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  13


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  14


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  15


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  16


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  17


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  18


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  19


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  20


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  21


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  22


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  23


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  24


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  25


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  26


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  27


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  28


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  29


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  30


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  31


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  32


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  33


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  34


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  35


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  36


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  37


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  38


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  39


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  40


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  41


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  42


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  43


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


index  44


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [34]:
grouped_df["cosine_similarity"] = cosine_similarities[:len(grouped_df)]
grouped_df["bleu_score"] = bleu_scores[:len(grouped_df)]
grouped_df["meteor_score"] = meteor_scores[:len(grouped_df)]
grouped_df["bert P"] = bertP_scores[:len(grouped_df)]
grouped_df["bert R"] = bertR_scores[:len(grouped_df)]
grouped_df["bert F1"] = bertF1_scores[:len(grouped_df)]

grouped_df[["Question", "Answer", "Full Answer", 
    "bleu_score", "cosine_similarity", "meteor_score", "bert P", "bert R", "bert F1"]].head()

Unnamed: 0,Question,Answer,Full Answer,bleu_score,cosine_similarity,meteor_score,bert P,bert R,bert F1
0,Pasien mengalami demam pada waktu malam. Sebel...,"Demam Tifoid 70%, Demam Berdarah Dengue 30%","Berdasarkan gejala yang disampaikan, saya akan...",5.664578e-79,0.252498,0.210377,0.723584,0.81605,0.76704
1,Pasien mengalami buang air besar cair lebih da...,"Gastroenteritis (GE) Akut 70%, Intoleransi Lak...","Berdasarkan informasi klinis yang diberikan, s...",4.79163e-79,0.062888,0.194378,0.727156,0.832998,0.776487
2,Pasien datang dengan keluhan demam selama 6 ha...,DBD 80%,"Berdasarkan informasi yang diberikan, saya aka...",0.0,0.041613,0.0,0.665408,0.805334,0.728715
3,Pasien menderita demam disertai munculnya brun...,"Infeksi Bakteri/Virus (Sistemik) 50%, Infeksi ...","Berdasarkan informasi klinis yang diberikan, s...",4.351407e-79,0.066632,0.107829,0.747812,0.853032,0.796964
4,Pasien nyeri dada sejak 4 jam lalu seperti ter...,"Angina Pekrotis 80%, Infark Miokard Akut (Sera...",Berdasarkan informasi yang diberikan dalam kon...,0.02019927,0.140566,0.231846,0.74437,0.849195,0.793335


In [36]:
grouped_df.describe()

Unnamed: 0,No,cosine_similarity,bleu_score,meteor_score,bert P,bert R,bert F1
count,45.0,45.0,45.0,45.0,45.0,45.0,45.0
mean,23.0,0.093949,0.004044874,0.127392,0.71893,0.826422,0.768706
std,13.133926,0.06284,0.006912677,0.069796,0.026998,0.017621,0.019841
min,1.0,0.0,0.0,0.0,0.655939,0.778179,0.723521
25%,12.0,0.047159,1.4735139999999999e-155,0.067265,0.70151,0.81886,0.755828
50%,23.0,0.074534,4.419622e-79,0.116162,0.723584,0.827084,0.774335
75%,34.0,0.131133,0.01146919,0.190769,0.741282,0.835486,0.7834
max,45.0,0.252498,0.02019927,0.256085,0.763659,0.86064,0.796964
