In [1]:
!pip install -U bitsandbytes
!pip install transformers accelerate faiss-cpu sentence-transformers streamlit

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.4-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Downloading bitsandbytes-0.45.4-py3-none-manylinux_2_24_x86_64.whl (76.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.0/76.0 MB[0m [31m23.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.4
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting streamlit
  Downloading streamlit-1.44.1-py3-none-any.whl.metadata (8.9 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading faiss_cpu-1.10.0-cp310-cp310-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m55.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading streamlit-1.44.1-py3-none-any.whl (9.8 M

In [2]:
import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

docs_df = pd.read_pickle("/kaggle/input/genai-a05/docs_with_embeddings (1).pkl")
print("Loaded DataFrame:")
print(docs_df.head())
print("Columns:", docs_df.columns.tolist())

Loaded DataFrame:
                               label  \
0                           Migraine   
1  Gastro-oesophageal Reflux Disease   
2               Peptic Ulcer Disease   
3                             Stroke   
4                 Multiple Sclerosis   

                                                text           source  \
0  suspected epilepsy: risk factors : genetic pre...  knowledge_graph   
1  suspected gastro-oesophageal reflux disease: r...  knowledge_graph   
2  suspected peptic ulcer disease: risk factors :...  knowledge_graph   
3  suspected stroke: risk factors : hypertension,...  knowledge_graph   
4  suspected multiple sclerosis: risk factors : g...  knowledge_graph   

                                          embeddings  
0  [0.013505981303751469, -0.07202115654945374, -...  
1  [0.0694703608751297, -0.04019218683242798, -0....  
2  [0.07076480984687805, -0.09245926141738892, -0...  
3  [-0.041953809559345245, -0.06580012291669846, ...  
4  [0.027151813730597496, -

In [3]:
# FAISS
embeddings = np.array(docs_df['embeddings'].tolist(), dtype=np.float32)
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
print("FAISS Index Size:", index.ntotal)

FAISS Index Size: 535


In [4]:
minilm = SentenceTransformer('all-MiniLM-L6-v2')

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:
import re

def preprocess_text(text):
    text = text.lower()
    text = text.replace('\n', ' ').replace('\t', ' ')
    text = re.sub(r'[^\w\s.,;:>-]', ' ', text)
    text = ' '.join(text.split()).strip()
    return text

In [6]:
def retrieve_docs(query, k=5):
    query_embedding = minilm.encode([query], show_progress_bar=False)[0].astype(np.float32)
    distances, indices = index.search(np.array([query_embedding]), k)
    retrieved_docs = docs_df.iloc[indices[0]][['label', 'text', 'source']]
    retrieved_docs['distance'] = distances[0]
    return retrieved_docs

In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

from huggingface_hub import login
login(token='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')

model_name = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,  # 4-bit quantization to fit Kaggle GPU
    device_map="auto",  # Auto-assign to GPU
    torch_dtype=torch.float16  # Reduce memory further
)

tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [8]:
def rag_pipeline(query, k=5):
    retrieved_docs = retrieve_docs(query, k)
    context = "\n".join(retrieved_docs['text'].tolist())
    
    prompt = (
        f"<s>[INST] Query: {query}\n"
        f"Relevant Context: {context}\n"
        f"You are a medical AI assistant diagnosing patients based on their query, using relevant context from past records of some other different patients. Generate a concise and accurate response to the query.[/INST]"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(
        **inputs,
        max_new_tokens=75,
        do_sample=False,    # Disable sampling to reduce hallucination
        temperature=0.05,
    )
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True).split("[/INST]")[-1].strip()
    # Ensure complete sentences
    if not answer.endswith('.'):
        last_period = answer.rfind('.')
        if last_period != -1:
            answer = answer[:last_period + 1]
        else:
            answer += "."
    return retrieved_docs, answer

In [10]:
# query = "What’s the diagnosis for a 55-year-old male patient with a history of hypertension and smoking, presenting with shortness of breath, persistent cough, and fatigue over the past two weeks?"
query = "What's the diagnosis for a patient who has stomach pain."
query = preprocess_text(query)
docs, answer = rag_pipeline(query)
# print("Retrieved Documents:\n", docs)
print("Generated Answer:\n", answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generated Answer:
 Based on the patient's symptoms and medical history, it is possible that they are experiencing a gastrointestinal (GI) issue such as gastritis or an ulcer. The patient's history of alcohol use and recent stress may have contributed to the development of this issue.
