# Need CUDA

In [None]:
!pip install -U -q\
    transformers==4.31.0 \
    accelerate==0.21.0 \
    einops==0.6.1 \
    langchain==0.0.240 \
    xformers==0.0.20 \
    bitsandbytes==0.41.0 \
    optimum \
    flash-attn --no-build-isolation \
    jq \
    sentence_transformers \
    faiss-cpu
     

In [3]:
from langchain.document_loaders import JSONLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from torch import cuda, bfloat16
import transformers
import langchain
import accelerate
import bitsandbytes
import optimum
import json
from pathlib import Path
from pprint import pprint



## Load document

In [4]:
genmed='/kaggle/input/raw-medicalqa/GenMedGPT.json'
data = json.loads(Path(genmed).read_text())
data[0]

{'instruction': "If you are a doctor, please answer the medical questions based on the patient's description.",
 'input': "Doctor, I have been experiencing sudden and frequent panic attacks. I don't know what to do.",
 'output': "Well, based on what you're telling me, it sounds like you may be suffering from panic disorder. The best course of action is to start with psychotherapy and mental health counseling. Additionally, we should conduct an electrocardiogram to make sure that there are no physical issues causing your panic attacks. We will also need to perform a depression screen and a toxicology screen to rule out any other underlying causes. Finally, I would recommend a comprehensive psychological and psychiatric evaluation and therapy to help manage your symptoms."}

In [5]:
load = JSONLoader(
    file_path='/kaggle/input/raw-medicalqa/MedDialog.json',
    jq_schema='.[].description',
    text_content=False)

In [19]:
genmed='/kaggle/input/raw-medicalqa/GenMedGPT.json'
meddia = '/kaggle/input/raw-medicalqa/MedDialog.json'
loaders =[
    JSONLoader(
    file_path=meddia,
    jq_schema='.[].description',
    text_content=False),
    JSONLoader(
    file_path=genmed,
    jq_schema='.[].input',
    text_content=False)
]
docs = []


In [7]:
for loader in loaders:
    docs.extend(loader.load())

## Embeddings and Vectorstore

In [8]:
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

Downloading .gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

Downloading 1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading README.md:   0%|          | 0.00/90.3k [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/134M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [10]:
db = FAISS.from_documents(documents = docs, embedding = embeddings )

Batches:   0%|          | 0/203 [00:00<?, ?it/s]

## RAG

In [44]:
ret_docs = db.max_marginal_relevance_search("i am a little concerned that i may be showing some symptoms for coronavirus",k=5)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [61]:
rag = []
for doc in ret_docs:
    if doc.metadata["source"] == genmed:
        context = json.loads(Path(genmed).read_text())[doc.metadata['seq_num']]
        input = context["input"]
        output = context["output"]
        rag.append(f"patient: {input}\ndoctor: {output}")
    if doc.metadata["source"] == meddia:
        context = "\n".join(json.loads(Path(meddia).read_text())[doc.metadata['seq_num']]['utterances'])
        rag.append(f"{context}")
rag = "\n\n".join(rag)
      
        
        

In [73]:
## write it as a function
def RAG(input,k=5):
    ret_docs = db.max_marginal_relevance_search(input,k)
    rag = []
    for doc in ret_docs:
        if doc.metadata["source"] == genmed:
            context = json.loads(Path(genmed).read_text())[doc.metadata['seq_num']]
            input = context["input"]
            output = context["output"]
            rag.append(f"patient: {input}\ndoctor: {output}")
        if doc.metadata["source"] == meddia:
            context = "\n".join(json.loads(Path(meddia).read_text())[doc.metadata['seq_num']]['utterances'])
            rag.append(f"{context}")
    rag = "\n\n".join(rag)
    return rag
      
    

## Model

In [62]:
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
class Config:
    #path=Path('/kaggle/input/purdue-data-for-good-2023')
    bnb_config = transformers.BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=bfloat16
    )
    hf_auth="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"
    model_id = 'llSourcell/medllama2_7b'    
    
sys_config=Config()


In [63]:
model_config = transformers.AutoConfig.from_pretrained(
    sys_config.model_id,
    use_auth_token=sys_config.hf_auth
)

Downloading config.json:   0%|          | 0.00/587 [00:00<?, ?B/s]

In [64]:
model = transformers.AutoModelForCausalLM.from_pretrained(
    sys_config.model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=sys_config.bnb_config,
    device_map='auto',
    use_auth_token=sys_config.hf_auth
)
print(f"Model loaded on {device}")



Downloading (…)fetensors.index.json:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

Downloading (…)of-00003.safetensors:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Model loaded on cuda:0


In [65]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    sys_config.model_id,
    use_auth_token=sys_config.hf_auth
)
    

Downloading tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]



Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [66]:
generate_text = transformers.pipeline(
    model=model, tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    temperature = 1e-5, 
    max_new_tokens=512,  # mex number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)
     

In [67]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=generate_text)

In [70]:
prompt = f'''system message: you are a doctor diagnosis patient's symptom and provide solution.
here is some relevant talk between doctors and patients.
####
{rag}
####
user message: patient: I am a bit tired. what can I do?
Assistant role: doctor: '''

In [71]:
llm(prompt)

" It sounds like you might be experiencing fatigue, which is a common symptom of many medical conditions. To help you better, I would need more information about your symptoms and medical history. Can you tell me a bit more about what you're experiencing? For example, how long have you been feeling tired, and do you have any other symptoms such as fever, chills, or muscle aches? Additionally, have you recently traveled or been exposed to anyone with similar symptoms? Any information you can provide will help me better understand your situation and provide a more accurate response."

In [74]:
def reply(input):
    rag = RAG(input)
    prompt = f'''system message: you are a doctor diagnosis patient's symptom and provide solution.
here is some relevant talk between doctors and patients.
####
{rag}
####
user message: patient: {input}.
Assistant role: doctor: '''
    return llm(prompt)
    
    
    

In [75]:
reply("I can not eat food")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

' Sorry to hear that. Have you experienced any recent changes or stressors that may be contributing to this? Additionally, have you noticed any other symptoms such as stomach pain, nausea, or diarrhea? These details will help me better understand your situation and provide appropriate guidance.'