In [1]:
import os
import pandas as pd

folder_path = "/Users/avyaahuja/doctor/dataset/"

category_mapping = {
    "RES": "respiratory",
    "MSK": "musculoskeletal",
    "CAR": "cardiac",
    "DER": "dermatological",
    "GAS": "gastrointestinal"
}

def preprocess_all_conversations(folder_path):
    data = []

    for filename in os.listdir(folder_path):
        if filename.endswith(".txt"):  
            prefix = filename.split('.')[0][:3] 
            category = category_mapping.get(prefix, "Unknown") 
            
            file_path = os.path.join(folder_path, filename)
            with open(file_path, "r", encoding='ISO-8859-1') as file: 
                conversations = file.readlines()

                doctor = ""
                patient = ""
                conversation_pairs = []

                for line in conversations:
                    line = line.strip()

                    if line.startswith("D:"):
                        doctor = line.replace("D:", "").strip()

                    elif line.startswith("P:"):
                        patient = line.replace("P:", "").strip()

                        if doctor and patient:
                            conversation_pairs.append({
                                "type": category,
                                "doctor": doctor,
                                "patient": patient
                            })
                            doctor = ""  
                            patient = ""  

                data.extend(conversation_pairs)

    return pd.DataFrame(data)

df = preprocess_all_conversations(folder_path)

df.to_csv("/Users/avyaahuja/doctor/data/processed.csv", index=False)
df.head()


Unnamed: 0,type,doctor,patient
0,respiratory,How may I help you?,"Hi, umm, so I've had a sore throat for the pas..."
1,respiratory,"Yeah, for sure. So you said that the sore thro...","Neither, it's been the same."
2,respiratory,"OK, is it painful to swallow food or liquids?","It's painful to swallow solids, yeah."
3,respiratory,"OK, uh, and how is your, umm, have you had any...","No, not really. No."
4,respiratory,OK. And have you noticed any like neck swelling?,No.


In [2]:
resp_df = df[df['type'] == 'respiratory']
musc_df = df[df['type'] == 'musculoskeletal']
cardiac_df = df[df['type'] == 'cardiac']
derm_df = df[df['type'] == 'dermatological']
gastro_df = df[df['type'] == 'gastrointestinal']

In [3]:
print(resp_df.shape,musc_df.shape,cardiac_df.shape,derm_df.shape,gastro_df.shape)

(10057, 3) (2478, 3) (228, 3) (40, 3) (237, 3)


In [4]:
resp_df.to_csv("/Users/avyaahuja/doctor/data/resp.csv", index=False)
musc_df.to_csv("/Users/avyaahuja/doctor/data/musc.csv", index=False)
cardiac_df.to_csv("/Users/avyaahuja/doctor/data/cardiac.csv", index=False)
derm_df.to_csv("/Users/avyaahuja/doctor/data/derm.csv", index=False)
gastro_df.to_csv("/Users/avyaahuja/doctor/data/gastro.csv", index=False)

In [5]:
# from langchain_community.document_loaders import CSVLoader

# loader = CSVLoader(file_path='processed.csv')
# docs = loader.load()


In [6]:
from langchain_community.document_loaders import CSVLoader

loader = CSVLoader(file_path='/Users/avyaahuja/doctor/data/derm.csv')
derm_docs = loader.load()

In [7]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [8]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=20)
chunk_docs=text_splitter.split_documents(derm_docs)
chunk_docs

[Document(metadata={'source': '/Users/avyaahuja/doctor/data/derm.csv', 'row': 0}, page_content="type: dermatological\ndoctor: Hi, so you I understand you have a rash on your leg.\npatient: I don't really know what I would call it. That's what I told the nurse when I came in, but it more hurts than anything."),
 Document(metadata={'source': '/Users/avyaahuja/doctor/data/derm.csv', 'row': 1}, page_content="type: dermatological\ndoctor: OK, uh, how long is this been going on for?\npatient: Ah, it's been it's been looking weird for about a week, but only yes, it started hurting yesterday and started looking strange and I just wanted to come in to get it checked out."),
 Document(metadata={'source': '/Users/avyaahuja/doctor/data/derm.csv', 'row': 2}, page_content="type: dermatological\ndoctor: OK when you say it's looking strange, can you tell me what you mean by that?\npatient: Swollen, it looks bigger than it did before, and meaner, if that makes sense."),
 Document(metadata={'source': '/

In [9]:
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=20)
# chunk_docs=text_splitter.split_documents(docs)
# chunk_docs

In [10]:
derm_df.shape

(40, 3)

In [11]:
# df.shape

In [12]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

db=FAISS.from_documents(chunk_docs,OllamaEmbeddings())
db

<langchain_community.vectorstores.faiss.FAISS at 0x7f9f02f83850>

In [13]:
# derm_df

In [14]:
query="I have a rash which is looking wierd"
result=db.similarity_search(query)
text=result[0].page_content

In [15]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import Ollama

llm = Ollama(model="llama2")

prompt_template = PromptTemplate(
    input_variables=["query", "context"],
    template="""
    You are a helpful AI assistant. Based on the given context, provide a direct and concise answer to the query.
    
    Query: {query}
    Context: {context}
    
    Answer:"""
)

llm_chain = LLMChain(llm=llm, prompt=prompt_template)

response = llm_chain.run({"query": query, "context": text})
print(response)

  llm_chain = LLMChain(llm=llm, prompt=prompt_template)
  response = llm_chain.run({"query": query, "context": text})


Oh no! Sorry to hear that you have a weird rash on your leg. Based on what you've shared with me, it's difficult to provide a definitive diagnosis without a thorough examination and medical history. However, I can offer some possible causes of a rash on the leg that may be worth discussing with your doctor.

One possibility is an allergic reaction, which could be triggered by something you came into contact with, such as a new soap or lotion, or even something in your environment. Another possibility is a bacterial or fungal infection, which can cause a rash to appear on the skin. Additionally, it's possible that the rash could be related to an underlying medical condition, such as eczema or psoriasis.

It's important to consult with your doctor to get a proper diagnosis and treatment plan. They will be able to examine the rash and ask you questions about your symptoms and medical history to determine the cause of the rash. In the meantime, you can try some over-the-counter treatments 

In [16]:
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnableLambda
from langchain.llms import Ollama

llm = Ollama(model="llama2")

prompt_template = PromptTemplate(
    input_variables=["query", "context"],
    template="""
    You are a helpful AI assistant. Based on the given context, provide a direct and concise answer to the query.
    
    Query: {query}
    Context: {context}
    
    Answer:"""
)

runnable_chain = prompt_template | llm

response = runnable_chain.invoke({"query": query, "context": text})
print(response)


Doctor: Okay, thank you for letting me know. Based on your description, it sounds like you may have developed an irritation or infection on your leg. Have you tried any over-the-counter treatments such as hydrocortisone cream or calamine lotion? If the rash is severe or doesn't improve with self-care measures, we may need to prescribe a stronger medication or perform further testing to determine the cause.
