In [1]:
import os
import glob
from dotenv import load_dotenv
import gradio as gr
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain_core.callbacks import StdOutCallbackHandler
from langchain.vectorstores import FAISS
import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

  from .autonotebook import tqdm as notebook_tqdm


In [13]:
# Name of the OpenAI chat model passed to ChatOpenAI when the chain is built.
MODEL = "gpt-4o-mini"

# Directory used as Chroma's persist_directory for the patient vector store.
db_name = "vector_db_patient"

In [3]:
# Pull variables from a local .env file, letting them replace any values
# already present in the process environment.
load_dotenv(override=True)

# Keep an existing OPENAI_API_KEY untouched; only when the variable is absent
# fall back to the placeholder below (replace it by hand if not using .env).
os.environ.setdefault('OPENAI_API_KEY', 'your-key-if-not-using-env')

In [9]:
# Collect the sub-folders of the dictation directory. Each folder's base name
# (e.g. "test_patient_1_f") is stored on its documents as metadata['doc_type'].
# - Only real directories are kept: DirectoryLoader raises on a plain file,
#   so a stray file beside the folders would otherwise abort the whole load.
# - Sorting makes the document (and therefore chunk) order reproducible,
#   since glob returns entries in arbitrary filesystem order.
folders = sorted(
    path
    for path in glob.glob("knowledge_base_patient_data/mtsample_dictations/*")
    if os.path.isdir(path)
)

# Decode the text files as UTF-8 explicitly instead of relying on the
# platform's default encoding, which differs between operating systems.
# NOTE(review): assumes the .txt files are UTF-8 encoded - confirm.
text_loader_kwargs = {"encoding": "utf-8"}

documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    print(doc_type)
    loader = DirectoryLoader(
        folder,
        glob="**/*.txt",
        loader_cls=TextLoader,
        loader_kwargs=text_loader_kwargs,
    )
    folder_docs = loader.load()
    # Tag every document with the folder it came from before collecting it.
    for doc in folder_docs:
        doc.metadata['doc_type'] = doc_type
    documents.extend(folder_docs)

test_patient_3_m
test_patient_1_f
test_patient_2_f
test_patient_4_f


In [10]:
# Break the loaded documents into chunks for embedding. chunk_size and
# chunk_overlap are passed straight to CharacterTextSplitter.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_documents(documents)

# Last expression of the cell: shows how many chunks were produced.
len(chunks)

11

In [14]:
# Embedding model shared by the cleanup below and the store built afterwards.
embeddings = OpenAIEmbeddings()

# If a persisted store from an earlier run is present on disk, open it and
# drop its collection so the next cell starts from an empty collection.
if os.path.exists(db_name):
    Chroma(
        persist_directory=db_name,
        embedding_function=embeddings,
    ).delete_collection()

In [15]:
# Embed every chunk and persist the resulting Chroma store under db_name.
vector_db_patient = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=db_name,
)

# Report how many entries the store holds. `_collection` is a private
# attribute of the Chroma wrapper, so this may break on library upgrades.
print(vector_db_patient._collection.count())

11


In [21]:
# Chat model that generates the answers.
llm = ChatOpenAI(model=MODEL, temperature=0.7)

# Conversation memory; it is stored under the "chat_history" key and hands
# back message objects rather than a single string.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

# Retriever over the patient vector store, asking for 10 results per query.
retriever = vector_db_patient.as_retriever(search_kwargs={"k": 10})

# Wire model, retriever and memory into one conversational retrieval chain.
# NOTE(review): StdOutCallbackHandler echoes the chain's formatted prompts,
# including retrieved record text, to stdout - consider removing it before
# sharing the notebook or its outputs.
conversational_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    callbacks=[StdOutCallbackHandler()],
)



[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
PHYSICAL EXAMINATION:

Temperature 95.8, pulse 68, blood pressure 132/73, and weight 159 pounds.

This is a pleasant female in no acute distress.
The patient's abdomen is soft, nontender, nondistended with a well-healed midline scar.
There is an ileostomy in the right hemiabdomen, which is pink, patent, productive, and protuberant.
There are no signs of masses or hernias over the patient's abdomen.

ASSESSMENT AND PLAN:

HISTORY OF PRESENT ILLNESS:,

Ms. Mouse is a 67-year-old white female with a history of uterine papillary serous carcinoma who is
status post 6 cycles of carboplatin and Taxol, is here today f

In [20]:
def chat_message(message, history):
    """Answer a single chat turn with the conversational retrieval chain.

    Args:
        message: The text of the user's latest message.
        history: Earlier turns as passed in by the chat UI. It is not used
            here; the chain is built with its own memory object, which
            supplies the chat history.

    Returns:
        The value stored under the 'answer' key of the chain's result.
    """
    # Name the input key explicitly. Passing a bare string only works while
    # the chain has exactly one input left after memory fills in its own
    # variables, which is an implicit coercion that is easy to break.
    result = conversational_chain.invoke({"question": message})
    return result['answer']

# Wrap the chat handler in Gradio's chat UI, then start the local web server.
chat_ui = gr.ChatInterface(chat_message)
chat_ui.launch()

Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.


