In [26]:
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.2-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)
   ---------------------------------------- 0.0/250.0 kB ? eta -:--:--
   ---- ----------------------------------- 30.7/250.0 kB ? eta -:--:--
   ------------------- -------------------- 122.9/250.0 kB 1.4 MB/s eta 0:00:01
   ---------------------------------------  245.8/250.0 kB 2.1 MB/s eta 0:00:01
   ---------------------------------------- 250.0/250.0 kB 1.9 MB/s eta 0:00:00
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2



[notice] A new release of pip is available: 23.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [33]:
import pandas as pd

In [1]:
from dotenv import load_dotenv
import os
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone

In [3]:
# Load variables from a .env file into the process environment. The API keys
# read through os.environ in the cells below are presumably defined there.
load_dotenv()

True

In [4]:
# Pinecone client, authenticated with the key taken from the environment.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

In [5]:
# Attach to the Pinecone index by name and expose it to LlamaIndex as a vector store.
pinecone_index = pc.Index(name="hematology-index")
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

# OpenAI embedding model handed to the index.
embed_model = OpenAIEmbedding(
    model="text-embedding-ada-002",
    api_key=os.getenv("OPENAI_API_KEY"),
)

# Build the LlamaIndex index object on top of the vector store.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model,
)

In [6]:
# Retriever: fetch the 10 most similar nodes from the vector index per query.
retriever = VectorIndexRetriever(index=index, similarity_top_k=10)

# Chat model used both on its own and as the synthesizer inside the RAG pipeline.
# temperature=0 is passed to the model for every call.
llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    api_key=os.getenv("OPENAI_API_KEY"),
)

# Response synthesizer built around the same LLM.
response_synthesizer = get_response_synthesizer(llm=llm)

# Post-processor applied to retrieved nodes, configured with a 0.7 similarity cutoff.
similarity_filter = SimilarityPostprocessor(similarity_cutoff=0.7)

# Full RAG pipeline: retrieve -> post-process -> synthesize.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[similarity_filter],
)

In [34]:
# Load the question/answer spreadsheet into a DataFrame.
df = pd.read_excel(
    io="./Hematology-pdf/American College of Physicians - MKSAP 19_ medical knowledge self-assessment program. Hematology-American College of Physicians (2021).xlsx"
)

In [35]:
# Display the loaded table (pandas truncates long frames in the rendered output).
df

Unnamed: 0,Questions,Answers
0,A S8-year-old man is evaluated for possible sm...,Answer: C Educational Objective: Evaluate smo...
1,A 47 year-old woman is hospitalized with a 3-d...,Answer: C Educational Objective: Treat pure r...
2,A S5-year-old woman is evaluated before starti...,Answer: E Educational Objective: Evaluate a pr...
3,A 42-year-old woman is evaluated for several m...,Answer: D Educational Objective: Prevent herpe...
4,A 27-year-old woman is evaluated for shortness...,Answer: D Educational Objective: Diagnose vita...
...,...,...
72,A 41-year-old man is evaluated in the emergenc...,Answer: E Educational Objective: Manage acute ...
73,A 27-year old woman is diagnosed with acute fe...,Answer: D Educational Objective: Treat a patie...
74,A 68 year old man is evaluated in the emergenc...,Answer: A Educational Objective: Manage gastro...
75,A 62 year old rvoman is l.rospitalized for an ...,Answer: A Educational Objective: Treat bleedin...


In [38]:
def modelResponse(llm, query_engine, df):
    """Answer every question in ``df`` with the bare LLM and with the RAG engine.

    Args:
        llm: Object whose ``complete(prompt)`` returns a result exposing ``.text``.
        query_engine: Object whose ``query(prompt)`` returns a result exposing
            ``.response``.
        df: DataFrame with a ``'Questions'`` column holding the prompts.

    Returns:
        A new DataFrame: a copy of ``df`` with two extra columns,
        ``'chatGpt3.5'`` (the LLM's answer text) and ``'AnswersWithRag'``
        (the query engine's answer). The input ``df`` is not modified, so
        re-running the call always starts from the same input.

    Raises:
        KeyError: If ``df`` has no ``'Questions'`` column.
    """
    llm_answers = []
    rag_answers = []

    # Both models are queried for a question before moving on to the next one,
    # so the order of outgoing calls is llm, engine, llm, engine, ...
    for question in df['Questions']:
        llm_answers.append(llm.complete(question).text)
        rag_answers.append(query_engine.query(question).response)

    # Work on a copy: adding the columns to the caller's frame would silently
    # change a DataFrame that earlier cells have already displayed.
    result = df.copy()
    result['chatGpt3.5'] = llm_answers
    result['AnswersWithRag'] = rag_answers
    return result

In [39]:
# Run every question through both the bare LLM and the RAG query engine.
# This makes two model calls per row of df.
new_df=modelResponse(llm,query_engine,df)

In [40]:
# Display the table with the two model-answer columns added by modelResponse.
new_df

Unnamed: 0,Questions,Answers,chatGpt3.5,AnswersWithRag
0,A S8-year-old man is evaluated for possible sm...,Answer: C Educational Objective: Evaluate smo...,(B) Skeletal survey\n\nA skeletal survey is th...,Skeletal survey
1,A 47 year-old woman is hospitalized with a 3-d...,Answer: C Educational Objective: Treat pure r...,(A) CT pulmonary angiography\n\nGiven the pati...,CT pulmonary angiography
2,A S5-year-old woman is evaluated before starti...,Answer: E Educational Objective: Evaluate a pr...,(A) Epoetin\n\nThis patient has anemia with a ...,The most appropriate additional treatment duri...
3,A 42-year-old woman is evaluated for several m...,Answer: D Educational Objective: Prevent herpe...,(C) Intravenous immune globulin\n\nThis patien...,Intravenous immune globulin would be the most ...
4,A 27-year-old woman is evaluated for shortness...,Answer: D Educational Objective: Diagnose vita...,(D) Serum vitamin B12 level\n\nThe patient's p...,The most appropriate initial diagnostic test w...
...,...,...,...,...
72,A 41-year-old man is evaluated in the emergenc...,Answer: E Educational Objective: Manage acute ...,(B) Plasma exchange\n\nThis patient is present...,(B) Plasma exchange
73,A 27-year old woman is diagnosed with acute fe...,Answer: D Educational Objective: Treat a patie...,(D) Low molecular weight heparin\n\nImmediate ...,The most appropriate immediate treatment would...
74,A 68 year old man is evaluated in the emergenc...,Answer: A Educational Objective: Manage gastro...,(B) Discontinue clopidogrel and aspirin\n\nIn ...,(A) Discontinue clopidogrel
75,A 62 year old rvoman is l.rospitalized for an ...,Answer: A Educational Objective: Treat bleedin...,(C) Fresh frozen plasma\n\nFresh frozen plasma...,The most appropriate treatment for the 62-year...


In [43]:
# Persist the results next to the notebook. With to_csv's default index=True,
# the row index is also written as the first, unnamed column.
new_df.to_csv("Experiment.csv")