In [1]:
!pip install openpyxl




[notice] A new release of pip is available: 23.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
from dotenv import load_dotenv
import os
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone

In [4]:
# Load environment variables via python-dotenv before the os.environ.get(...)
# lookups for PINECONE_API_KEY / OPENAI_API_KEY in the following cells.
load_dotenv()

True

In [5]:
# Pinecone client; the API key is taken from the PINECONE_API_KEY environment
# variable (os.environ.get yields None if it is not set).
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

In [6]:
# Embedding model passed to the index; key comes from OPENAI_API_KEY.
embed_model = OpenAIEmbedding(
    model="text-embedding-ada-002",
    api_key=os.environ.get("OPENAI_API_KEY"),
)

# Handle to the "hematology-index" Pinecone index, wrapped as a LlamaIndex vector store.
pinecone_index = pc.Index(name="hematology-index")
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

# Index object built on top of that vector store.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model,
)

In [7]:
# Chat model (temperature 0); also handed to the response synthesizer below.
llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    api_key=os.environ.get("OPENAI_API_KEY"),
)
response_synthesizer = get_response_synthesizer(llm=llm)

# Retriever over the index, configured with similarity_top_k=10.
retriever = VectorIndexRetriever(index=index, similarity_top_k=10)

# RAG pipeline: retriever -> similarity post-processor (cutoff 0.7) -> synthesizer.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
)

In [8]:
# Question/answer sheet used as the evaluation set.
df = pd.read_excel(
    "../../Hematology-pdf/Harrisons Hematology and Oncology 2ed_questionsPage743.xlsx"
)

In [9]:
# Display the loaded frame as a visual check of its columns before querying the models.
df

Unnamed: 0,Questions,Answers
0,A 39-year-old woman is evaluated for anemia. H...,The answer is A. (Chap. 2) This patient with ...
1,A 62-year-old man is evaluated for anemia. He ...,The answer is C. (Chap. 2) The reticulocyte i...
2,You are asked to review the peripheral blood s...,The answer is C. (Chap. 2) This blood smear sh...
3,All of the following are common manifestations...,The answer is E. (Chap. 3) von Willebrand dis...
4,A 68-year-old man is admitted to the intensive...,The answer is B. (Chap. 3) The activated part...
...,...,...
134,A 45-year-old man is diagnosed with pheochromo...,The answer is E. (Chap. 51) Complete removal ...
135,An 81-year-old man is admitted to the hospital...,The answer is E. (Chap. 52) Hypercalcemia is ...
136,A 55-year-old man is found to have a serum cal...,The answer is E. (Chap. 52) A variety of hormo...
137,A 55-year-old woman presents with progressive ...,The answer is C. (Chap. 53) One of the better ...


In [10]:
def modelResponse(llm, query_engine, df: pd.DataFrame) -> pd.DataFrame:
    """Answer every question with both the bare LLM and the query engine.

    Parameters
    ----------
    llm
        Object exposing ``complete(prompt)``; the ``.text`` attribute of its
        result is stored as the answer.
    query_engine
        Object exposing ``query(prompt)``; the ``.response`` attribute of its
        result is stored as the answer.
    df : pandas.DataFrame
        Frame with a ``'Questions'`` column holding one prompt per row.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with two extra columns: ``'chatGpt3.5'`` (answers
        from ``llm``) and ``'AnswersWithRag'`` (answers from
        ``query_engine``).  The input frame is not modified.

    Raises
    ------
    KeyError
        If ``df`` has no ``'Questions'`` column.
    """
    llm_answers = []
    rag_answers = []
    for question in df['Questions']:
        # Both back ends are queried with the identical, unmodified prompt.
        llm_answers.append(llm.complete(question).text)
        rag_answers.append(query_engine.query(question).response)

    # Work on a copy so the caller's frame is left untouched and re-running
    # the calling cell always starts from the same input.
    result = df.copy()
    result['chatGpt3.5'] = llm_answers
    result['AnswersWithRag'] = rag_answers
    return result

In [11]:
# Run every question through both the bare LLM and the RAG query engine
# (one llm.complete and one query_engine.query call per row).
new_df = modelResponse(llm, query_engine, df)

In [12]:
# Display the result frame, including the 'chatGpt3.5' and 'AnswersWithRag' columns.
new_df

Unnamed: 0,Questions,Answers,chatGpt3.5,AnswersWithRag
0,A 39-year-old woman is evaluated for anemia. H...,The answer is A. (Chap. 2) This patient with ...,C. Hemoglobin electrophoresis\n\nThis patient ...,A. Ferritin
1,A 62-year-old man is evaluated for anemia. He ...,The answer is C. (Chap. 2) The reticulocyte i...,The reticulocyte production index (RPI) is cal...,The reticulocyte production index (RPI) can be...
2,You are asked to review the peripheral blood s...,The answer is C. (Chap. 2) This blood smear sh...,D. Splenomegaly\n\nSplenomegaly is a common ph...,D. Splenomegaly
3,All of the following are common manifestations...,The answer is E. (Chap. 3) von Willebrand dis...,A. Angiodysplasia of the small bowel,A. Angiodysplasia of the small bowel
4,A 68-year-old man is admitted to the intensive...,The answer is B. (Chap. 3) The activated part...,B. Acquired factor VIII inhibitor\n\nThe patie...,The most likely cause of the patient's coagulo...
...,...,...,...,...
134,A 45-year-old man is diagnosed with pheochromo...,The answer is E. (Chap. 51) Complete removal ...,E. Treatment with IV phentolamine is indicated...,E. Treatment with IV phentolamine is indicated...
135,An 81-year-old man is admitted to the hospital...,The answer is E. (Chap. 52) Hypercalcemia is ...,B. Pamidronate\n\nPamidronate is a bisphosphon...,The appropriate initial management steps for t...
136,A 55-year-old man is found to have a serum cal...,The answer is E. (Chap. 52) A variety of hormo...,E. Parathyroid hormone related protein (PTH-rp...,D. Parathyroid hormone (PTH)
137,A 55-year-old woman presents with progressive ...,The answer is C. (Chap. 53) One of the better ...,C. Breast cancer\n\nThis patient's presentatio...,C. Breast cancer


In [14]:
# Persist the results. index=False: the frame already carries row ids in its
# "Unnamed: 0" column, so also writing the pandas index would add a second,
# redundant unnamed column to the CSV.
new_df.to_csv("Experiment3.csv", index=False)