In [38]:
import pickle
import os
import re
import json
 
# To help construct our Chat Messages
from langchain.schema import HumanMessage
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks import get_openai_callback
 
# We will be using ChatGPT model (gpt-3.5-turbo)
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
 
# To parse outputs and get structured data back
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
 
# Enter your API Key
from dotenv import load_dotenv
load_dotenv()

True

In [13]:
# Check OpenAI Key is present
len(os.environ.get('OPENAI_API_KEY')) > 0

True

In [14]:
with open(f"data/processed/Gastrointestinal.pkl", "rb") as f:
        VectorStore = pickle.load(f)
VectorStore

<langchain.vectorstores.faiss.FAISS at 0x7f33fa624970>

In [56]:
llm = OpenAI()
chain = load_qa_chain(llm=llm, chain_type="stuff")
docs = VectorStore.similarity_search(query='gastrointestinal anatomy scans', k=1)

In [57]:
docs

[Document(page_content='metabolism and which lab values increase or decrease depending on the\ndisease process. Be comfortable with basic interpretation of abdominal\nx-rays, CT scans, and endoscopic images.\n351\nFAS1_2019_09-Gastrointestinal_351-394.indd 351 10/26/18 10:58 AM352\nseCtion iii Gastrointestinal ` gastrointestinal—embryology Gastrointestinal ` gastrointestinal—embryology\n` gastrointestinal—embryology\nNormal Foregut—esophagus to upper duodenum.\ngastrointestinal Midgut—lower duodenum to proximal 2/3 of transverse colon.\nembryology Hindgut—distal 1/3 of transverse colon to anal canal above pectinate line.\nMidgut development:\n\x83 6th week—physiologic midgut herniates through umbilical ring\n\x83 10th week—returns to abdominal cavity + rotates around superior mesenteric artery (SMA),\ntotal 270° counterclockwise\nVentral wall defects Developmental defects due to failure of rostral fold closure (eg, sternal defects [ectopia cordis]),', metadata={})]

In [58]:
for doc in docs:
    doc.page_content = doc.page_content.replace('\n', ' ')
docs

[Document(page_content='metabolism and which lab values increase or decrease depending on the disease process. Be comfortable with basic interpretation of abdominal x-rays, CT scans, and endoscopic images. 351 FAS1_2019_09-Gastrointestinal_351-394.indd 351 10/26/18 10:58 AM352 seCtion iii Gastrointestinal ` gastrointestinal—embryology Gastrointestinal ` gastrointestinal—embryology ` gastrointestinal—embryology Normal Foregut—esophagus to upper duodenum. gastrointestinal Midgut—lower duodenum to proximal 2/3 of transverse colon. embryology Hindgut—distal 1/3 of transverse colon to anal canal above pectinate line. Midgut development: \x83 6th week—physiologic midgut herniates through umbilical ring \x83 10th week—returns to abdominal cavity + rotates around superior mesenteric artery (SMA), total 270° counterclockwise Ventral wall defects Developmental defects due to failure of rostral fold closure (eg, sternal defects [ectopia cordis]),', metadata={})]

In [59]:
response_schemas = [
    ResponseSchema(name="question", description="A question generated from input text snippet."),
    ResponseSchema(name="options", description="5 possible choices of the multiple choice question."),
    ResponseSchema(name="answer", description="the correct answer out of the five choices."),
    ResponseSchema(name="explanation", description="Explanation for the answer")
]

In [60]:
# The parser that will look for the LLM output in my schema and return it back to me
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
 
# The format instructions that LangChain makes. Let's look at them
format_instructions = output_parser.get_format_instructions()
 
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "\`\`\`json" and "\`\`\`":

```json
{
	"question": string  // A question generated from input text snippet.
	"options": string  // 5 possible choices of the multiple choice question.
	"answer": string  // the correct answer out of the five choices.
	"explanation": string  // Explanation for the answer
}
```


In [63]:
format_instructions = """The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "-json":

-json
{
	"question": string  // A question generated from input text snippet.
	"options": list  // 5 possible choices of the multiple choice question.
	"answer": integer  // the correct answer index out of the five choices.
	"explanation": string  // Explanation for the answer
}
-json"""

In [66]:
# The prompt template that brings it all together
question = f'''You are a professor creating a multiple choice questionnaire based on the context provided.\n{format_instructions}'''
question

'You are a professor creating a multiple choice questionnaire based on the context provided.\nThe output should be a markdown code snippet formatted in the following schema, including the leading and trailing "-json":\n\n-json\n{\n\t"question": string  // A question generated from input text snippet.\n\t"options": list  // 5 possible choices of the multiple choice question.\n\t"answer": integer  // the correct answer index out of the five choices.\n\t"explanation": string  // Explanation for the answer\n}\n-json'

In [67]:
for doc in docs:
    with get_openai_callback() as cb:
            response = chain.run(input_documents=[doc], question=question)
            print(cb)
    print(response)

Tokens Used: 406
	Prompt Tokens: 401
	Completion Tokens: 5
Successful Requests: 1
Total Cost (USD): $0.00812
 I don't know.


In [45]:
response

" I don't know."