In [1]:
import os
from dotenv import load_dotenv

In [2]:
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# print(OPENAI_API_KEY)

In [3]:
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [10]:
%pip install -qU langchain-community pypdf

Note: you may need to restart the kernel to use updated packages.


In [None]:
from langchain_community.document_loaders import PyPDFLoader

In [6]:
file_path = "C:\\Users\\Aryan Parihar\\Desktop\\GEN-AI\\Projects\\Question creator\\data\\MIT-Review-Research-Report.pdf"
loader = PyPDFLoader(file_path)
data=loader.load()

In [None]:
# for storing all the data in a single variable
question_gen =""
for page in data:
    question_gen += page.page_content

# chunking operation

In [12]:
# using llm to perform chunking as we use token-based splitting
from langchain_text_splitters import TokenTextSplitter
splitter_ques_gen = TokenTextSplitter(
    model_name="gpt-3.5-turbo",
    chunk_size=1000,
   chunk_overlap=100,
)


In [62]:
chunks_ques_gen = splitter_ques_gen.split_text(question_gen)

In [14]:
len(chunks_ques_gen)

5

# implementing the openAI model   

In [38]:
from langchain_openai import ChatOpenAI
llm_ques_gen_pipeline = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0
)

# creating hte prompt template

In [49]:
prompt_template = """
Generate five comprehensive and thought-provoking questions based on the following context to assess understanding and critical thinking skills.
Context:
{text}
Create questions suitable for an interview setting.
Number the questions from 1 to 5.
Questions:
"""


In [50]:
# prompt template
from langchain_core.prompts import PromptTemplate

In [51]:
prompt_ques_gen = PromptTemplate(template=prompt_template, 
                                input_variables=["text"])

In [82]:
# adding a refined prompt template for question generation according t the additional text provided by the user
refined_prompt_template_ques_gen = """You are an expert question generator. Based on the following context, generate only 5 questions based on the input provided by the user into more deep knowledge and thought-provoking questions to assess understanding and critical thinking skills.
Context: {text}
{existing_answer}.
Questions:
"""

In [54]:
refined_prompt_ques_gen = PromptTemplate(template=refined_prompt_template_ques_gen,
                                        input_variables=["text", "existing_answer"])

In [67]:
from langchain_core.output_parsers import StrOutputParser
parser = StrOutputParser()

In [85]:
all_refined_questions = []

for chunk in chunks_ques_gen:
    refined = (refined_prompt_ques_gen | llm_ques_gen_pipeline | parser).invoke({
        "text": chunk,
        "existing_answer": (prompt_ques_gen | llm_ques_gen_pipeline | parser).invoke({"text": chunk})
    })
    all_refined_questions.append(refined.split("\n")[0].lstrip("0123456789. "))

for i, q in enumerate(all_refined_questions, 1):
    print(f"{i}. {q}")
    print('-'*50)


1. How can health-care institutions ensure that AI technologies are implemented in a way that truly enhances the quality of patient care and does not replace human interaction?
--------------------------------------------------
2. How do health care institutions prioritize their AI budget allocation between different AI applications, such as patient flow optimization, medical imaging, and predictive analytics?
--------------------------------------------------
3. How do AI-enabled scheduling tools give more autonomy to patients while streamlining smarter scheduling and increasing resource capacity in healthcare settings?
--------------------------------------------------
4. How can AI technologies be further developed to enhance the efficiency and accuracy of medical image analysis for clinicians?
--------------------------------------------------
5. How can AI be integrated into healthcare ecosystems to ensure that improvements made locally can have a global impact on healthcare?
----

# storing to vector embedding for ans

In [71]:
from langchain_openai import OpenAIEmbeddings

In [72]:
embeddings = OpenAIEmbeddings()

In [77]:
# storing to vector embedding for ans
from langchain_community.vectorstores import FAISS
vectorstore_ques_gen = FAISS.from_texts(
    all_refined_questions,
    embedding=embeddings
)

In [None]:
# initializing the llm for answer generation
llm_ans_gen = ChatOpenAI(
    model="gpt-3.5-turbo",
   temperature=0.7,
)

In [86]:
all_refined_questions

['How can health-care institutions ensure that AI technologies are implemented in a way that truly enhances the quality of patient care and does not replace human interaction?',
 'How do health care institutions prioritize their AI budget allocation between different AI applications, such as patient flow optimization, medical imaging, and predictive analytics?',
 'How do AI-enabled scheduling tools give more autonomy to patients while streamlining smarter scheduling and increasing resource capacity in healthcare settings?',
 'How can AI technologies be further developed to enhance the efficiency and accuracy of medical image analysis for clinicians?',
 'How can AI be integrated into healthcare ecosystems to ensure that improvements made locally can have a global impact on healthcare?']

In [None]:
for question in all_refined_questions:
    results = vectorstore_ques_gen.similarity_search(question, k=1)
    retrieved_text = results[0].page_content if results else "No relevant content found"
    prompt = f"Based on the following text, answer the question in a concise and readable way:\n\nText: {retrieved_text}\n\nQuestion: {question}"
    refined_answer = llm_ques_gen_pipeline.invoke(prompt)
    print(f"Q: {question}")
    print(f"A: {refined_answer}")
    print("-" * 50)
    with open("generated_questions_and_answers.txt", "a", encoding="utf-8") as f: # storing in file
        f.write(f"Question: {question}\n")
        f.write(f"Answer: {refined_answer}\n")
        f.write("-" * 50 + "\n")

Q: How can health-care institutions ensure that AI technologies are implemented in a way that truly enhances the quality of patient care and does not replace human interaction?
A: content='Health-care institutions can ensure that AI technologies are implemented in a way that enhances patient care and maintains human interaction by carefully considering ethical considerations, leveraging AI to address disparities in access to healthcare, utilizing AI for preventive care and early detection of health issues, and ensuring that AI complements the skills and expertise of medical professionals rather than replacing them.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 67, 'prompt_tokens': 196, 'total_tokens': 263, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'mo