In [1]:
import os 
from dotenv import load_dotenv

In [4]:
load_dotenv()
OPENAI_API_KEY=os.getenv('OPENAI_API_KEY')


In [5]:
from langchain.document_loaders import PyPDFLoader 

In [11]:
%cd ..

c:\Users\satya\RAGAWS\interview question creator


In [13]:
filepath='data/SDG Resource Document_Targets Overview.pdf'
loader=PyPDFLoader(filepath)
data=loader.load()

In [14]:
len(data)

19

In [19]:
question_gen=""
for page in data:
    question_gen+=page.page_content

In [29]:
#Token text splitter needs 2 times of chunking 

In [17]:
from langchain.text_splitter import TokenTextSplitter
splitter_ques_gen=TokenTextSplitter(
    model_name='gpt-3.5-turbo',
    chunk_size=10000,
    chunk_overlap=200
)

In [22]:
chunk_ques=splitter_ques_gen.split_text(question_gen)

In [24]:
from langchain.docstore.document import Document
document_ques_gen=[Document(page_content=t) for t in chunk_ques]

In [25]:
document_ques_gen



In [27]:
splitter_ans_gen=TokenTextSplitter(
    model_name='gpt-3.5-turbo',
    chunk_size=1000,
    chunk_overlap=100
)
document_answer_gen=splitter_ans_gen.split_documents(
    document_ques_gen
)

In [28]:
document_answer_gen 

[Document(page_content='4th SDG Youth Summer Camp – SDG Resource Document  \nThe 2030 Agenda for Sustainable Development’s 17 \nSustainable Development Goals (SDGs)    \nGoal: This document enables 4th SDG Youth Summer Camp participants to i) get to \nknow the 17 SDGs, ii) explore what areas each goal covers under its targets, iii) identify \ntargets of most interest to participants, and iv) identify synergies between the SDGs \nand chosen target(s).  \n \n \nGoal 1. End poverty in all its forms everywhere   \nTarget 1.1  By 2030, eradicate extreme poverty  for all people everywhere, currently measured \nas people living on less than $1.25 a day  \nTarget 1.2  By 2030, reduce at least by half the proportion  of men, women and  children of all \nages living in poverty  in all its dimensions according to national definitions  \nTarget 1.3  Implement nationally appropriate social protection systems and measures for \nall, including floors, and by 2030 achieve substantial coverage of the p

In [57]:
from langchain.chat_models import ChatOpenAI
llm_ques_gen_pipeline=ChatOpenAI(
    model='gpt-3.5-turbo',
    temperature=0.3
)

In [58]:
from langchain.prompts import PromptTemplate


In [59]:
PROMPT_QUESTIONS = PromptTemplate(
    input_variables=["text"],
    template="""
    You are an expert at creating questions based on coding materials and documentation.
    Your goal is to prepare a coder or programmer for their exam and coding tests.
    You do this by asking questions about the text below:

    ------------
    {text}
    ------------

    Create questions that will prepare the coders or programmers for their tests.
    Make sure not to lose any important information.

    QUESTIONS:
    """
)

In [60]:
#PROMPT_QUESTIONS=PromptTemplate(template=prompt_template, input_variables=['text'])

In [61]:
REFINE_PROMPT_QUESTIONS = PromptTemplate(
    input_variables=["existing_answer", "text"],
    template="""
    You are an expert at creating practice questions based on coding material and documentation.
    Your goal is to help a coder or programmer prepare for a coding test.
    We have received some practice questions to a certain extent: {existing_answer}.
    We have the option to refine the existing questions or add new ones.
    (only if necessary) with some more context below.
    ------------
    {text}
    ------------

    Given the new context, refine the original questions in English.
    If the context is not helpful, please provide the original questions.
    QUESTIONS:
    """
)

In [62]:
from langchain.chains.summarize import load_summarize_chain

In [64]:
ques_gen_chain=load_summarize_chain(
    llm=llm_ques_gen_pipeline,
    chain_type="refine",
    question_prompt=PROMPT_QUESTIONS,
    verbose=True,
    refine_prompt=REFINE_PROMPT_QUESTIONS
)

In [65]:
ques=ques_gen_chain.run(document_ques_gen)
print(ques)



[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are an expert at creating questions based on coding materials and documentation.
    Your goal is to prepare a coder or programmer for their exam and coding tests.
    You do this by asking questions about the text below:

    ------------
    4th SDG Youth Summer Camp – SDG Resource Document  
The 2030 Agenda for Sustainable Development’s 17 
Sustainable Development Goals (SDGs)    
Goal: This document enables 4th SDG Youth Summer Camp participants to i) get to 
know the 17 SDGs, ii) explore what areas each goal covers under its targets, iii) identify 
targets of most interest to participants, and iv) identify synergies between the SDGs 
and chosen target(s).  
 
 
Goal 1. End poverty in all its forms everywhere   
Target 1.1  By 2030, eradicate extreme poverty  for all people everywhere, currently measured 
as people living on less than $1.2

In [68]:
from langchain.embeddings.openai import OpenAIEmbeddings
embeddings=OpenAIEmbeddings()
from langchain.vectorstores import FAISS
vectorstore=FAISS.from_documents(document_answer_gen, embeddings)

In [69]:
llm_answer_gen=ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo")

In [72]:
ques_list=ques.split('\n')

In [73]:
from langchain.chains import RetrievalQA

In [74]:
answer_generation_chain=RetrievalQA.from_chain_type(
    llm=llm_answer_gen,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)

In [75]:
for question in ques_list:
    print("Question: ", question)
    answer = answer_generation_chain.run(question)
    print("Answer: ", answer)
    print("--------------------------------------------------\\n\\n")
    # Save answer to file
    with open("answers.txt", "a") as f:
        f.write("Question: " + question + "\\n")
        f.write("Answer: " + answer + "\\n")
        f.write("--------------------------------------------------\\n\\n")

Question:  1. What is the main goal of the 4th SDG Youth Summer Camp Resource Document?
Answer:  The main goal of the 4th SDG Youth Summer Camp Resource Document is to enable participants to get to know the 17 Sustainable Development Goals (SDGs), explore the areas covered by each goal, identify targets of interest, and identify synergies between the SDGs and chosen targets.
--------------------------------------------------\n\n
Question:  2. What are the targets under Goal 1: End poverty in all its forms everywhere?
Answer:  The targets under Goal 1: End poverty in all its forms everywhere are:

1. Target 1.1: By 2030, eradicate extreme poverty for all people everywhere, currently measured as people living on less than $1.25 a day.
2. Target 1.2: By 2030, reduce at least by half the proportion of men, women, and children of all ages living in poverty in all its dimensions according to national definitions.
3. Target 1.3: Implement nationally appropriate social protection systems and m