In [50]:
import os
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain_community.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
load_dotenv()

True

In [51]:
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

if not OPENAI_API_KEY:
    raise ValueError("OPEN AI KEY IS MISSING")

In [52]:
pdf_path = 'Generative-AI.pdf'
loader = PyPDFLoader(pdf_path)
docs = loader.load()

In [53]:
docs.__len__()

47

In [54]:
splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)
split_docs = splitter.split_documents(docs)

In [55]:
embeddings = OpenAIEmbeddings(model = 'text-embedding-3-small')

In [56]:
vectorstore = Chroma.from_documents(
    documents=split_docs,
    embedding=embeddings
)

In [57]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

In [58]:
llm = ChatOpenAI(model_name='gpt-4o-mini', temperature=0)

In [59]:
prompt_template = """
You are a knowledgeable assistant. Use the provided context to answer the user's question accurately.
If the answer is not found in the context, respond with "I don't know" and suggest what to check next.

Context:
{context}

Question:
{question}

Answer:
"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

In [60]:
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT},
)

In [65]:
query = "TAC-GAN"
result = rag_chain({"query": query})

answer = result["result"]

if "I don't know" not in answer:
    # Ask LLM to elaborate more on the result
    followup_query = f"Please elaborate more on: {answer}"
    followup_result = rag_chain({"query": followup_query})
    answer = followup_result["result"]

print("\n=== ANSWER ===")
print(answer)

print("\n=== SOURCE PREVIEWS ===")
for i, doc in enumerate(result["source_documents"], start=1):
    print(f"\n[{i}] {doc.page_content[:200]}...")


=== ANSWER ===
I don't know. I suggest checking the original paper titled “TAC-GAN – Text to Image Generation via Adversarial Learning” or related literature on GANs for more specific information about TAC-GAN and its challenges.

=== SOURCE PREVIEWS ===

[1] their 2017 paper titled “TAC-GAN – Text 
This bird is completely black.
This bird is bright blue.
Beak
Head
A man in an orange jacket,
black pants and a black cap
wearing sunglasses skiing
Belly
Right...

[2] their 2017 paper titled “TAC-GAN – Text 
This bird is completely black.
This bird is bright blue.
Beak
Head
A man in an orange jacket,
black pants and a black cap
wearing sunglasses skiing
Belly
Right...

[3] their 2017 paper titled “TAC-GAN – Text 
This bird is completely black.
This bird is bright blue.
Beak
Head
A man in an orange jacket,
black pants and a black cap
wearing sunglasses skiing
Belly
Right...

[4] Accuracy of the result is another challenge that 
crops up while implementing this technology. 
GAN's processes 