In [None]:
# [CELL 1] - Imports and Setup
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
import os
import shutil
import time

# Set your OpenAI API key
os.environ['OPENAI_API_KEY'] = 'IM NOT GIVING MY API KEY :) TYPE YOURS'

In [None]:
# [CELL 2] - Create fresh data with structured metadata
DATA_PATH = r'YOURPATH'
os.makedirs(DATA_PATH, exist_ok=True)

sample_text = """Title: Alice in Wonderland
Author: Lewis Carroll
Year: 1865

Chapter 1: Down the Rabbit-Hole

Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, "and what is the use of a book," thought Alice "without pictures or conversations?"

So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her."""

with open(os.path.join(DATA_PATH, "alice_in_wonderland.md"), "w", encoding="utf-8") as f:
    f.write(sample_text)

In [3]:
# [CELL 3] - Better document processing
def load_and_split_documents():
    # Load documents
    loader = DirectoryLoader(
        DATA_PATH,
        glob="*.md",
        loader_cls=TextLoader,
        loader_kwargs={'encoding': 'utf-8'}
    )
    documents = loader.load()
    
    # Split with specific separators
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ". ", "?", "!"],
        chunk_size=200,  # Smaller chunks
        chunk_overlap=0,  # No overlap to avoid duplicates
        length_function=len,
    )
    
    chunks = text_splitter.split_documents(documents)
    print(f"Created {len(chunks)} chunks")
    return chunks

chunks = load_and_split_documents()

Created 4 chunks


In [4]:
# [CELL 4] - Create vector store
vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=OpenAIEmbeddings(),
    persist_directory="chroma"
)

In [5]:
# [CELL 5] - Improved query function
def ask_question(question, k=3):
    print(f"\nQuestion: {question}")
    print("-" * 50)
    
    results = vector_store.similarity_search(question, k=k)
    seen_content = set()
    
    print("\nAnswer:")
    for doc in results:
        content = doc.page_content.strip()
        if content not in seen_content:
            seen_content.add(content)
            # Only show if content is relevant to question
            if any(keyword.lower() in content.lower() for keyword in question.split()):
                print(f"\nRelevant Passage:")
                print(content)
                print("-" * 50)

# Test with specific questions
questions = [
    "Who is the author of this book?",
    "When was this book written?",
    "What happens in Chapter 1?",
    "What does Alice think about books?",
    "Describe the White Rabbit"
]

for question in questions:
    ask_question(question)


Question: Who is the author of this book?
--------------------------------------------------

Answer:

Relevant Passage:
of a book," thought Alice "without pictures or conversations?"
--------------------------------------------------

Relevant Passage:
Title: Alice in Wonderland
Author: Lewis Carroll
Year: 1865

Chapter 1: Down the Rabbit-Hole

Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, "and what is the use of a book," thought Alice "without pictures or conversations?"
--------------------------------------------------

Question: When was this book written?
--------------------------------------------------

Answer:

Relevant Passage:
of a book," thought Alice "without pictures or conversations?"
--------------------------------------------------

Relevant Passage:
Title: Alice in Wonderland
Author: Lewis Carro

In [6]:
# [CELL 1] - Check setup
import os

# Check OpenAI API key
api_key = os.getenv('OPENAI_API_KEY')
print("API Key exists:", bool(api_key))
print("API Key starts with:", api_key[:8] if api_key else "No key found")

# Check if vector store exists
print("\nVector store exists:", 'vector_store' in locals())

API Key exists: True
API Key starts with: sk-proj-

Vector store exists: True


In [7]:
# [CELL 2] - Simple test
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

try:
    # Create new chat model
    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0.7
    )
    print("Chat model created successfully")
    
    # Create memory
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True
    )
    print("Memory created successfully")
    
    # Create chain
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(),
        memory=memory,
        verbose=True
    )
    print("Chain created successfully")
    
    # Test simple question
    result = qa_chain({"question": "What is this story about?"})
    print("\nTest question result:", result["answer"])
    
except Exception as e:
    print("Error occurred:", str(e))

Chat model created successfully
Memory created successfully
Chain created successfully


  memory = ConversationBufferMemory(
  result = qa_chain({"question": "What is this story about?"})




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
?"

?"

?"

Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, "and what is the use of a book
Human: What is this story about?[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m

Test question result: This story is a classic tale called "Alice's Adventures in Wonderland" by Lewis Carroll. It follows a young girl named Alice who falls down a rabbit hole into a fantasy world filled with peculiar creatures and nonsensical events. Throughout her journey, she encounters talking animals, attends unusual te

In [10]:
# [CELL 1] - Simple prompting function
def ask_ai(question):
    result = qa_chain({"question": question})
    print(f"\nQ: {question}")
    print(f"\nA: {result['answer']}")
    print("-" * 50)

# Try some example questions
ask_ai("tellmetheauthorname")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What is this story about?
Assistant: This story is a classic tale called "Alice's Adventures in Wonderland" by Lewis Carroll. It follows a young girl named Alice who falls down a rabbit hole into a fantasy world filled with peculiar creatures and nonsensical events. Throughout her journey, she encounters talking animals, attends unusual tea parties, and navigates the whimsical and sometimes chaotic Wonderland.
Human: What is this story about?
Assistant: "Alice's Adventures in Wonderland" is a classic tale written by Lewis Carroll in 1865. It follows the story of a young girl named Alice who falls through a rabbit hole into a fantasy world filled with peculiar creatures and nonsensical situations. Throughout her journey, Alice encounters tal