In [63]:
!pip install -U langchain-community langchain-openai faiss-cpu pypdf python-docx docx2txt openai



In [68]:
import os
from getpass import getpass
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from openai import OpenAI

In [52]:
# Reuse a key that is already configured (e.g. a runtime secret or a previous
# run of this cell) instead of prompting again and overwriting it.
if not os.environ.get("OPENAI_API_KEY"):
    entered_key = getpass("Enter your OpenAI API key: ").strip()
    if not entered_key:
        # Fail here with a clear message rather than at the first API call.
        raise ValueError("No OpenAI API key entered; rerun this cell and provide one.")
    os.environ["OPENAI_API_KEY"] = entered_key

# Chat-completions client used by the chat loop.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
# Embedding model used to build and query the FAISS index.
embedding = OpenAIEmbeddings()

Enter your OpenAI API key: ··········


In [79]:
# Build (or reload) the FAISS vector store over the Wedlii PDF documents.
# After this cell runs, `vectorstore` is available to the chat loop.

# 📂 Paths
FOLDER_PATH = "/content/wedlii_docs"
INDEX_PATH = "/content/faiss_index"

# ✂️ Chunking parameters, in characters
CHUNK_SIZE = 500
CHUNK_OVERLAP = 100

# 🔑 Embeddings
embedding = OpenAIEmbeddings()

# ✅ Load existing FAISS index if available.
# NOTE: a saved index is reused as-is, so PDFs added or edited afterwards are
# not picked up until the INDEX_PATH folder is deleted and this cell is rerun.
if os.path.exists(os.path.join(INDEX_PATH, "index.faiss")):
    print("✅ Loading existing FAISS index...")
    # SECURITY: allow_dangerous_deserialization=True makes load_local unpickle
    # the stored docstore, which can execute arbitrary code. Only load an index
    # that this notebook wrote itself; never one obtained from elsewhere.
    vectorstore = FAISS.load_local(INDEX_PATH, embedding, allow_dangerous_deserialization=True)

else:
    # 📂 If no docs folder, create it and stop
    if not os.path.exists(FOLDER_PATH):
        os.makedirs(FOLDER_PATH, exist_ok=True)
        print(f"📂 Created '{FOLDER_PATH}'. Please add Wedlii PDF files and rerun.")
        raise SystemExit

    all_documents = []

    # 📥 Load all PDFs inside folder. sorted() makes the processing order (and
    # therefore the resulting index) reproducible; os.listdir order is arbitrary.
    for file in sorted(os.listdir(FOLDER_PATH)):
        path = os.path.join(FOLDER_PATH, file)
        file_name, ext = os.path.splitext(file)

        if ext.lower() != ".pdf":
            print(f"⚠️ Skipping non-PDF file: {file}")
            continue

        try:
            # PyPDFLoader yields one Document per PDF page.
            documents = PyPDFLoader(path).load()

            # 🧹 Clean filename → make a nice title
            clean_title = file_name.replace("_", " ").replace("-", " ").title()

            # 🏷 Use filename as category. The chat loop filters on this value,
            # so it must stay in step with the normalisation applied there.
            category = clean_title

            # Attach page-level metadata; every chunk split from a page
            # inherits it.
            for page_number, doc in enumerate(documents, 1):
                doc.metadata.update({
                    "source_file": file,
                    "page_number": page_number,
                    "title": clean_title or "Untitled Document",
                    "category": category
                })

            all_documents.extend(documents)
            print(f"✅ Loaded {len(documents)} pages from {file}")

        except Exception as e:
            # Best effort: one unreadable PDF must not abort the whole build.
            print(f"❌ Error loading {file}: {e}")

    if not all_documents:
        print("⚠️ No valid PDF documents found. Exiting.")
        raise SystemExit

    # ✂️ Split into chunks
    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    chunks = splitter.split_documents(all_documents)

    # Pages without extractable text (e.g. scanned images) produce no chunks,
    # and FAISS cannot build an index from an empty list.
    if not chunks:
        print("⚠️ No extractable text found in the PDF documents. Exiting.")
        raise SystemExit

    # Number chunks per source file *after* splitting. Previously
    # "chunk_number" was assigned per page before the split, so it merely
    # duplicated "page_number" on every chunk.
    chunks_seen = {}
    for chunk in chunks:
        source_file = chunk.metadata["source_file"]
        chunks_seen[source_file] = chunks_seen.get(source_file, 0) + 1
        chunk.metadata["chunk_number"] = chunks_seen[source_file]

    # 🔍 Create FAISS index
    vectorstore = FAISS.from_documents(chunks, embedding)
    vectorstore.save_local(INDEX_PATH)
    print("✅ FAISS index created and saved with chunked Wedlii docs!")


✅ Loading existing FAISS index...


In [72]:
# System prompt stating the assistant's grounding policy: answer only from the
# Wedlii documents and use a fixed refusal sentence when the answer is missing.
# NOTE(review): the chat-loop cell assigns `policy_prompt` again before it
# builds its messages, and that later text omits the "only answer from the
# docs" / refusal instructions. As written, this stricter prompt is never sent
# to the model — confirm which version is intended.
policy_prompt = """
You are a Wedlii Knowledge Assistant.
Only answer questions using the information provided in Wedlii documents.
If the answer is not found, say: "I cannot find that information in the Wedlii docs."
Be clear, concise, and professional.
"""


In [None]:
# Interactive retrieval-augmented chat loop.
# Per turn: classify the query into a Wedlii category, retrieve matching
# chunk(s) from `vectorstore`, then ask the model to answer from that context.
# Relies on `client` and `vectorstore` created by earlier cells.

# Single source of truth for the categories: used to build the classifier
# prompt AND to validate the classifier's reply.
# NOTE(review): "Deamy" looks like a typo for "Dreamy", but the name must match
# the indexed PDF's file name, so it is left untouched — confirm.
WEDLII_CATEGORIES = [
    "About Wedlii",
    "AI Wedding Visions",
    "BLOGS",
    "Bridal Hairstyle Types",
    "Bridal Makeup Types",
    "Deamy Byron Bay Boho Wedding Styled Shoot",
    "Vendors",
    "Venues",
    "Wedding Cake Types",
    "Wedding Flower Types",
    "Wedlii Colour Palette",
]

CHAT_MODEL = "gpt-4.1-nano"
# 10 tokens could cut off the longest category name mid-way, producing a
# filter value that matches nothing; leave comfortable headroom.
CLASSIFY_MAX_TOKENS = 30
ANSWER_MAX_TOKENS = 50
TOP_K = 1  # number of chunks retrieved per query

# Assigned once, outside the loop: the text never changes between turns.
# NOTE(review): this replaces the stricter `policy_prompt` defined in an
# earlier cell and drops its "answer only from the docs" rule — confirm that
# is intended.
policy_prompt = """
You are the Wedlii Knowledge Chatbot.
Use the provided context to answer the user's question.
Be clear, concise, and professional.
"""


def _normalize_category(name):
    """Return `name` in the form stored in the index's "category" metadata.

    Mirrors the file-name clean-up done when the index is built (underscores
    and hyphens become spaces, then title case), e.g. "BLOGS" -> "Blogs".
    """
    return name.replace("_", " ").replace("-", " ").title()


_KNOWN_CATEGORIES = {_normalize_category(name) for name in WEDLII_CATEGORIES}


def _match_category(reply):
    """Map the classifier's raw reply to a known category, or None.

    Tolerates surrounding quotes, a leading list number ("9. ...") and a
    trailing period, all of which would otherwise break the exact-match
    metadata filter.
    """
    cleaned = reply.strip().strip("\"'`").lstrip("0123456789.) ").rstrip(". ")
    candidate = _normalize_category(cleaned)
    return candidate if candidate in _KNOWN_CATEGORIES else None


def _build_classification_prompt(query):
    """Return the classifier prompt listing every category, for `query`."""
    numbered = "\n".join(
        f"    {number}. {name}" for number, name in enumerate(WEDLII_CATEGORIES, 1)
    )
    return f"""
    You are a smart classifier. Classify the following query into ONE of these Wedlii categories:

{numbered}

    Query: "{query}"

    Respond ONLY with the exact category name.
    """


print("🤵 Wedlii Knowledge Chatbot ready! Type 'exit' to stop.\n")

while True:
    try:
        query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the cell should end the chat cleanly, not with a traceback.
        print("\n👋 Goodbye! Thanks for chatting with Wedlii.")
        break

    if query.lower() in ("exit", "quit"):
        print("👋 Goodbye! Thanks for chatting with Wedlii.")
        break
    if not query:
        # Nothing to ask: avoid spending two API calls on blank input.
        continue

    # 🔹 Step 1: classify the query
    classify_response = client.chat.completions.create(
        model=CHAT_MODEL,
        messages=[{"role": "system", "content": _build_classification_prompt(query)}],
        max_tokens=CLASSIFY_MAX_TOKENS,
        temperature=0
    )

    # `content` can be None, so guard before calling str methods on it.
    raw_category = (classify_response.choices[0].message.content or "").strip()
    print(f"🔍 GPT says this query is related to: {raw_category}")

    category = _match_category(raw_category)

    # 🔹 Step 2: retrieve context, restricted to the category when it is valid
    relevant_docs = []
    if category is not None:
        relevant_docs = vectorstore.similarity_search(
            query,
            k=TOP_K,
            filter={"category": category}
        )

    if not relevant_docs:
        # Either the reply was not a known category, or the filtered search
        # found nothing. Fall back to an unfiltered search so the model is not
        # asked to answer from an empty context.
        print("⚠️ No category-filtered match; searching all Wedlii docs instead.")
        relevant_docs = vectorstore.similarity_search(query, k=TOP_K)

    context = "\n".join(
        f"Chunk {position} Content:\n{doc.page_content}\nSource: {doc.metadata.get('source_file')}\n"
        for position, doc in enumerate(relevant_docs, 1)
    )

    print("\n=== Retrieved Chunks ===")
    for doc in relevant_docs:
        print("Content:", doc.page_content[:30], "...")
        print("Metadata:", doc.metadata)
        print("-" * 30)

    category_label = category if category is not None else "Unknown"
    messages = [
        {"role": "system", "content": policy_prompt},
        {
            "role": "user",
            "content": f"Category: {category_label}\n\nContext:\n{context}\n\nQuestion: {query}"
        }
    ]

    # 🔹 Step 3: get final answer
    response = client.chat.completions.create(
        model=CHAT_MODEL,
        messages=messages,
        max_tokens=ANSWER_MAX_TOKENS,
        temperature=0.5
    )

    print("Bot:", response.choices[0].message.content)
    print("=" * 80)


🤵 Wedlii Knowledge Chatbot ready! Type 'exit' to stop.

You: what is Hand-Painted Cakes
🔍 GPT says this query is related to: Wedding Cake Types

=== Retrieved Chunks ===
Content: ●  Intricate  artistic  design ...
Metadata: {'producer': 'Skia/PDF m141 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'Wedding Cake Types', 'source': '/content/wedlii_docs/Wedding Cake Types.pdf', 'total_pages': 2, 'page': 1, 'page_label': '2', 'source_file': 'Wedding Cake Types.pdf', 'chunk_number': 2, 'page_number': 2, 'category': 'Wedding Cake Types'}
------------------------------
Bot: Hand-Painted Cakes are wedding cakes featuring intricate artistic designs painted directly onto the cake's surface, often reflecting personal style and creativity.
