In [1]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
import os
import glob
from dotenv import load_dotenv
from langchain.document_loaders import DirectoryLoader, TextLoader
# imports for langchain

from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
# from langchain_chroma import Chroma
from langchain.vectorstores import FAISS
import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
import requests

In [2]:
# Read in documents using LangChain's loaders.
# Every immediate sub-folder of the knowledge base is treated as one document
# type: the folder name is stored under metadata["doc_type"] on each document
# loaded from it.

# Keep directories only: the glob below also matches plain files placed next to
# the sub-folders (e.g. a README), and DirectoryLoader refuses a non-directory path.
# Sorting makes the load order independent of the filesystem listing order.
folders = sorted(path for path in glob.glob("knowledge-base/*") if os.path.isdir(path))

# With thanks to CG and Jon R, students on the course, for this fix needed for some users
text_loader_kwargs = {'encoding': 'utf-8'}
# If that doesn't work, some Windows users might need to uncomment the next line instead
# text_loader_kwargs={'autodetect_encoding': True}

documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    # Recursively pick up every Markdown file below this folder.
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    folder_docs = loader.load()
    for doc in folder_docs:
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)

In [3]:
# Split the loaded documents into overlapping chunks for embedding.
# The two sizing knobs are kept together so they are easy to tune in one place.
splitter_settings = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)
chunks = text_splitter.split_documents(documents)

In [4]:
# Show how many chunks the splitter produced.
len(chunks)

123

In [5]:
# Collect the distinct doc_type labels carried in the chunks' metadata.
doc_types = {chunk.metadata['doc_type'] for chunk in chunks}
# A set of strings has no stable iteration order between runs, so sort the
# labels before printing to keep this cell's output reproducible.
print(f"Document types found: {', '.join(sorted(doc_types))}")

Document types found: company, products, employees, contracts


In [6]:
# Read variables from a local .env file, if there is one. load_dotenv is imported
# in the first cell but was never called, so a key stored in .env was ignored.
# With its default settings it does not replace variables already set in the environment.
load_dotenv()
# Fall back to a placeholder only when no key was supplied by the environment or .env.
os.environ.setdefault('OPENAI_API_KEY', 'your-key-if-not-using-env')

In [32]:
# Embed the chunks with a sentence-transformers model and index them in FAISS.
# HuggingFaceEmbeddings and FAISS come from the notebook's first (imports) cell.

# Load embedding model
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
# Directory the index is written to and read back from.
faiss_index_dir = "faiss_index_"

embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# Create FAISS vector store
vectorstore = FAISS.from_documents(chunks, embeddings)

# Save and reload the vector store
vectorstore.save_local(faiss_index_dir)
# SECURITY: allow_dangerous_deserialization=True opts in to pickle-based loading.
# Only point this at an index directory written by this notebook, never at
# files obtained from an untrusted source.
persisted_vectorstore = FAISS.load_local(faiss_index_dir, embeddings, allow_dangerous_deserialization=True)

# Create a retriever (built from the in-memory store; persisted_vectorstore is not used here).
# NOTE(review): no search_type is passed, so the retriever runs in its default
# search mode and "score_threshold" is simply forwarded to the vector store.
# Whether 0.7 is then read as a raw distance or as a normalized relevance score
# depends on the store implementation — confirm it filters as intended, or use
# search_type="similarity_score_threshold" if a relevance cut-off is wanted.
retriever = vectorstore.as_retriever(search_kwargs={"score_threshold": 0.7})

In [17]:
# from openai import OpenAI
#
# client = OpenAI(base_url="http://127.0.0.1:1234/v1", api_key="lm-studio")
# MODEL = "dolphin3.0-llama3.2-3b"
#
# response = client.chat.completions.create(
#     model=MODEL,
#     messages=[
#         {"role": "user", "content": "give me a joke"}
#     ]
# )
#
# print(response.choices[0].message.content)


Of course! Here's a joke for you:

Why did the computer run away from the wall?

Because it was afraid of the firewall!

I hope you enjoyed that one! If you need help with anything else, just let me know. I'm here to assist you.


In [38]:
from openai import OpenAI

# LM Studio client: the OpenAI SDK pointed at a server on the local machine.
# The unused re-imports of FAISS and HuggingFaceEmbeddings were dropped; this
# cell does not reference either name.
client = OpenAI(base_url="http://127.0.0.1:1234/v1", api_key="lm-studio")
# Model identifier sent with every chat completion request.
MODEL = "dolphin3.0-llama3.2-3b"


In [67]:
def format_history(chat_history):
    """Return the chat history as a new list of role/content message dicts.

    Each entry of ``chat_history`` is read through its "role" and "content"
    keys; any other keys an entry has are not carried over.
    """
    formatted = []
    for entry in chat_history:
        formatted.append({"role": entry["role"], "content": entry["content"]})
    return formatted

def build_messages(history, context, query):
    """Assemble the chat message list for one question.

    The result is, in order: a system message made of the fixed instructions
    followed by ``context``, the prior turns from ``history`` (passed through
    ``format_history``), and a final user message holding ``query``.
    """
    preamble = """
You are a helpful assistant for the website Metropolis.
Only use the information provided in the CONTEXT below to answer the user's question.
If the answer is not in the context, respond with: "I don't know based on the documents."
Never make up answers.

CONTEXT:
"""
    conversation = [{"role": "system", "content": preamble + context}]
    conversation.extend(format_history(history))
    conversation.append({"role": "user", "content": query})
    return conversation


In [68]:
# Running transcript shared across calls: get_rag_response appends one
# {"role": "user", ...} and one {"role": "assistant", ...} dict per question.
conversation_history = []

In [69]:
def get_rag_response(query: str) -> str:
    """Answer ``query`` from retrieved context and record the exchange.

    Retrieves documents for the query with the module-level ``retriever``,
    joins their ``page_content`` into a single context string, builds the
    message list with ``build_messages`` (which also receives the current
    ``conversation_history``), and sends it to ``client`` using ``MODEL`` at
    temperature 0.0.

    Args:
        query: The user's question.

    Returns:
        The assistant's reply with surrounding whitespace stripped, or an
        empty string when the completion carries no text content.

    Side effects:
        Appends the user turn and the assistant turn to the global
        ``conversation_history``. Nothing is appended when retrieval or the
        API call raises, because the appends happen last.
    """
    # 1. Get relevant docs
    docs = retriever.invoke(query)
    context = "\n\n".join(doc.page_content for doc in docs)

    # 2. Build message list
    messages = build_messages(conversation_history, context, query)

    # 3. Call LLM via OpenAI-compatible API
    response = client.chat.completions.create(
        model=MODEL,
        messages=messages,
        temperature=0.0
    )
    # The message content may be None; coalesce to "" so .strip() cannot raise
    # AttributeError and the function still returns a str.
    answer = (response.choices[0].message.content or "").strip()

    # 4. Update global conversation history
    conversation_history.append({"role": "user", "content": query})
    conversation_history.append({"role": "assistant", "content": answer})

    return answer

In [73]:
# Inspect the transcript accumulated by earlier get_rag_response calls.
conversation_history

[{'role': 'user', 'content': "I'm abdou belhadj"},
 {'role': 'assistant', 'content': '"I don\'t know based on the documents."'},
 {'role': 'user', 'content': 'Hello !'},
 {'role': 'assistant', 'content': '"Hello! How can I assist you today?"'}]

In [77]:
# Try the pipeline end to end with one sample question.
# Note: this call also appends the exchange to conversation_history.
question = "what are the services that metropolis provide?"
response = get_rag_response(query=question)
print("🧠", response)

🧠 Based on the context provided and the information available about Metropolis, here are some of the services they may offer:

1. **Information and Assistance**: They provide assistance to users by answering questions and providing information based on the documents provided in the context.

2. **Helpful Assistant for the Website**: They act as a helpful assistant for the website Metropolis, aiming to assist users with their inquiries and needs.

3. **Document-Based Answers**: Their answers are always based on the information provided in the context, ensuring that they provide accurate and relevant responses.

4. **No Making Up Answers**: They strictly adhere to the information provided in the context without making up any answers or creating hypothetical scenarios.

5. **Helpful and Professional**: They aim to be helpful and professional in their interactions with users, providing assistance and guidance as needed.

If you have any specific questions or need further assistance, please

In [62]:
# # Initialize conversation history
# conversation_history = []
#
# # Ask a question
# query = "who are you"
#
# # Get relevant docs
# docs = retriever.invoke(query)
# context = "\n\n".join(doc.page_content for doc in docs)
#
# # Build messages
# messages = build_messages(conversation_history, context, query)
#
# # Send to LM Studio via OpenAI API
# response = client.chat.completions.create(
#     model=MODEL,
#     messages=messages,
#     temperature=0.0
# )
#
# answer = response.choices[0].message.content.strip()
#
# # Print result
# print("🧠", answer)
#
# # Update conversation history
# conversation_history.append({"role": "user", "content": query})
# conversation_history.append({"role": "assistant", "content": answer})
