In [1]:
# imports libary

import os
import glob
from dotenv import load_dotenv
import gradio as gr
from openai import OpenAI

In [2]:
# declare AI model gpt-4o-mini is the most lowest cost model

MODEL = "gpt-4o-mini"
db_name = "vector_db"

In [3]:
# Load environment variables in a file called .env

load_dotenv(override=True)
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
openai = OpenAI()

# First test with RAG

In [None]:
# Test matching documents

context = {}

documents = glob.glob("knowledge_base/Deep_learning/*")

for document in documents:
    name = document.split(os.sep)[-1][:-3]
    doc = ""
    with open(document, "r", encoding="utf-8") as f:
        doc = f.read()
    context[name]=doc
    
    


In [21]:
context.keys()

dict_keys(['association_rule_learning', 'clustering', 'decision_tree', 'dimensionality_reduction', 'gradient_boosting', 'knn', 'linear_regression', 'logistic_regression', 'machine_learning', 'machine_learning_overview', 'naive_bayes', 'random_forest', 'regression_classification', 'reinforcement_learning ', 'supervised_learning', 'svm', 'unsupervised_learning'])

In [33]:
system_message = """
You are an expert in AI/ML career guidance. You must answer strictly based on the context provided from the database.
If the context does not contain the necessary information, or if the answer cannot be derived directly from the context,
you must respond: "I do not have information in the database to answer this question."

Rules:
- Do not invent, guess, or hallucinate.
- Do not rely on prior knowledge outside the provided context.
- Provide brief, accurate answers only.
- If the context is empty or irrelevant, say you don't have the data.
- All answers must be in Vietnamese.
"""


In [23]:
def get_relevant_context(message):
    relevant_context = []
    for context_title, context_details in context.items():
        if context_title.lower() in message.lower():
            relevant_context.append(context_details)
    return relevant_context          

In [26]:
get_relevant_context("what about decision_tree?")

['---\ntitle: "Decision Tree"\ndescription: "Thuật toán Decision Tree trong Machine Learning."\ntags: ["Machine Learning", "Decision Tree"]\n---\n\n# Decision Tree\n\n## 1. Khái niệm\n\nDecision Tree là thuật toán thuộc nhóm **Supervised Learning**, dùng cho cả **Classification** và **Regression**. Mô hình hoạt động bằng cách chia dữ liệu thành các nhánh dựa trên điều kiện, tạo thành cấu trúc giống cây gồm **node**, **branch**, và **leaf**.\n\n---\n\n## 2. Cấu trúc cây quyết định\n\n* **Root Node**: điểm bắt đầu, chứa toàn bộ dữ liệu.\n* **Internal Node**: nút kiểm tra điều kiện (feature).\n* **Leaf Node**: nút kết luận → class label hoặc giá trị dự đoán.\n* **Branch**: đường nối thể hiện lựa chọn theo điều kiện.\n\n---\n\n## 3. Cách hoạt động\n\n1. Chọn feature tốt nhất để chia dữ liệu.\n2. Tạo một node với điều kiện rẽ nhánh.\n3. Lặp lại cho từng nhánh đến khi đạt điều kiện dừng.\n4. Node cuối trở thành leaf node.\n\n---\n\n## 4. Tiêu chí chọn feature (Split Criteria)\n\n### Đối với 

In [None]:
def add_context(message):
    relevant_context = get_relevant_context(message)
    if not relevant_context:
        message += "\n\n[CONTEXT NOT FOUND IN DATABASE]"
    
    message += "\n\nThe following additional context might be relevant in answering this question:\n\n"
    for relevant in relevant_context:
            message += relevant + "\n\n"
    return message

In [29]:
print(add_context("what about decision tree?"))

what about decision tree?


In [30]:
def chat(message, history):
    messages = [{"role": "system", "content": system_message}] + history
    message = add_context(message)
    messages.append({"role": "user", "content": message})

    stream = openai.chat.completions.create(model=MODEL, messages=messages, stream=True)

    response = ""
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        yield response

## Now we will bring this up in Gradio using the Chat interface -

A quick and easy way to prototype a chat with an LLM

In [None]:
view = gr.ChatInterface(chat).launch()


# Visualizing the Vector Store conntect with langchain

## Split the text into chunks and Vector Store

In [None]:
# imports for langchain and Chroma and plotly

from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [15]:
# Read in documents using LangChain's loaders
# Take everything in all the sub-folders of our knowledgebase

folders = glob.glob("knowledge_base/*")

# With thanks to CG and Jon R, students on the course, for this fix needed for some users 
text_loader_kwargs = {'encoding': 'utf-8'}
# If that doesn't work, some Windows users might need to uncomment the next line instead
# text_loader_kwargs={'autodetect_encoding': True}

documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    folder_docs = loader.load()
    for doc in folder_docs:
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)

In [16]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)
print(f"Number of document chunks: {len(chunks)}")

Number of document chunks: 437


In [17]:
doc_types = set(chunk.metadata['doc_type'] for chunk in chunks)
print(f"Document types found: {', '.join(doc_types)}")

Document types found: DSA, Python, OOP, Deep_Learning, Preprocessing, Machine_Learning


In [34]:
for chunk in chunks:
    if 'hình ảnh, âm thanh' in chunk.page_content:
        print(chunk)
        print("_________")

In [18]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)
len(chunks)

437

In [19]:
doc_types = set(chunk.metadata['doc_type'] for chunk in chunks)
print(f"Document types found: {', '.join(doc_types)}")

Document types found: DSA, Python, OOP, Deep_Learning, Preprocessing, Machine_Learning


### Embeddings, and "Auto-Encoding LLMs"
mapping each chunk of text into a Vector that represents the meaning of the text, known as an embedding.

OpenAI offers a model to do this, which will use by calling their API with some LangChain code.

In [None]:
# Put the chunks of data into a Vector Store that associates a Vector Embedding with each chunk
# Chroma is a popular open source Vector Database based on SQLLite

embeddings = OpenAIEmbeddings()

# If you would rather use the free Vector Embeddings from HuggingFace sentence-transformers
# Then replace embeddings = OpenAIEmbeddings()
# with:
# from langchain.embeddings import HuggingFaceEmbeddings
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Delete if already exists
if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

# Create vectorstore

vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

Vectorstore created with 437 documents


In [21]:
# Get one vector and find how many dimensions it has

collection = vectorstore._collection
sample_embedding = collection.get(limit=1, include=["embeddings"])["embeddings"][0]
dimensions = len(sample_embedding)
print(f"The vectors have {dimensions:,} dimensions")

The vectors have 1,536 dimensions


## Visualizing the Vector Store
Only run on notebook

In [13]:
# Prework

result = collection.get(include=['embeddings', 'documents', 'metadatas'])
vectors = np.array(result['embeddings'])
documents = result['documents']
doc_types = [metadata['doc_type'] for metadata in result['metadatas']]
color_map = {
    'Preprocessing': 'blue',
    'Deep_Learning': 'green',
    'Machine_Learning': 'red',
    'DSA': 'orange',
    'Python': 'yellow',
    'OOP': 'black'
}

colors = [color_map.get(t, 'gray') for t in doc_types]


Failed to send telemetry event CollectionGetEvent: capture() takes 1 positional argument but 3 were given


In [None]:
# We humans find it easier to visalize things in 2D!
# Reduce the dimensionality of the vectors to 2D using t-SNE
# (t-distributed stochastic neighbor embedding)

tsne = TSNE(n_components=2, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# Create the 2D scatter plot
fig = go.Figure(data=[go.Scatter(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=[f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)],
    hoverinfo='text'
)])

fig.update_layout(
    title='2D Chroma Vector Store Visualization',
    xaxis_title='x',
    yaxis_title='y',
    width=800,
    height=600
)

import plotly.io as pio
pio.renderers.default = "notebook_connected"

fig.show()

In [None]:
# Let's try 3D!

tsne = TSNE(n_components=3, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# Create the 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    z=reduced_vectors[:, 2],
    mode='markers',
    marker=dict(size=5, color=colors, opacity=0.8),
    text=[f"Type: {t}<br>Text: {d[:100]}..." for t, d in zip(doc_types, documents)],
    hoverinfo='text'
)])

fig.update_layout(
    title='3D Chroma Vector Store Visualization',
    scene=dict(xaxis_title='x', yaxis_title='y', zaxis_title='z'),
    width=900,
    height=700,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

## Use LangChain to bring it all together

In [22]:
# create a new Chat with OpenAI
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# set up the conversation memory for the chat
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# the retriever is an abstraction over the VectorStore that will be used during RAG
retriever = vectorstore.as_retriever()

# putting it together: set up the conversation chain with the GPT 4o-mini LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [13]:
query = "Can you describe cnn in a few sentences"
result = conversation_chain.invoke({"question":query})
print(result["answer"])

CNN, or Convolutional Neural Network, is a specialized type of neural network particularly effective for processing data with a grid-like topology, such as images. It utilizes convolutional layers to automatically learn spatial features from the input data, making it suitable for tasks like image classification, object detection, and video analysis. CNNs are designed to reduce the number of parameters compared to traditional fully connected networks (MLPs) by employing techniques like parameter sharing and local receptive fields.


In [23]:
# set up a new conversation memory for the chat
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# putting it together: set up the conversation chain with the GPT 4o-mini LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

### Now we will bring this up in Gradio using the Chat interface -

A quick and easy way to prototype a chat with an LLM

In [17]:
# Wrapping in a function - note that history isn't used, as the memory is in the conversation_chain

def chat(message, history):
    result = conversation_chain.invoke({"question": message})
    return result["answer"]

In [None]:
# And in Gradio:

view = gr.ChatInterface(chat).launch(inbrowser=True)

In [24]:
# Let's investigate what gets sent behind the scenes

from langchain_core.callbacks import StdOutCallbackHandler

llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

retriever = vectorstore.as_retriever()

conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory, callbacks=[StdOutCallbackHandler()])

query = "Khi nào nên sử dụng deep learning?"
result = conversation_chain.invoke({"question": query})
answer = result["answer"]
print("\nAnswer:", answer)



[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
---
title: Deep Learning là gì?
description: Ghi chú tổng quan về Deep Learning, khái niệm cốt lõi, cách hoạt động, cấu trúc mô hình, ví dụ code, lỗi thường gặp và ứng dụng thực tế.
tags: [Deep Learning, Neural Networks, Machine Learning, AI]
---

## 1. Khái niệm

**Deep Learning (DL)** là một nhánh của **Machine Learning**, sử dụng **mạng nơ-ron nhân tạo nhiều tầng (deep neural networks)** để học các biểu diễn phức tạp từ dữ liệu.

Điểm khác biệt cốt lõi:
- Machine Learning truyền thống: **feature engineering thủ công**
- Deep Learning: **tự động học feature từ dữ liệu thô**

Deep Learning đặc biệt hiệu quả v

In [25]:
# create a new Chat with OpenAI
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# set up the conversation memory for the chat
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# the retriever is an abstraction over the VectorStore that will be used during RAG; k is how many chunks to use
retriever = vectorstore.as_retriever(search_kwargs={"k": 25})

# putting it together: set up the conversation chain with the GPT 3.5 LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [None]:
def chat(question, history):
    result = conversation_chain.invoke({"question": question})
    return result["answer"]

In [29]:
view = gr.ChatInterface(chat).launch(inbrowser=True)


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


## Fix replay with pretrained knowledge

Base prompt of langchain

“Use the following context to answer the question.
If you don’t know, say you don’t know.”

but it don't have this
“Do not answer if the question is general knowledge.”

Có 1 số kiến thức không được thể hiện trên knowledge tự build nhưng có đề cập thì nó vẫn láy ra những kiến thức có sản được train trong model vì vậy cần sửa prompt để tránh tính trạng này

## Cách này phù hộp cho việc sử dung cho
- RAG nội bộ
- QA system cần độ tin cậy cao

*CÁCH NÀY SẼ KO ĐƯỢC SỬ DỤNG CHO DỰ ÁN

In [44]:
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import LLMChain


In [None]:
# Prompt template for question answering that strictly limits the LLM to only use provided context
QA_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a question-answering system that MUST rely ONLY on the provided data.

MANDATORY RULES:
- Use ONLY the information contained in the <context> section.
- DO NOT use any prior knowledge, pretrained knowledge, or external information.
- Do NOT infer, extrapolate, or add explanations.
- If the answer does NOT appear explicitly or cannot be clearly derived from <context>,
  respond with EXACTLY the following sentence and NOTHING ELSE:

"I do not have information in the database to answer this question."

<context>
{context}
</context>

Question: {question}
"""
)

In [None]:
# Create a question generator chain to rephrase the user's question based on chat history
qa_chain = load_qa_chain(
    llm=llm,
    chain_type="stuff",
    prompt=QA_PROMPT
)

In [None]:
# Prompt template to rephrase the user's question based on chat history
CONDENSE_QUESTION_PROMPT = PromptTemplate(
    input_variables=["chat_history", "question"],
    template="""
Given the following conversation history and a follow-up question,
rewrite the follow-up question into a standalone question.

Chat History:
{chat_history}

Follow-up Question:
{question}

Standalone Question:
"""
)

In [None]:
# Create the question generator LLM chain
question_generator = LLMChain(
    llm=llm,
    prompt=CONDENSE_QUESTION_PROMPT
)

  warn_deprecated(


In [47]:
# create a new Chat with OpenAI
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

# set up the conversation memory for the chat
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# the retriever is an abstraction over the VectorStore that will be used during RAG; k is how many chunks to use
retriever = vectorstore.as_retriever(search_kwargs={"k": 25})

# putting it together: set up the conversation chain with the GPT 3.5 LLM, the vector store and memory
conversation_chain = ConversationalRetrievalChain(
    retriever=retriever,
    combine_docs_chain=qa_chain,
    question_generator=question_generator,
    memory=memory
)


In [48]:
# Wrapping in a function - note that history isn't used, as the memory is in the conversation_chain
def chat(question, history):
    result = conversation_chain.invoke({"question": question})
    return result["answer"]

In [49]:
view = gr.ChatInterface(chat).launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.
