# Setup

In [1]:
import os
import pandas as pd
# Show full cell contents when DataFrames are displayed, so long text is not truncated.
pd.set_option("display.max_colwidth", None)

# The endpoint is not a secret: keep the previous value as a default, but let an
# already-exported AZURE_OPENAI_ENDPOINT take precedence.
os.environ.setdefault("AZURE_OPENAI_ENDPOINT", "https://dalle3-swo.openai.azure.com/")

# SECURITY: never store the API key in the notebook. Use the value exported in
# the environment, and only prompt (without echo) when it is missing.
# The key that used to be hard-coded here has been exposed and should be rotated.
if not os.environ.get("AZURE_OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["AZURE_OPENAI_API_KEY"] = getpass("Azure OpenAI API key: ")

In [2]:
from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain_community.vectorstores import Chroma
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings, OpenAIEmbeddings
from langchain_core.output_parsers import BaseOutputParser
from typing import List, Optional
import re
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import chain
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_core.output_parsers import StrOutputParser

# Every Azure OpenAI client below is created against the same REST API version.
AZURE_OPENAI_API_VERSION = "2024-02-15-preview"


def _deterministic_chat_model(deployment_name):
    """Return an AzureChatOpenAI client for ``deployment_name`` with temperature 0."""
    return AzureChatOpenAI(
        azure_deployment=deployment_name,
        openai_api_version=AZURE_OPENAI_API_VERSION,
        temperature=0,
    )


# LLM
llm = _deterministic_chat_model("gpt-35-turbo")
smart_llm = _deterministic_chat_model("gpt-4")
# NOTE(review): this deployment name suggests a completions ("instruct") model,
# yet it is wrapped in a chat client - confirm which model the deployment serves.
gpt_35_turbo_instruct = _deterministic_chat_model("gpt-35-turbo-instruct")

# Embedding
embeddings = AzureOpenAIEmbeddings(
    openai_api_version=AZURE_OPENAI_API_VERSION,
    azure_deployment="text-embedding-ada-002",
)

# Current Problems

bus route:
- tự ý hiện time date
- tự hiện số xe bus
- file pdf khó hiểu (ko phải là miêu tả pick up point mà là miêu tả tuyến đường)
- calendar đang từ mùng 8 đến 12, nhưng hỏi lịch bus ngày mùng 9 thì ko trả lời đc
- cách hỏi: có route bus nào available ko? -> đang assume là bot hiểu thành sitting slot available
- hỏi lịch bus vào thứ 2 thì bot chỉ trả lời 1 chuyến, ví dụ có chuyến lúc 7h, thực tế thì đang mong chờ nó list cả hành trình

# Test questions

In [3]:
# Sample student questions about the bus schedule; each entry is one free-text query.
question_lists = [
    # Trip between two named stops, no day given.
    "I'm at BUV. I want to go to 87 Lang Ha, tell me available bus",
    # NOTE(review): if the year is 2024 (as in the schedule file names), 09 Apr is
    # a Tuesday, so the weekday and the date disagree - confirm this is intentional.
    "Tell me what are available routes for Monday 09 Apr",
    # Asks for the latest departure on a specific leg.
    "What is the last bus timing from 87 LANG HA to BUV CAMPUS? I have a study group that ends late.",
    # Asks where to find traffic updates rather than for a timetable entry.
    "How do I get updated information about daily traffic situations for the CAU GIAY route?",
]

# Implementation

## Process input

In [4]:
from langchain_community.document_loaders import AzureAIDocumentIntelligenceLoader

# Bus-schedule PDFs to parse (paths are relative to the notebook's working directory).
file_path = [
    "../data/bus_route/Cau Giay Bus Route Schedule_08.04.2024 - 12.04.2024 1.pdf",
    "../data/bus_route/Tay Ho Bus Route Schedule_08.04.2024 - 12.04.2024 1.pdf",
]

# Azure AI Document Intelligence connection settings.
# The endpoint keeps its previous value unless AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT is set.
endpoint = os.environ.get(
    "AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT",
    "https://di-buv.cognitiveservices.azure.com/",
)
# SECURITY: the key is a secret and must not live in the notebook. Read it from
# AZURE_DOCUMENT_INTELLIGENCE_KEY, prompting without echo only when it is unset.
# The key that used to be hard-coded here has been exposed and should be rotated.
key = os.environ.get("AZURE_DOCUMENT_INTELLIGENCE_KEY")
if not key:
    from getpass import getpass

    key = getpass("Azure AI Document Intelligence key: ")

# Parse each PDF with the "prebuilt-layout" model and collect every returned document.
documents = []
for path in file_path:
    loader = AzureAIDocumentIntelligenceLoader(
        api_endpoint=endpoint, api_key=key, file_path=path, api_model="prebuilt-layout"
    )
    # extend() appends in place instead of rebuilding the list on every iteration.
    documents.extend(loader.load())

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_text_splitters import MarkdownHeaderTextSplitter

# Split the parsed documents into overlapping chunks; add_start_index records
# each chunk's character offset in its metadata.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(documents)

# Save to disk
persist_directory = "./chroma_db/bus_route"
# Chroma.from_documents adds to whatever collection is already persisted in the
# directory, so re-running this cell would store every chunk again and the
# retriever would return duplicates. Drop the existing collection first so the
# cell is idempotent.
Chroma(persist_directory=persist_directory, embedding_function=embeddings).delete_collection()
Chroma.from_documents(documents=all_splits, embedding=embeddings, persist_directory=persist_directory)


<langchain_community.vectorstores.chroma.Chroma at 0x1079bab50>

## Create chain

In [7]:
# Re-open the Chroma store persisted under ./chroma_db/bus_route.
vectorstore = Chroma(
    embedding_function=embeddings,
    persist_directory="./chroma_db/bus_route",
)

# Similarity search that returns the 5 closest chunks for each query.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 5},
    search_type="similarity",
)

In [8]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string; empty when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Prompt text for the bus-schedule assistant. It contains hand-written notes on
# how to read the schedule tables, general answering rules, and two
# placeholders: {context} and {question}.
template = """
You are an AI assistant that helps students find information about bus schedule. 

Here are some notes you MUST pay attention to:
- Some slots are just available at several specific days such as Tue to Fri, Tue and Fri, ... So if students want to pick up on Monday, just suggest slots available in Mon.
- The first column of bus schedule table is the bus route. For example, buv campus --> cau giay schedule, the bus starts from buv campus, then go to 87 Lang Ha, then go to Nguyen Quoc Tri. So if students want to take bus from BUV to Lang Ha, use time slot at BUV, not Lang Ha.
- In the document, The date is abbreviated. For example, Mon is Monday, Tue is Tuesday, and so on.
- Some bus schedules are just effective at a specific time range

Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""
# Wrap the raw text in a PromptTemplate so it can be used as a step in a chain.
custom_rag_prompt = PromptTemplate.from_template(template)

from operator import itemgetter

# Picks the question text out of the chain input, e.g. {"question": "..."}.
pick_question = itemgetter("question")

# question -> retriever -> format_docs produces the context string for the prompt.
context_branch = pick_question | retriever | format_docs

# The mapping supplies both prompt variables; the prompt then goes to the LLM
# and the reply is reduced to a plain string.
prompt_inputs = {"context": context_branch, "question": pick_question}
rag_chain = prompt_inputs | custom_rag_prompt | llm | StrOutputParser()



# Testing

In [9]:
# Display the list of test questions.
question_lists

["I'm at BUV. I want to go to 87 Lang Ha, tell me available bus",
 'Tell me what are available routes for Monday 09 Apr',
 'What is the last bus timing from 87 LANG HA to BUV CAMPUS? I have a study group that ends late.',
 'How do I get updated information about daily traffic situations for the CAU GIAY route?']

In [10]:
# Try the chain on a single question; the input is a mapping with a "question" key.
sample_input = {"question": "I'm at BUV. I want to go to 87 Lang Ha, tell me available bus"}
sample_answer = rag_chain.invoke(sample_input)
print(sample_answer)

The available bus from BUV to 87 Lang Ha is at the following time slots: Slot 1: 08:05, Slot 2: 10:35, Slot 3: 11:30, Slot 4: 12:45, Slot 5: 14:35. Thanks for asking!


In [10]:
# rag_chain reads its input with itemgetter("question"), so every batch item
# must be a mapping with a "question" key. Passing the bare strings would raise
# a TypeError when the chain tries to index a str with "question".
bot_answers = rag_chain.batch([{"question": question} for question in question_lists])

In [19]:
# Display the questions that the entries of bot_answers correspond to, in order.
question_lists

["I'm at BUV. I want to go to 87 Lang Ha, tell me available bus",
 'Tell me what are available routes for Monday 09 Apr',
 'What is the last bus timing from 87 LANG HA to BUV CAMPUS? I have a study group that ends late.',
 'How do I get updated information about daily traffic situations for the CAU GIAY route?']

In [11]:
import pandas as pd
# One row per test question, paired with the chain's reply for it.
answer_columns = {"Question": question_lists, "Bot Answer": bot_answers}
answer_df = pd.DataFrame(answer_columns)

In [None]:
# Render the question/answer table as Markdown text and print it.
answers_markdown = answer_df.to_markdown()
print(answers_markdown)

|    | Question                                                                                        | Bot Answer                                                                                                                                    |
|---:|:------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|
|  0 |  I'm at BUV. I want to go to 87 Lang Ha, tell me available bus                                  | From BUV Campus to 87 Lang Ha, you have the following options:                                                                                |
|    |                                                                                                 |                                                                                                                                               |
|    |                                                                                                 | - Slot 1: 11:30 from Mon to Fri                                                                                                               |
|    |                                                                                                 | - Slot 2: 12:30 from Mon to Fri                                                                                                               |
|    |                                                                                                 | - Slot 3: 13:30 on Wed and Fri                                                                                                                |
|    |                                                                                                 | - Slot 4: 15:15 from Mon to Fri                                                                                                               |
|    |                                                                                                 | - Slot 5: 17:35 from Mon to Fri                                                                                                               |
|    |                                                                                                 |                                                                                                                                               |
|    |                                                                                                 | Please note that these schedules are effective from 08. 04. 2024 to 12. 04. 2024. Thanks for asking!                                          |
|  1 | Tell me what are available routes for Monday 09 Apr                                             | On Monday, 09 Apr, the available routes are:                                                                                                  |
|    |                                                                                                 |                                                                                                                                               |
|    |                                                                                                 | 1. Cau Giay Route:                                                                                                                            |
|    |                                                                                                 |    - Pick-up from Nguyen Quoc Tri at 07:00 and 11:40                                                                                          |
|    |                                                                                                 |    - Pick-up from 87 Lang Ha at 07:15 and 11:55                                                                                               |
|    |                                                                                                 |    - Pick-up from BUV Campus at 08:05 and 12:45                                                                                               |
|    |                                                                                                 |                                                                                                                                               |
|    |                                                                                                 | 2. Tay Ho Route:                                                                                                                              |
|    |                                                                                                 |    - Pick-up from 107 Xuan Dieu at 07:40 and 11:40                                                                                            |
|    |                                                                                                 |    - Pick-up from Ho Tay Water Park at 07:50 and 11:50                                                                                        |
|    |                                                                                                 |    - Pick-up from Vinhomes Ocean Park at 08:30 and 12:30                                                                                      |
|    |                                                                                                 |    - Pick-up from BUV Ecopark at 08:40 and 12:40                                                                                              |
|    |                                                                                                 |                                                                                                                                               |
|    |                                                                                                 | Thanks for asking!                                                                                                                            |
|  2 | What is the last bus timing from 87 LANG HA to BUV CAMPUS? I have a study group that ends late. | The last bus from 87 Lang Ha to BUV Campus is at 13:45. Thanks for asking!                                                                    |
|  3 | How do I get updated information about daily traffic situations for the CAU GIAY route?         | You can get updated information about the daily traffic situation for the Cau Giay route by scanning the provided QR code. Thanks for asking! |
