In [1]:
# Using a simple vector store
import os
from dotenv import load_dotenv
from llama_index.core.chat_engine import SimpleChatEngine
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.llms.openai import OpenAI
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    load_index_from_storage,
    StorageContext,
)
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv('API_KEY')
os.environ["AZURE_OPENAI_ENDPOINT"] = os.getenv('OPENAI_API_BASE')
os.environ["OPENAI_API_VERSION"] = "2023-07-01-preview"

In [None]:
llm = AzureOpenAI(
    engine="stb-gpt-35-turbo", model="gpt-35-turbo", temperature=0.0
)
response = llm.complete("The sky is a beautiful blue and it suddenly rains")
print(response)

In [5]:
embed_model = AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    api_key=os.environ["OPENAI_API_KEY"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_version=os.environ["OPENAI_API_VERSION"],
)

### Load local files only

In [None]:
# load documents
local_documents = SimpleDirectoryReader("data/docs").load_data()

In [8]:
len(local_documents)

53

### Load web pages only

In [9]:
from llama_index.core import download_loader
from llama_index.readers.web import SimpleWebPageReader

In [10]:
web_documents = SimpleWebPageReader(html_to_text=True).load_data(["https://www.stb.gov.sg/content/stb/en/careers/students-and-fresh-graduates.html"])

In [None]:
print(web_documents[0])

In [None]:
print(web_documents[0].text)

In [13]:
len(web_documents)

1

In [14]:
from llama_index.core import Settings

Settings.llm = llm
Settings.embed_model = embed_model

In [15]:
index = VectorStoreIndex.from_documents(web_documents, embed_model=embed_model)
query_engine = index.as_query_engine()
response = query_engine.query("What are the programmes for students or fresh graduates at stb?")
print(response.response)

The programmes for students or fresh graduates at STB include the Scholarship Programme (Full-Term and Mid-Term) and the Internship Programme. These programmes provide opportunities for young talents to enter the workforce and gain valuable experiences in the tourism industry.


### Combine documents

In [16]:
web_documents

[Document(id_='https://www.stb.gov.sg/content/stb/en/careers/students-and-fresh-graduates.html', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text="  * [ ](javascript:void\\(0\\);)\n\n    * __\n    * __\n\n[ TOP  ](javascript:void\\(0\\);)\n\n![](/content/dam/stb/images/bg-image/bg5.jpg)\n\n  * [Homepage](/content/stb/en.html)\n  * I am a...\n    * [Attractions Operator](/content/stb/en/i-am-a/attractions-operator.html)\n    * [Business/Leisure Event Organiser](/content/stb/en/i-am-a/business-leisure-event-organiser.html)\n    * [Hotelier](/content/stb/en/i-am-a/hotelier.html)\n    * [Media Professional](/content/stb/en/i-am-a/media-professional.html)\n    * [Tourist Guide](/content/stb/en/i-am-a/tourist-guide.html)\n    * [Travel Agent](/content/stb/en/i-am-a/travel-agent.html)\n  * [About STB](/content/stb/en/about-stb.html)\n    * [Overview](/content/stb/en/about-stb/overview.html)\n    * [Board Of Directors](/content

In [None]:
local_documents

In [19]:
combined_documents = local_documents + web_documents

In [20]:
len(combined_documents)

54

In [None]:
# Web documents are appended to the end of the document object
combined_documents

In [26]:
# Test query over combined documents
combined_index = VectorStoreIndex.from_documents(combined_documents, embed_model=embed_model)
new_query_engine = combined_index.as_query_engine()

In [31]:
response_1 = new_query_engine.query("What are the programmes for students or fresh graduates at stb?")
print(response.response)

The programmes for students or fresh graduates at STB include the Scholarship Programme (Full-Term and Mid-Term) and the Internship Programme. These programmes provide opportunities for young talents to enter the workforce and gain valuable experiences in the tourism industry.


In [30]:
response_2 = new_query_engine.query("Where is hotel v lavender")
print(response_2.response)

The links for TA complaints are:
1. [https://orchid.stb.gov.sg/display/INDUSTRY/General+SOP+-+CC](https://orchid.stb.gov.sg/display/INDUSTRY/General+SOP+-+CC)
2. [https://orchid.stb.gov.sg/display/INDUSTRY/General+SOP+-+CC](https://orchid.stb.gov.sg/display/INDUSTRY/General+SOP+-+CC)
