### Importing Requirements

In [None]:
import os

In [2]:
# Put the Hugging Face Hub client in offline mode so models are read from the local cache.
# Comment this line out for the very first run (the models must be downloaded and cached
# first); keep it enabled from the second run onwards.
os.environ['HF_HUB_OFFLINE']="1"

In [None]:
from langchain_community.document_loaders import WebBaseLoader
import bs4
from langchain_text_splitters.character import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
# from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace --> for running text generation model locally, very slow
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
import dotenv

In [39]:
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

In [42]:
from langchain_core.output_parsers import StrOutputParser

### Web Scraping Info

```html
<section id="SECTION_NAME"></section>
```

```js
// Collect the href of every <a> inside the section (run in the browser console).
// Written as one expression: as two statements without a semicolon, the line
// starting with `[` is parsed as a continuation of `let links = []`, which is
// a syntax error.
let links = [...document.getElementById('SECTION_NAME').getElementsByTagName('a')]
    .map(link => link.href + '');
```

```html
<main id='content'></main>
```

### Testing

#### Indexing

##### Document Ingestion

In [3]:
# Loader for the Canvas "Student Getting Started Resources" page.
# Only the element with id="content" is parsed, and its text is extracted
# with single-space separators and stripped whitespace.
canvas_guide_urls = ['https://community.canvaslms.com/t5/Student-Guide/Student-Getting-Started-Resources/ta-p/579371']
content_only = {"parse_only": bs4.SoupStrainer(id="content")}
text_options = {"separator": " ", "strip": True}
loader = WebBaseLoader(
    web_paths=canvas_guide_urls,
    bs_kwargs=content_only,
    bs_get_text_kwargs=text_options,
)

In [4]:
# Fetch and parse the configured page; the result is a list of Document objects.
docs = loader.load()

In [5]:
docs

[Document(metadata={'source': 'https://community.canvaslms.com/t5/Student-Guide/Student-Getting-Started-Resources/ta-p/579371'}, page_content="Student Getting Started Resources Below are some general questions and answers about Canvas, the Student role, and links to resources to help you be comfortable with Canvas. Visit the Student Guides to learn more. Introduction What is Canvas? Canvas is a Learning Management System. To learn more about Canvas terminology and definitions, visit How does Canvas define the terms used to describe its features and functions? Because Canvas is a web-based system, it doesn’t need to be installed on your computer. However, you’ll want to make sure that your computer and web browser meet the basic requirements to run Canvas . Canvas also has a free mobile app available for iOS and Android that is specifically tailored to students. You can learn more about how the mobile app features compare to Canvas on the web. What is the Student role? In Canvas, the St

In [6]:
len(docs[0].page_content)

4903

##### Text Splitting

In [10]:
# Splitter settings: chunk_size and chunk_overlap are both measured with len,
# i.e. in characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)

In [11]:
# Split the loaded documents into chunks; each chunk is itself a Document.
chunks = text_splitter.split_documents(docs)

In [12]:
chunks

[Document(metadata={'source': 'https://community.canvaslms.com/t5/Student-Guide/Student-Getting-Started-Resources/ta-p/579371'}, page_content='Student Getting Started Resources Below are some general questions and answers about Canvas, the Student role, and links to resources to help you be comfortable with Canvas. Visit the Student Guides to learn more. Introduction What is Canvas? Canvas is a Learning Management System. To learn more about Canvas terminology and definitions, visit How does Canvas define the terms used to describe its features and functions? Because Canvas is a web-based system, it doesn’t need to be installed on your computer. However, you’ll want to make sure that your computer and web browser meet the basic requirements to run Canvas . Canvas also has a free mobile app available for iOS and Android that is specifically tailored to students. You can learn more about how the mobile app features compare to Canvas on the web. What is the Student role? In Canvas, the St

In [13]:
len(chunks)

6

##### Embedding Generation and Storing in Vector Store

In [14]:
# Embedding model configuration, passed to the vector store below.
encode_options = {
    "normalize_embeddings": True,  # cosine-friendly
    "batch_size": 16,  # how many texts are embedded at once
}
embedding_func = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-mpnet-base-v2',
    encode_kwargs=encode_options,
)

In [15]:
# Chroma vector store whose collection is configured for cosine distance.
cosine_space = {"hnsw:space": "cosine"}
vector_store = Chroma(
    collection_metadata=cosine_space,
    embedding_function=embedding_func,
)

In [16]:
# Embed and store every chunk; the call returns the ids assigned to the stored chunks.
# NOTE(review): the ids look freshly generated per call, so re-running this cell
# presumably stores the same chunks a second time — confirm before re-executing.
vector_store.add_documents(chunks)

['bd0f9ab4-9aff-470d-84f6-3ee5dee7dbc2',
 '62925a66-f495-439d-85c5-8013699ed4a0',
 'dbfa1e85-1bf9-4613-9143-1eb74b31ac3a',
 '141d1429-710d-4d43-be61-62e622881831',
 '75cc8995-20f9-426a-84c9-63596a382d73',
 '82875a92-5792-43de-bdcd-ce8cdb124348']

In [17]:
# Check the embedding dimensionality on one stored chunk.
# The chunk is fetched with limit=1 instead of a hard-coded id: ids are generated
# anew whenever the chunks are added, so an id copied from an earlier run no
# longer exists after a kernel restart and the lookup would come back empty.
len(vector_store.get(
    limit=1,
    include=['embeddings']
)['embeddings'][0]) # maps sentences & paragraphs to a 768 dimensional dense vector space

768

In [19]:
vector_store.similarity_search_with_score(query='What is canvas?', k=2) # lower score = more similar

[(Document(id='bd0f9ab4-9aff-470d-84f6-3ee5dee7dbc2', metadata={'source': 'https://community.canvaslms.com/t5/Student-Guide/Student-Getting-Started-Resources/ta-p/579371'}, page_content='Student Getting Started Resources Below are some general questions and answers about Canvas, the Student role, and links to resources to help you be comfortable with Canvas. Visit the Student Guides to learn more. Introduction What is Canvas? Canvas is a Learning Management System. To learn more about Canvas terminology and definitions, visit How does Canvas define the terms used to describe its features and functions? Because Canvas is a web-based system, it doesn’t need to be installed on your computer. However, you’ll want to make sure that your computer and web browser meet the basic requirements to run Canvas . Canvas also has a free mobile app available for iOS and Android that is specifically tailored to students. You can learn more about how the mobile app features compare to Canvas on the web.

#### Retrieval

In [20]:
# Retriever over the vector store: similarity search, two chunks per query.
retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 2})

In [21]:
# Print each retrieved chunk (numbered header, text, metadata) followed by two blank lines.
for i, doc in enumerate(retriever.invoke(input="What is canvas?"), start=1):
    print(f'-------DOC: {i}---------', doc.page_content, doc.metadata, sep='\n')
    print('\n')

-------DOC: 1---------
Student Getting Started Resources Below are some general questions and answers about Canvas, the Student role, and links to resources to help you be comfortable with Canvas. Visit the Student Guides to learn more. Introduction What is Canvas? Canvas is a Learning Management System. To learn more about Canvas terminology and definitions, visit How does Canvas define the terms used to describe its features and functions? Because Canvas is a web-based system, it doesn’t need to be installed on your computer. However, you’ll want to make sure that your computer and web browser meet the basic requirements to run Canvas . Canvas also has a free mobile app available for iOS and Android that is specifically tailored to students. You can learn more about how the mobile app features compare to Canvas on the web. What is the Student role? In Canvas, the Student role is most commonly used to enroll users who will participate in a course for course credit. Users enrolled as s

In [22]:
# In-domain question and the chunks retrieved for it.
question = 'what is canvas?'
retrieved_docs = retriever.invoke(question)

In [23]:
# Off-topic control question and the chunks retrieved for it; used to check how
# the assistant responds when the query is unrelated to Canvas.
# (Spelling of "capital" corrected so the query is off-topic only, not also misspelled.)
unrelated_question = 'What is the capital of France?'
uq_retrieved_docs = retriever.invoke(input=unrelated_question)

#### Augmentation

In [24]:
# Load variables from the .env file located by find_dotenv() into the environment.
# NOTE(review): presumably this is where the OpenAI API key for ChatOpenAI comes from — confirm.
dotenv.load_dotenv(dotenv.find_dotenv())

True

In [25]:
# A generous max_tokens budget seems to be required: the model needs room to think
# and then respond. With a smaller budget an empty response was returned, likely
# because all tokens were used for thinking and none were left for the reply.
# NOTE(review): gpt-5 reasoning models may not honor a non-default temperature —
# confirm that temperature=0.3 actually has an effect here.
chat_model = ChatOpenAI(model='gpt-5-nano', temperature=0.3, max_tokens=1500)

In [26]:
# Smoke test: one round trip to the chat model, printing only the reply text.
print(chat_model.invoke(input='hi').content)

Hi! Nice to meet you. What would you like to do today? I can help with questions, explanations, writing, coding, brainstorming ideas, planning, summarizing, or just chat. Tell me your goal or a topic, and we’ll dive in.


In [27]:
# Prompt wording kept in its own variable, separate from the PromptTemplate
# construction. {context} and {question} are the placeholders filled at invoke time.
canvas_assistant_template = """
You are a helpful assistant that specificially supports new students regarding CANVAS Learning Management System by Instructure.

Your answer should ONLY be based on the provided context.
Always keep your responses short and consice.

If the context is insufficient or the query is not related to CANVAS LMS, just repond with: 
'I seem to have trouble helping you with your query. Would you like me to connect you to IT Service Connect?'

Context:
{context}

Question:
{question}
"""
prompt = PromptTemplate(template=canvas_assistant_template)

In [28]:
# Context block: page contents of the retrieved chunks, separated by blank lines.
context_text = '\n\n'.join(doc.page_content for doc in retrieved_docs)

In [29]:
# Fill the template's {question} and {context} placeholders for the in-domain question.
final_prompt = prompt.invoke({'question': question, 'context':context_text})

In [30]:
print(final_prompt.text)


You are a helpful assistant that specificially supports new students regarding CANVAS Learning Management System by Instructure.

Your answer should ONLY be based on the provided context.
Always keep your responses short and consice.

If the context is insufficient or the query is not related to CANVAS LMS, just repond with: 
'I seem to have trouble helping you with your query. Would you like me to connect you to IT Service Connect?'

Context:
Student Getting Started Resources Below are some general questions and answers about Canvas, the Student role, and links to resources to help you be comfortable with Canvas. Visit the Student Guides to learn more. Introduction What is Canvas? Canvas is a Learning Management System. To learn more about Canvas terminology and definitions, visit How does Canvas define the terms used to describe its features and functions? Because Canvas is a web-based system, it doesn’t need to be installed on your computer. However, you’ll want to make sure that y

In [31]:
# Context block for the off-topic question: page contents of its retrieved chunks,
# separated by blank lines.
uq_context_text = '\n\n'.join(doc.page_content for doc in uq_retrieved_docs)

In [32]:
# Fill the template's {question} and {context} placeholders for the off-topic question.
uq_final_prompt = prompt.invoke({'question': unrelated_question, 'context':uq_context_text})

#### Generation

In [33]:
# Send the filled prompt to the chat model; the result is an AIMessage.
answer = chat_model.invoke(final_prompt)

In [34]:
answer

AIMessage(content='Canvas is a Learning Management System (LMS) used to access course content, participate in assignments and communications. It’s web-based (no installation needed) and has a free mobile app for iOS and Android.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 372, 'prompt_tokens': 470, 'total_tokens': 842, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 320, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-nano-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CSkR8ntiq3B0X4kWegB3KY8s9hWtS', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--7ffa3cb4-e1d5-4c7d-bab7-f5f4eb22895a-0', usage_metadata={'input_tokens': 470, 'output_tokens': 372, 'total_tokens': 842, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_detai

In [35]:
print(answer.content)

Canvas is a Learning Management System (LMS) used to access course content, participate in assignments and communications. It’s web-based (no installation needed) and has a free mobile app for iOS and Android.


In [None]:
'''
Canvas is a Learning Management System (LMS) used to access course content, participate in assignments and communications. It’s web-based (no installation needed) and has a free mobile app for iOS and Android.
'''

In [36]:
# Send the off-topic prompt to the chat model.
uq_answer = chat_model.invoke(uq_final_prompt)

In [37]:
print(uq_answer.content)

I seem to have trouble helping you with your query. Would you like me to connect you to IT Service Connect?


#### Building a Chain

In [38]:
def formatted_docs(retrieved_docs):
    """Join the page contents of the given documents into one context string.

    Args:
        retrieved_docs: iterable of objects exposing a ``page_content`` string.

    Returns:
        The page contents in their original order, separated by a blank line.
    """
    page_texts = (doc.page_content for doc in retrieved_docs)
    return '\n\n'.join(page_texts)

In [40]:
# Inputs for the prompt: 'context' is the retriever output formatted into one
# string, 'question' is the chain input passed through as-is.
context_branch = retriever | RunnableLambda(formatted_docs)
parallel_chain = {'context': context_branch, 'question': RunnablePassthrough()}

In [43]:
# Full pipeline: build prompt inputs -> fill the prompt -> call the chat model
# -> parse the reply into a plain string.
main_chain = (
    parallel_chain
    | prompt
    | chat_model
    | StrOutputParser()
)

In [44]:
main_chain.invoke('Hi')

'Hi! I can help with Canvas login (including QR code login on mobile) and navigating Canvas as a student. What would you like to do—log in with a QR code, view your Dashboard, or access assignments/grades?'

In [45]:
main_chain.invoke('when did WW2 end?')

'I seem to have trouble helping you with your query. Would you like me to connect you to IT Service Connect?'

In [48]:
main_chain.invoke('I am unable to view my assignments')

'If you can’t view assignments, the course may not be published or started yet. Students can view content and assignments only after the course is published and has begun. Make sure you’re enrolled as a student in a published course that has started. For exact steps, see “How do I view Assignments as a student?” in the Canvas Basics Guide.'