In [2]:
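# One-time setup: uncomment on first run to extract data.zip
# (presumably provides the data/ folder the index loaders below read from).
# !unzip data.zip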

In [1]:
import json

## RAG

In [2]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.runnables import RunnableLambda

In [3]:
# Shared embedding model handed to every FAISS.load_local call below.
# No credentials are passed here -- presumably resolved from the environment (TODO confirm).
embedding_fn = OpenAIEmbeddings()

In [4]:
def format_docs(docs):
    """Concatenate the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The page contents in their original order, separated by a blank line.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

def rag_chain_from_docs(prompt, llm, output_parser):
    """Build the answer-generation pipeline for already-retrieved documents.

    The input mapping's ``"context"`` entry (a list of documents) is replaced by
    the text produced by ``format_docs``; the result is then piped through
    ``prompt``, ``llm`` and ``output_parser`` in that order.

    Args:
        prompt: Prompt runnable fed with the mapping after context formatting.
        llm: Model runnable that receives the rendered prompt.
        output_parser: Runnable applied to the model output.

    Returns:
        The composed runnable.
    """
    # Overwrite "context" with its flattened text; other keys pass through.
    with_flat_context = RunnablePassthrough.assign(
        context=lambda inputs: format_docs(inputs["context"])
    )
    return with_flat_context | prompt | llm | output_parser

### HW3 Chain

In [5]:
# Load the prebuilt Homework 3 FAISS index from disk.
# NOTE(review): allow_dangerous_deserialization=True opts in to unpickling the
# saved index data, so only point this at index folders you trust.
hw3_db = FAISS.load_local(
    "data/vector_index/hw3_db",
    embedding_fn,
    allow_dangerous_deserialization=True,
)
# Fetch two documents per query (k=2).
hw3_retriever = hw3_db.as_retriever(search_kwargs=dict(k=2))

In [6]:
# Prompt that restricts the model to the retrieved Homework 3 context.
hw3_prompt = PromptTemplate.from_template(
    """You are an expert in answering questions related to Homework 3 of Database Systems. \
Respond to the following question using only the provided context:
If the context do not contain an answer but lead to a resource mention the resource as the final answer.
Do not use your internal memory to answer the question.

Context: {context}

Question: {question}

Answer:"""
)

llm = ChatOpenAI()
output_parser = StrOutputParser()

# Step 1: look up context for the question and carry the question through.
# Step 2: add the generated text under "answer" next to "context"/"question".
hw3_chain = RunnableParallel(
    {
        "context": RunnableLambda(lambda payload: payload["question"]) | hw3_retriever,
        "question": RunnableLambda(lambda payload: payload["question"]),
    }
).assign(
    answer=rag_chain_from_docs(hw3_prompt, llm, output_parser),
)

### HW4 Chain

In [7]:
# Load the prebuilt Homework 4 FAISS index from disk.
# NOTE(review): allow_dangerous_deserialization=True opts in to unpickling the
# saved index data, so only point this at index folders you trust.
hw4_db = FAISS.load_local(
    "data/vector_index/hw4_db",
    embedding_fn,
    allow_dangerous_deserialization=True,
)
# Fetch two documents per query (k=2).
hw4_retriever = hw4_db.as_retriever(search_kwargs=dict(k=2))

In [8]:
# Prompt that restricts the model to the retrieved Homework 4 context.
hw4_prompt = PromptTemplate.from_template(
        """You are an expert in answering questions related to Homework 4 of Database Systems. \
Respond to the following question using only the provided context:
If the context do not contain an answer but lead to a resource mention the resource as the final answer.
Do not use your internal memory to answer the question.

Context: {context}

Question: {question}

Answer:"""
)

output_parser = StrOutputParser()
# Do not pass a (blank or literal) api_key here: like the other chains in this
# notebook, let the client resolve its credentials from the environment so no
# key is ever stored in the notebook.
llm = ChatOpenAI()

# Retrieve context for the question, carry the question through unchanged, then
# attach the generated text under "answer".
hw4_chain = RunnableParallel(
    context=(lambda x: x["question"]) | hw4_retriever,
    question=(lambda x: x["question"])
).assign(answer=rag_chain_from_docs(hw4_prompt, llm, output_parser))

### Lecture 1 Chain

In [9]:
# Load the prebuilt Lecture 1 (video) FAISS index from disk.
# NOTE(review): allow_dangerous_deserialization=True opts in to unpickling the
# saved index data, so only point this at index folders you trust.
lec1_db = FAISS.load_local(
    "data/video_index/Lec_1_db",
    embedding_fn,
    allow_dangerous_deserialization=True,
)
# Fetch two documents per query (k=2).
lec1_retriever = lec1_db.as_retriever(search_kwargs=dict(k=2))

In [10]:
# Prompt that restricts the model to the retrieved Lecture 1 context.
lec1_prompt = PromptTemplate.from_template(
    """You are an expert in answering questions related to Lecture 1 of Database Systems. \
Respond to the following question using only the provided context:
If the context do not contain an answer but lead to a resource mention the resource as the final answer.
Do not use your internal memory to answer the question.

Context: {context}

Question: {question}

Answer:"""
)

llm = ChatOpenAI()
output_parser = StrOutputParser()

# Step 1: look up context for the question and carry the question through.
# Step 2: add the generated text under "answer" next to "context"/"question".
lec1_chain = RunnableParallel(
    {
        "context": RunnableLambda(lambda payload: payload["question"]) | lec1_retriever,
        "question": RunnableLambda(lambda payload: payload["question"]),
    }
).assign(
    answer=rag_chain_from_docs(lec1_prompt, llm, output_parser),
)

### PDF Chain

In [11]:
# Load the prebuilt lecture-PDF FAISS index from disk.
# NOTE(review): allow_dangerous_deserialization=True opts in to unpickling the
# saved index data, so only point this at index folders you trust.
pdf_db = FAISS.load_local(
    "data/pdf_index/pdf_db",
    embedding_fn,
    allow_dangerous_deserialization=True,
)
# Fetch two documents per query (k=2).
pdf_retriever = pdf_db.as_retriever(search_kwargs=dict(k=2))

In [23]:
# Prompt that restricts the model to the retrieved lecture-PDF context.
pdf_prompt = PromptTemplate.from_template(
    """You are an expert in answering questions related to general lecture information embedded in pdf of Database Systems. \
Respond to the following question using only the provided context:
If the context do not contain an answer but lead to a resource mention the resource as the final answer.
Do not use your internal memory to answer the question.

Context: {context}

Question: {question}

Answer:"""
)

llm = ChatOpenAI()
output_parser = StrOutputParser()

# Step 1: look up context for the question and carry the question through.
# Step 2: add the generated text under "answer" next to "context"/"question".
pdf_chain = RunnableParallel(
    {
        "context": RunnableLambda(lambda payload: payload["question"]) | pdf_retriever,
        "question": RunnableLambda(lambda payload: payload["question"]),
    }
).assign(
    answer=rag_chain_from_docs(pdf_prompt, llm, output_parser),
)

## HW Integrity

In [39]:
# Protected homework prompts. route() compares each entry, lower-cased, as a
# substring of the incoming question and answers with a canned reply on a match.
hw_questions = ["Compare and contrast extendible hashing with linear hashing?"]

### Router

In [43]:
def route(info):
    """Choose the handler for an incoming question.

    Matching is case-insensitive and substring based. Checks run in this order:

    1. Homework-integrity guard: if any non-blank entry of ``hw_questions``
       occurs in the question, a canned refusal string is returned. This runs
       first so that adding "hw3", "hw4", "lecture 1" or "syllabus" to a
       protected homework question cannot bypass the guard.
    2. "hw3" -> ``hw3_chain``
    3. "hw4" -> ``hw4_chain``
    4. "lecture 1" -> ``lec1_chain``
    5. "syllabus" -> canned deferral string
    6. anything else -> ``pdf_chain``

    Args:
        info: Mapping with a ``"question"`` string.

    Returns:
        Either one of the chains above or a plain reply string.

    Raises:
        KeyError: If ``info`` has no ``"question"`` key.
    """
    # Lower-case once instead of on every branch.
    question = info["question"].lower()

    # Blank entries are skipped: "" is a substring of every string and would
    # otherwise turn the guard into a catch-all.
    is_protected = any(
        homework_question.strip() and homework_question.lower() in question
        for homework_question in hw_questions
    )
    if is_protected:
        return """I'm here to help you learn and understand your homework better, not to provide direct answers as it's important for your learning. If there's a specific part of the homework you believe is incorrect or needs clarification, please let me know the details. I'm here to assist you with understanding the material!"""

    if "hw3" in question:
        return hw3_chain
    if "hw4" in question:
        return hw4_chain
    if "lecture 1" in question:
        return lec1_chain
    if "syllabus" in question:
        return """Thank you for asking about the syllabus for the upcoming midterm exam. I will need to confirm the details with the real teaching assistant for the subject. I'll reach out to them and get back to you as soon as possible with accurate information."""
    return pdf_chain

## QA

In [86]:
# Demo: a question mentioning "hw3" is dispatched by route() to the HW3 chain.
full_chain = (
    {"question": lambda payload: payload["question"]}
    | RunnableLambda(route)
)
full_chain.invoke(
    {"question": "Trouble with ArcGIS in hw3"}
)

{'context': [Document(page_content='Post Subject: ArcGIS X USC\nPost Content: For Hw3, if anyone having trouble with creating an ArcGIS trial account, Please refer to this link: https://usctrojan.maps.arcgis.com/ For further info refer : https://spatial.usc.edu/software/proprietary-software-students/\nFollow Up Discussion: '),
  Document(page_content="Post Subject: arcgis - Filetype\nPost Content: Hello, After setting up a public ArcGIS account, when I navigate to the map tab and try to add a layer from a file, I'm shown this screen: It seems like I cannot add a .shp file as a layer?\nFollow Up Discussion: No subject No content ")],
 'question': 'Trouble with ArcGIS in hw3',
 'answer': 'https://spatial.usc.edu/software/proprietary-software-students/'}

In [32]:
# Demo: a question mentioning "lecture 1" is dispatched by route() to the Lecture 1 chain.
full_chain = (
    {"question": lambda payload: payload["question"]}
    | RunnableLambda(route)
)
full_chain.invoke(
    {"question": "How have the major data models evolved and classification of models by level of abstraction in lecture 1"}
)

{'context': [Document(page_content="Frame Text: m bytesuscedu/csst\n\nLearning Objectives\n\n* In this chapter, you will learn:\n* How the major data models evolved\n\n* About emerging alternative data models and the need\nthey fulfill\n\n* How data models can be classified by their level of\nabstraction\n\n\nTranscription: how did this models evolve that is actually a very very useful question one of the biggest data models is called the relational data model slowly the first half of the course is getting you towards understanding what relational model is but the internet came along and give you a non-relational model so relational model is slowly not the dominant tomorrow but before that before the internet things slowly and then how do you classify all these different models tomorrow morning some analysis customer service very very very very very complex place all kinds of things going on currently okay so data modeling says I'll throw away all the messages that is what the modeling

In [33]:
# Demo: a question with no routing keyword falls through to the PDF chain.
full_chain = (
    {"question": lambda payload: payload["question"]}
    | RunnableLambda(route)
)
full_chain.invoke(
    {"question": "What are three types of data anomalies?"}
)

{'context': [Document(page_content='The three types of data anomalies\n\nee Types of Data Anomaly T Update Anomalies i] T Insertion Anomalies | - Deletion Anomalies i', metadata={'image_path': './data/PDF/images/585_1_pages/img_24.jpg', 'pdf': './data/PDF/pdf/585_1.pdf', 'pdf_page_num': 24}),
  Document(page_content='Types of DBs [cont\'d]\n\na Types of Databases * Online analytical processing (OLAP) = Enable retrieving, processing, and modeling data from the data warehouse " Business intelligence: Captures and processes business data to generate information that support decision making', metadata={'image_path': './data/PDF/images/585_1_pages/img_14.jpg', 'pdf': './data/PDF/pdf/585_1.pdf', 'pdf_page_num': 14})],
 'question': 'What are three types of data anomalies?',
 'answer': 'Update Anomalies, Insertion Anomalies, Deletion Anomalies'}

In [34]:
# Demo: a "syllabus" question gets route()'s canned deferral string, not a chain result.
full_chain = (
    {"question": lambda payload: payload["question"]}
    | RunnableLambda(route)
)
full_chain.invoke(
    {"question": "What is the syllabus for the upcoming midterm exam?"}
)

"Thank you for asking about the syllabus for the upcoming midterm exam. I will need to confirm the details with the real teaching assistant for the subject. I'll reach out to them and get back to you as soon as possible with accurate information."

In [44]:
# Demo: a question listed in hw_questions gets route()'s canned integrity reply.
full_chain = (
    {"question": lambda payload: payload["question"]}
    | RunnableLambda(route)
)
full_chain.invoke(
    {"question": "Compare and contrast extendible hashing with linear hashing?"}
)

"I'm here to help you learn and understand your homework better, not to provide direct answers as it's important for your learning. If there's a specific part of the homework you believe is incorrect or needs clarification, please let me know the details. I'm here to assist you with understanding the material!"

## Assistant

In [45]:
from assistant import chat

In [90]:
# Ask the HW3 ArcGIS question through `chat`, imported from the local
# `assistant` module (its implementation is not part of this notebook).
query = "Trouble with ArcGIS in hw3"
response = chat(query)
print(response)

Refer to this link for troubleshooting ArcGIS in Homework 3: https://usctrojan.maps.arcgis.com/


In [46]:
# Ask the protected homework question through `chat`, imported from the local
# `assistant` module (its implementation is not part of this notebook).
query = "Compare and contrast extendible hashing with linear hashing?"
response = chat(query)
print(response)

I'm here to help you learn and understand your homework better, not to provide direct answers as it's important for your learning. If there's a specific part of the homework you believe is incorrect or needs clarification, please let me know the details. I'm here to assist you with understanding the material!
