In [1]:
from langchain.document_loaders import DirectoryLoader
from langchain_community.document_loaders import JSONLoader

DRIVE_FOLDER = "../data/oms-official"

# Recursively collect every *.json file under DRIVE_FOLDER. Each file is read
# with JSONLoader using the jq schema "." (the whole JSON document) and
# text_content=False.
loader = DirectoryLoader(
    DRIVE_FOLDER,
    glob="**/*.json",
    loader_cls=JSONLoader,
    loader_kwargs={"jq_schema": ".", "text_content": False},
    show_progress=True,
)
course_info = loader.load()

100%|██████████| 67/67 [00:00<00:00, 318.08it/s]


In [2]:
import os
from dotenv import load_dotenv

# Read the local .env file, then look up the OpenAI key in the process
# environment (None when the variable is not set).
load_dotenv()
OPEN_API_KEY = os.environ.get("OPEN_API_KEY")

from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

COURSE_INFO_CHROMA_PATH = "../data/chroma_data_course_info/"

# Embed the loaded course documents with OpenAI embeddings and persist the
# resulting Chroma collection to COURSE_INFO_CHROMA_PATH.
# (Previously this referenced REVIEWS_CHROMA_PATH, which is never defined in
# this notebook and raised NameError on a fresh kernel.)
course_info_vector_db = Chroma.from_documents(
    course_info,
    OpenAIEmbeddings(openai_api_key=OPEN_API_KEY),
    persist_directory=COURSE_INFO_CHROMA_PATH,
)

## Finished code

In [1]:
import dotenv
from langchain_openai import ChatOpenAI
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)
import os

# Load the local .env file before reading the key from the environment;
# OPEN_API_KEY is None when the variable is not set.
dotenv.load_dotenv()
OPEN_API_KEY = os.environ.get("OPEN_API_KEY")

# System prompt for the course-information chain. The {context} placeholder is
# the template's only input variable and is filled in when the prompt is
# rendered.
review_system_template_str = """Your job is to use courses information
provided to answer questions about course prerequisite,
suggested backgrounds, technical requirements, Contents, Goals, etc.
Use the following context to answer questions.
Be as detailed as possible, but don't make up any information
that's not from the context. If you don't know an answer, say
you don't know.

{context}
"""

# System message: wraps the system template string, whose single input
# variable is "context".
course_info_system_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(
        template=review_system_template_str,
        input_variables=["context"],
    )
)

# Human message: the template is just the "{question}" placeholder.
course_info_human_prompt = HumanMessagePromptTemplate(
    prompt=PromptTemplate(template="{question}", input_variables=["question"])
)

# Full chat prompt: system message first, then the human message.
messages = [course_info_system_prompt, course_info_human_prompt]
course_info_prompt_template = ChatPromptTemplate(
    messages=messages,
    input_variables=["context", "question"],
)

# Chat model used to answer the rendered prompt.
chat_model = ChatOpenAI(
    api_key=OPEN_API_KEY,
    model="gpt-3.5-turbo-0125",
    temperature=0,
)


from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough, RunnableMap
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
# NOTE(review): this directory differs from the "../data/chroma_data_course_info/"
# constant defined in the index-building cell — confirm which location
# actually holds the persisted course-info collection.
COURSE_INFO_CHROMA_PATH = "../data/chroma_data_prereq_background_chain/"

# Open the persisted Chroma collection, embedding queries with OpenAI embeddings.
course_info_vector_db = Chroma(
    persist_directory=COURSE_INFO_CHROMA_PATH,
    embedding_function=OpenAIEmbeddings(openai_api_key=OPEN_API_KEY),
)

# The number of results must be passed through `search_kwargs`; a bare `k=1`
# keyword to as_retriever() does not configure the search.
course_info_retriever = course_info_vector_db.as_retriever(search_kwargs={"k": 1})

# Define a function to process each document's content and metadata
def format_with_metadata(documents):
    """Render retrieved documents as text snippets tagged with their course name.

    Args:
        documents: Mapping whose "context" entry is an iterable of objects
            exposing ``page_content`` and a ``metadata`` dict containing a
            "source" path.

    Returns:
        A list with one string per document: a "Review: <page_content>" line
        followed by a "Course: <name>" line, where <name> is the last
        "/"-separated component of the source path with a trailing ".json"
        removed.

    Raises:
        KeyError: If "context" is missing from ``documents`` or "source" is
            missing from a document's metadata.
    """
    suffix = ".json"
    formatted = []
    for doc in documents["context"]:
        file_name = doc.metadata["source"].split("/")[-1]
        # str.rstrip(".json") strips any trailing run of the characters
        # '.', 'j', 's', 'o', 'n' (so "cs-7643-os.json" became "cs-7643-");
        # remove only the exact suffix instead.
        if file_name.endswith(suffix):
            file_name = file_name[: -len(suffix)]
        # NOTE(review): the "Review:" label looks carried over from a reviews
        # chain; kept unchanged because it is part of the text sent to the model.
        formatted.append(f"Review: {doc.page_content}\nCourse: {file_name}")
    return formatted

# Create a chain that incorporates the formatted documents with metadata.
# Step 1 fans the input question out to the retriever ("context") and passes
# it through unchanged ("question"); step 2 formats the retrieved documents;
# the result is rendered into the chat prompt, sent to the model, and parsed
# to a plain string.
course_info_chain = (
    {"context": course_info_retriever, "question": RunnablePassthrough()}
    | RunnableMap(
        {
            "context": format_with_metadata,  # Format context with metadata
            # This step receives the whole {"context", "question"} dict, so
            # pick out the question string. An identity lambda here would put
            # the dict's repr (raw documents included) into the human message.
            "question": lambda x: x["question"],
        }
    )
    | course_info_prompt_template
    | chat_model
    | StrOutputParser()
)
question_to_ask = """What's the course prerequisite of cs-6515"""
course_info_chain.invoke(question_to_ask)

  course_info_vector_db = Chroma(


'The course prerequisite for cs-6515, which is "Intro to Graduate Algorithms," includes taking CS 8001 OLP, a one credit-hour seminar designed to fulfill prerequisites to succeed in CS 6515. More information about this prerequisite seminar is available on the CS 8001 Seminars page.'