In [None]:
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from llama_parse import LlamaParse
from decouple import Config, RepositoryEnv
from langchain_community.document_loaders import TextLoader
from pathlib import Path
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.prompts import MessagesPlaceholder

config = Config(RepositoryEnv(".env"))

### JSON Loader

In [35]:
#!pip install jq
from langchain_community.document_loaders import JSONLoader
from pprint import pprint

loader = JSONLoader(
    file_path='./course_info_SC.json',
    jq_schema=".CMPUT[]",
    text_content=False)

data = loader.load()
pprint(data)

[Document(page_content='3 units (fi 6)(EITHER, 3-0-3)An introduction to fundamental concepts in computation, including state, abstraction, generalization, and representation. Introduction to algorithms, logic, number systems, circuits, and other topics in elementary computing science. This course cannot be taken for credit if credit has been obtained in CMPUT 114, 174, 274, or SCI 100. See Note (1) above.', metadata={'source': '/Users/anshulverma/Documents/AI_ML_LLMs/Course Planner/course_info_SC.json', 'seq_num': 1}),
 Document(page_content='3 units (fi 6)(EITHER, 3-0-3)CMPUT 174 and 175 use a problem-driven approach to introduce the fundamental ideas of Computing Science. Emphasis is on the underlying process behind the solution, independent of programming language or style. Basic notions of state, control flow, data structures, recursion, modularization, and testing are introduced through solving simple problems in a variety of domains such as text analysis, map navigation, game sea

In [None]:
document_path = "/course_info_SC_CMPUT.txt"
loader = TextLoader(document_path)
loaded_documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=6144, chunk_overlap=128)
documents = text_splitter.split_documents(loaded_documents)
print(len(documents))

2


In [37]:
import chromadb
from chromadb import Embeddings
from langchain.embeddings import HuggingFaceEmbeddings

ef = HuggingFaceEmbeddings()

class DefChromaEF(Embeddings):
  def __init__(self,ef):
    self.ef = ef

  def embed_documents(self,doc):
    return self.ef.embed_documents(doc)

  def embed_query(self, query):
    return self.ef.embed_query([query])[0]
  
  def __call__(self, input):
    return self.embed_documents(input)

embedding_function=DefChromaEF(ef)



In [38]:
client = chromadb.Client()
client.delete_collection(name="CMPUT_Courses")
collection = client.get_or_create_collection(name="CMPUT_Courses", embedding_function=embedding_function)
docs_json = []
for doc in documents:
    doc_json = doc.page_content
    docs_json.append(doc_json)
id_list = list(map(lambda tup: f"id{tup[0]}", enumerate(docs_json)))
collection.add(
    documents = docs_json,
    ids = id_list)

chroma = Chroma(client=client, collection_name="CMPUT_Courses", embedding_function=ef)
retriever = chroma.as_retriever(k=5, search_type="mmr")

In [39]:

prompt_template = """You are a robust and well-trained faculty advisor for the faculty of {subject} at the {university}.
Given information about the various courses offered by the faculty of {subject} at the university,
answer relevant queries and advise the users.
YOUR responses must be RESTRICTED to the learnt course information document for the faculty of {subject} in the {university},
DO NOT make answers up.
Correctly identify if the course mentioned in the query exists in the document or not, DO NOT use any other source of information.

The structure of the course information document includes asterisks to indicate the start of information on courses in a mentioned discipline like Computing Science.
The discipline heading is then followed by relevant courses with their titles, number of units and description.

Human: {question}

ONLY provide relevant information about courses mentioned in the document, DO NOT make course titles up.
IDENTIFY courses from the user query and only provide assistance based on the course information document.
Respond with a NO if the course doesn't exist in the document.
"""

#DO NOT access any other sources of information online for other universities apart from the University of Alberta.
#The answers about course inquiries should STRICTLY be restricted to the courses offered by the {university} as mentioned in the document.

classify_template = """DO NOT answer the question asked, please act as a robust and well-trained course of interest identifier. 
Identify the ALPHANUMERIC code of the courses of interest based on the provided text consisting of relevant courses provided by the {department_name} department for the University of Alberta.
The text consists of the alphanumeric code for the courses followed by a short description.
Human: {text}
Faculty: {faculty_name}
Department: {department_name}
Assistant: ONLY return the alphanumeric code for the courses. DO NOT make the codes up"""

question_prompt = PromptTemplate(template=prompt_template,
                        input_variables=["question", "subject", "university"])

classify_prompt = PromptTemplate(template=classify_template,
                        input_variables=["text", "faculty_name", "department_name"])

llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", groq_api_key=config('GROQ_API_KEY'))

from langchain.chains import LLMChain
qa1 = LLMChain(llm=llm, prompt=classify_prompt)

### Test the model

In [40]:
from langchain.chains import LLMChain

question = "I have taken CMPUT 229. What courses qould you recommend me to take next?"
# Query Chroma Db collection. (TRIED)
# Try JSON loader (TRIED)
# Try a different chain? (TRIED)
# Increase k for better Chroma retrival? (TRIED)
# Change swarch_type for Chroma
#qa = LLMChain(llm=llm, prompt=question_prompt)
qa2 = RetrievalQA.from_chain_type(llm, retriever=retriever, chain_type="stuff")
result2 = qa2({"query": question})
print(result2['result'])
"""
#result = qa.generate([{"question": question, "subject": "science", "university": "University of Alberta"}])
#print(result.generations[0][0].text)
result1 = qa1.generate([{"text": result2['result'], "faculty_name": "Faculty of Science", "department_name": "Computing Science"}])
courses_list = result1.generations[0][0].text.split("\n")
print(courses_list)

import json 

with open('course_info_SC.json', 'r') as f:
    courses_dict = json.load(f)

for course in courses_list:
    print(courses_dict["CMPUT"][course])
"""

Number of requested results 20 is greater than number of elements in index 2, updating n_results = 2


Based on the course catalog, CMPUT 229 is Computer Organization and Architecture I. Considering you've taken this course, I would recommend the following courses as a next step:

1. **CMPUT 329: Computer Organization and Architecture II**: This course is a natural progression from CMPUT 229, as it builds upon the concepts learned in the first course.
2. **CMPUT 313: Computer Networks**: This course explores computer networks, which is a related area to computer organization and architecture.
3. **CMPUT 379: Operating System Concepts**: This course delves into operating systems, which is closely related to computer organization and architecture.
4. **CMPUT 382: Introduction to GPU Programming**: If you're interested in parallel computing and architecture, this course could be a good fit.

These courses are all related to computer systems and architecture, and will help you build upon the foundation established in CMPUT 229. However, it's essential to check the prerequisites and course a

'\n#result = qa.generate([{"question": question, "subject": "science", "university": "University of Alberta"}])\n#print(result.generations[0][0].text)\nresult1 = qa1.generate([{"text": result2[\'result\'], "faculty_name": "Faculty of Science", "department_name": "Computing Science"}])\ncourses_list = result1.generations[0][0].text.split("\n")\nprint(courses_list)\n\nimport json \n\nwith open(\'course_info_SC.json\', \'r\') as f:\n    courses_dict = json.load(f)\n\nfor course in courses_list:\n    print(courses_dict["CMPUT"][course])\n'

In [42]:
def get_llama_response(question: str):
    qa2 = RetrievalQA.from_chain_type(llm, retriever=retriever, chain_type="stuff")
    result2 = qa2({"query": question})
    return result2['result']
    result1 = qa1.generate([{"text": result2['result'], "faculty_name": "Faculty of Science", "department_name": "Computing Science"}])
    return result1.generations[0][0].text

### Interactive UI using Gradio

In [43]:
import gradio as gr

test = gr.Interface(fn=get_llama_response, inputs="text", outputs="text")
test.launch()

Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.




Number of requested results 20 is greater than number of elements in index 2, updating n_results = 2
Number of requested results 20 is greater than number of elements in index 2, updating n_results = 2
Number of requested results 20 is greater than number of elements in index 2, updating n_results = 2
Number of requested results 20 is greater than number of elements in index 2, updating n_results = 2
Number of requested results 20 is greater than number of elements in index 2, updating n_results = 2
