## Set up LLaMa

In [1]:
from langchain_community.llms import LlamaCpp
from langchain_community.embeddings import LlamaCppEmbeddings
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [2]:
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
    model_path="llama_models\llama-2-7b-chat.Q5_K_S.gguf",
    temperature=0,
    max_tokens=500,
    n_ctx=3900,
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from llama_models\llama-2-7b-chat.Q5_K_S.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32   

In [3]:
from langchain_community.embeddings import HuggingFaceEmbeddings
hf_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

## Prepare PDF documents

### Make documents from PDFs

In [4]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

pdf_paths = os.scandir('resources/all_pdfs/')
pages = []
for path in pdf_paths:
    if path.is_file():
        loader = PyPDFLoader('resources/all_pdfs/' + path.name)
        new_pages = loader.load_and_split()
        pages.extend(new_pages)

In [5]:
print(pages[0])

page_content='Department of Computer Science\nCOS 132\nImperative Programming\nLecturers: Ms. Tayana Morkel, Dr. Patricia Lutu and Dr. Vreda Pieterse\nc⃝Copyright reserved\n1' metadata={'source': 'resources/all_pdfs/COS 132 study guide.pdf', 'page': 0}


Generate meta-information from first chunk of each document

In [6]:
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

metadata_generation_template = """\
<<SYS>>

What is the six-character module code for this module in the study guide below? Provide those six characters only - do not provide any other information. No yapping.

Format the response as the three letters of the code, and then the three numbers, for example: COS 999

<</SYS>>

[INST]
Extract the module code from the below text.

study guide snippet: {study_guide_snippet}

[/INST]
"""

metadata_generation_prompt_template = PromptTemplate.from_template(metadata_generation_template)

metadata_chain = (
    {"study_guide_snippet": RunnablePassthrough()}
    | metadata_generation_prompt_template
    | llm
    | StrOutputParser()
)

In [7]:
import pickle
saved_metadata = dict()
if os.path.exists('generated_metadata/saved_metadata.pkl'):
    with open('generated_metadata/saved_metadata.pkl', 'rb') as f:
        saved_metadata = pickle.load(f)
        print("Loaded saved data!")
        print(saved_metadata)

for page in pages:
    generated_metadata = ""
    page.page_content = str(page.page_content).replace('\n', '')
    page.page_content = str(page.page_content).replace('.', ' ')
    page.page_content = str(page.page_content).replace("'", ' ')  
    page.page_content = str(page.page_content).replace("`", ' ')  

    # for the "root" page of each document
    if page.metadata['page'] == 0:
        # set generated metadata to either the saved metadata...
        if saved_metadata.keys() != None and page.metadata['source'] in saved_metadata.keys():
            generated_metadata = saved_metadata[page.metadata['source']]
        # or generate new metadata if there is no saved metadata
        else:
            generated_metadata = metadata_chain.invoke(page.page_content)
            saved_metadata[page.metadata['source']] = generated_metadata
        
        # loop through all documents...
for page in pages:
    # update content to add metadata context.
    page.page_content = 'Module ' + saved_metadata[page.metadata['source']] + ':\n' + str(page.page_content) + '\n\n'


Loaded saved data!
{'resources/all_pdfs/COS 132 study guide.pdf': 'COS 132', 'resources/all_pdfs/COS110-studyGuide.pdf': 'COS 110', 'resources/all_pdfs/COS122_StudyGuide.pdf': 'COS 122', 'resources/all_pdfs/COS151_study_guide.pdf': 'COS 151', 'resources/all_pdfs/COS212_study_guide.pdf': 'COS 212', 'resources/all_pdfs/COS214StudyGuide2020_V1_0(1).pdf': 'COS 214', 'resources/all_pdfs/COS216_StudyGuide.pdf': 'COS 216', 'resources/all_pdfs/COS710StudyGuide-2023.pdf': 'COS 710', 'resources/all_pdfs/study guide(1).pdf': 'FNAS', 'resources/all_pdfs/Study guide(2).pdf': 'The six-character module code for WTW 148 is: COS 999', 'resources/all_pdfs/study guide.pdf': 'The six-character module code for the module "Statistics (STK 220)" in the study guide is:\nSTK 220', 'resources/all_pdfs/study_guide.pdf': 'COS 999'}


In [8]:
import pprint
# Prints the nicely formatted dictionary
pprint.pprint(saved_metadata)

{'resources/all_pdfs/COS 132 study guide.pdf': 'COS 132',
 'resources/all_pdfs/COS110-studyGuide.pdf': 'COS 110',
 'resources/all_pdfs/COS122_StudyGuide.pdf': 'COS 122',
 'resources/all_pdfs/COS151_study_guide.pdf': 'COS 151',
 'resources/all_pdfs/COS212_study_guide.pdf': 'COS 212',
 'resources/all_pdfs/COS214StudyGuide2020_V1_0(1).pdf': 'COS 214',
 'resources/all_pdfs/COS216_StudyGuide.pdf': 'COS 216',
 'resources/all_pdfs/COS710StudyGuide-2023.pdf': 'COS 710',
 'resources/all_pdfs/Study guide(2).pdf': 'The six-character module code for '
                                          'WTW 148 is: COS 999',
 'resources/all_pdfs/study guide(1).pdf': 'FNAS',
 'resources/all_pdfs/study guide.pdf': 'The six-character module code for the '
                                       'module "Statistics (STK 220)" in the '
                                       'study guide is:\n'
                                       'STK 220',
 'resources/all_pdfs/study_guide.pdf': 'COS 999'}


In [9]:
with open('generated_metadata/saved_metadata.pkl', 'wb') as f:
    saved_metadata = pickle.dump(saved_metadata, f)

In [10]:
pprint.pprint(pages[30].page_content)

('Module COS 122:\n'
 'The ﬁnal EO is a written exam during the normal examination period  The '
 'following regula-tions hold:•To pass the course, a student must obtain a '
 'ﬁnal mark of at least 50% •A student will pass the course with distinction '
 'if he/she pass the course with a ﬁnalmark of at least 75% Note that all '
 'these mark thresholds are applied strictly  The marks of students who '
 'comeclose to them, but do not actually meet them will notbe condoned (i e  '
 'put up)  Please alsotake note of the examination rules, as provided for in '
 'the general rules and regulations of theUniversity of Pretoria, under '
 'section G 12  The date and venue for the exam is scheduledon the ofﬁcial UP '
 'exam time table 6 2 Mark DistributionMarks for all events will be published '
 'on the CS Website  Students are responsible forverifying the correctness of '
 'these marks  Please ﬁle complaints about marks via email, within10 calendar '
 'days after publication, to the COS122 ticke

## Create vector database from split documents

In [11]:
from langchain_community.vectorstores import FAISS

db = FAISS.from_documents(pages, hf_embeddings)

### Create a retrieval object which will be used in the query chain to provide context

In [24]:
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 1})
answer_docs = retriever.invoke("Who is the COS 122 course coordinator?")
for doc in answer_docs:
    print(doc.metadata)

{'source': 'resources/all_pdfs/COS110-studyGuide.pdf', 'page': 4}


In [103]:
# Sanity check - should return COS122_StudyGuide.pdf
test_retrieval_input = """\
Here are the keywords extracted from the question:

* operating system
* practical
* work
* compiled
* COS
* 110
"""
results_with_scores = db.similarity_search_with_score(test_retrieval_input, k=3)
for doc, score in results_with_scores:
    print(f"Metadata: {doc.metadata}, Score: {score}")

Metadata: {'source': 'resources/all_pdfs/COS122_StudyGuide.pdf', 'page': 2}, Score: 0.7923681735992432
Metadata: {'source': 'resources/all_pdfs/COS110-studyGuide.pdf', 'page': 0}, Score: 0.8154385089874268
Metadata: {'source': 'resources/all_pdfs/COS151_study_guide.pdf', 'page': 0}, Score: 0.8852511644363403


## Make a template and chain with which to make queries

In [104]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.prompts import PromptTemplate

# Method for formatting documents to place into the context of a query
def format_docs(docs):
    return "\n\n".join(str(str(doc.metadata) + ":\n " + str(doc.page_content) + "\n next document:\n") for doc in docs)

study_guide_rag_template = """\
<<SYS>>

Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.

context: {context}

<</SYS>>

[INST]
question: {question}
answer:
[/INST]
"""

custom_rag_prompt = PromptTemplate.from_template(study_guide_rag_template)

retrieval_query_template = """\
<<SYS>>

Extract keywords from the given question.

Respond with a comma-seperated list of keywords ONLY. If a module code is found, prioritise it.

<</SYS>>

[INST]
question: {question}
keywords:
[/INST]
"""

retrieval_query_prompt_template = PromptTemplate.from_template(retrieval_query_template)

rag_chain = (
    {"context": retrieval_query_prompt_template | llm | retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)


In [107]:
from langchain.callbacks.tracers import ConsoleCallbackHandler

# response = rag_chain.invoke({"question": "Which operating system should practical work be compiled under for COS 110?"}, config={'callbacks': [ConsoleCallbackHandler()]})
response = rag_chain.invoke({"question": "Which operating system should practical work be compiled under for COS 110?"})

[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence] Entering Chain run with input:
[0m{
  "question": "Which operating system should practical work be compiled under for COS 110?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel<context,question>] Entering Chain run with input:
[0m{
  "question": "Which operating system should practical work be compiled under for COS 110?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel<context,question> > 3:chain:RunnablePassthrough] Entering Chain run with input:
[0m{
  "question": "Which operating system should practical work be compiled under for COS 110?"
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel<context,question> > 3:chain:RunnablePassthrough] [1ms] Exiting Chain run with output:
[0m{
  "question": "Which operating system should practical work be compiled under for COS 110?"
}
[32;1m[1;3m[chain/start][0

Llama.generate: prefix-match hit


Here are the keywords extracted from the question:

* operating system
* COS 110
* compiled
* practical


llama_print_timings:        load time =    1321.28 ms
llama_print_timings:      sample time =      11.16 ms /    29 runs   (    0.38 ms per token,  2598.80 tokens per second)
llama_print_timings: prompt eval time =   11294.42 ms /    79 tokens (  142.97 ms per token,     6.99 tokens per second)
llama_print_timings:        eval time =    6922.88 ms /    28 runs   (  247.25 ms per token,     4.04 tokens per second)
llama_print_timings:       total time =   18378.52 ms /   107 tokens
Llama.generate: prefix-match hit


[36;1m[1;3m[llm/end][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel<context,question> > 4:chain:RunnableSequence > 6:llm:LlamaCpp] [18.38s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "Here are the keywords extracted from the question:\n\n* operating system\n* COS 110\n* compiled\n* practical",
        "generation_info": null,
        "type": "Generation"
      }
    ]
  ],
  "llm_output": null,
  "run": null
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel<context,question> > 4:chain:RunnableSequence > 8:chain:format_docs] Entering Chain run with input:
[0m[inputs]
[36;1m[1;3m[chain/end][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel<context,question> > 4:chain:RunnableSequence > 8:chain:format_docs] [1ms] Exiting Chain run with output:
[0m{
  "output": "{'source': 'resources/all_pdfs/COS122_StudyGuide.pdf', 'page': 2}:\n Module COS 122:\n1 Overview1 1 DescriptionFundamen


llama_print_timings:        load time =    1321.28 ms
llama_print_timings:      sample time =      20.95 ms /    54 runs   (    0.39 ms per token,  2577.44 tokens per second)
llama_print_timings: prompt eval time =  138332.42 ms /   686 tokens (  201.65 ms per token,     4.96 tokens per second)
llama_print_timings:        eval time =   13710.36 ms /    53 runs   (  258.69 ms per token,     3.87 tokens per second)
llama_print_timings:       total time =  152472.83 ms /   739 tokens


[36;1m[1;3m[llm/end][0m [1m[1:chain:RunnableSequence > 10:llm:LlamaCpp] [152.48s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "I don't know the answer to your question as it is not provided in the given context. The context only mentions COS 122 and does not provide any information about COS 110 or practical work being compiled under any operating system.",
        "generation_info": null,
        "type": "Generation"
      }
    ]
  ],
  "llm_output": null,
  "run": null
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 11:parser:StrOutputParser] Entering Parser run with input:
[0m{
  "input": "I don't know the answer to your question as it is not provided in the given context. The context only mentions COS 122 and does not provide any information about COS 110 or practical work being compiled under any operating system."
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:RunnableSequence > 11:parser:StrOutputParser] [0ms] Exiting P

In [108]:
response

"I don't know the answer to your question as it is not provided in the given context. The context only mentions COS 122 and does not provide any information about COS 110 or practical work being compiled under any operating system."

In [98]:
from langchain.globals import set_verbose

set_verbose(True)
response = rag_chain.invoke({"question": "Which module teaches data structures and algorithms?"})
set_verbose(False)

Llama.generate: prefix-match hit


Here are the keywords extracted from the question:

* Data structures
* Algorithms
* Module


llama_print_timings:        load time =    1321.28 ms
llama_print_timings:      sample time =       6.60 ms /    22 runs   (    0.30 ms per token,  3331.82 tokens per second)
llama_print_timings: prompt eval time =    6371.32 ms /    55 tokens (  115.84 ms per token,     8.63 tokens per second)
llama_print_timings:        eval time =    4350.54 ms /    21 runs   (  207.17 ms per token,     4.83 tokens per second)
llama_print_timings:       total time =   10818.14 ms /    76 tokens
Llama.generate: prefix-match hit


Answer: The module that teaches data structures and algorithms is COS 212.


llama_print_timings:        load time =    1321.28 ms
llama_print_timings:      sample time =       6.37 ms /    20 runs   (    0.32 ms per token,  3138.73 tokens per second)
llama_print_timings: prompt eval time =   48091.58 ms /   252 tokens (  190.84 ms per token,     5.24 tokens per second)
llama_print_timings:        eval time =    4629.74 ms /    19 runs   (  243.67 ms per token,     4.10 tokens per second)
llama_print_timings:       total time =   52867.17 ms /   271 tokens
