In [1]:
# imports
import os

from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.prompts.prompt import PromptTemplate
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import ChatPromptTemplate

# LLaMa setup

In [2]:
# Callbacks support token-wise streaming: the stdout handler is wrapped in a
# CallbackManager, which is handed to the LlamaCpp model when it is created.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

In [3]:
# Location of the chat model weights. The path can be overridden through the
# LLAMA_CHAT_MODEL_PATH environment variable so the notebook is not tied to one
# machine; the previously hard-coded location is kept as the default.
llama_chat_model_path = os.environ.get(
    "LLAMA_CHAT_MODEL_PATH",
    r"C:\Users\david\Documents\Work\Training\LangChain testing\llama-2-7b-chat.Q5_K_S.gguf",
)

# Chat model used for every generation step in this notebook.
llm = LlamaCpp(
    model_path=llama_chat_model_path,
    temperature=0,
    max_tokens=500,
    n_ctx=2500,
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from C:\Users\david\Documents\Work\Training\LangChain testing\llama-2-7b-chat.Q5_K_S.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:      

# Study guide information extractor

In [4]:
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from pydantic import BaseModel, Field

# Pydantic model listing the four pieces of information to extract from a
# study guide; each Field description states the expected content and format.
# NOTE(review): this model is not referenced in the visible part of the
# notebook, and its `lecture_time` field is named `lecture_times` in the
# ResponseSchema list built in the following cell - confirm which is intended.
class ModuleInfo(BaseModel):
    module_code: str = Field(description="the alphanumeric, 6-character code for this module")
    brief_description: str = Field(description="a brief, one-sentence summary of this module")
    lecture_time: str = Field(description="days and times of this lecture, in hh:mm, day of week format")
    exam_date: str = Field(description="date of the exam in dd/mm format")

In [5]:
# One ResponseSchema per field the model must return. Each description is also
# the instruction text that ends up in the generated format instructions.
module_code_schema = ResponseSchema(
    name="module_code",
    description="What is the alphanumeric, 6-character code for this module? If unknown, answer AAA000.",
)

brief_description_schema = ResponseSchema(
    name="brief_description",
    description="Provide a brief, one-sentence summary of this module.",
)

# Prompt typo fixed: "wday of week" -> "day of week", matching the wording used
# for the lecture time field of ModuleInfo.
lecture_time_schema = ResponseSchema(
    name="lecture_times",
    description="What days and times are the lectures given? Answer in hh:mm - hh:mm, day of week format. If unknown, answer with the word unknown.",
)

exam_date_schema = ResponseSchema(
    name="exam_date",
    description="What date will the exam for this module occur on? Answer in dd/mm format. If unknown, answer with the word unknown.",
)

response_schemas = [
    module_code_schema,
    brief_description_schema,
    lecture_time_schema,
    exam_date_schema,
]

# The parser both produces the format instructions for the prompt and is used
# to parse the model's answer back into a dictionary.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

format_instructions = output_parser.get_format_instructions()

In [6]:
# Show the generated instructions so the expected JSON layout can be inspected.
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"module_code": string  // What is the alphanumeric, 6-character code for this module? If unknown, answer AAA000.
	"brief_description": string  // Provide a brief, one-sentence summary of this module.
	"lecture_times": string  // What days and times are the lectures given? Answer in hh:mm - hh:mm, wday of week format. If unknown, answer with the word unknown.
	"exam_date": string  // What date will the exam for this module occur on? Answer in dd/mm format. If unknown, answer with the word unknown.
}
```


In [7]:
# Prompt for extracting the module details from a study guide. The per-field
# instructions repeat the ResponseSchema descriptions, and the parser's format
# instructions are inserted at {format_instructions}.
# Prompt typo fixed: "wday of week" -> "day of week".
module_info_template = """\
<<SYS>>

Given the following study guide, extract the following information:

module_code: What is the alphanumeric, 6-character code for this module? If unknown, answer AAA000.

brief_description: Provide a brief, one-sentence summary of this module.

lecture_times: What days and times are the lectures given? Answer in "hh:mm - hh:mm, day of week" format. If unknown, answer with the word "unknown".

exam_date: What date will the exam for this module occur on? Answer in dd/mm format. If unknown, answer with the word "unknown". Never provide a trailing comma after this field.

{format_instructions}

Do not include a trailing comma after the exam_date field in the output. The JSON format never allows for a comma before a closing brace.

The study guide is provided below.

<</SYS>>

[INST]
study guide: {study_guide}
[/INST]
"""

# format_instructions is bound through partial_variables. It used to be passed
# as a bare keyword argument, which from_template does not interpret as a
# template value, so it was never actually bound to the prompt. Callers may
# still pass format_instructions explicitly; an explicit value takes precedence
# over the bound one.
module_info_prompt_template = ChatPromptTemplate.from_template(
    template=module_info_template,
    partial_variables={"format_instructions": format_instructions},
)

### PDF reader setup

In [8]:
from PyPDF2 import PdfReader
import os

Create list of study guide PDF paths

In [9]:
# Collect the path of every regular file in the study guide directory.
# os.scandir() is used as a context manager so the directory handle is released
# deterministically, and the result is sorted because scandir yields entries in
# arbitrary order, which would otherwise let the document order vary per run.
# NOTE(review): later cells and the recorded outputs refer to
# "resources/study_guide_pdfs/" - confirm which directory is intended.
with os.scandir(path="resources/all_pdfs/") as file_path_object:
    file_path_list = sorted(
        entry.path for entry in file_path_object if entry.is_file()
    )

Create a dictionary of study guide texts. TODO: as an additional step, redo this using the proper LangChain conventions for document loading.

In [10]:
# Map each PDF path to the concatenated text of its pages. Every page's text is
# prefixed with a blank-line separator, so each value also starts with "\n\n".
study_guide_text_dict: dict = {}

for file_path in file_path_list:
    pdf_reader: PdfReader = PdfReader(file_path)
    page_texts = [pdf_page.extract_text() for pdf_page in pdf_reader.pages]
    study_guide_text_dict[file_path] = "".join(
        "\n\n" + page_text for page_text in page_texts
    )

Create series of messages for the LLM

In [11]:
# Hand-written sample study guide text. In the visible notebook it is only
# referenced by the commented-out alternative call in the next cell.
test_guide = """\
This is the guide for module COS101.

Lectures happen on Wednesdays at 13:00-14:00.

The exam is on the 28th of June.

This module is about basic programming concepts.
"""

In [12]:
# Initially just working with one study guide - will scale to many once the
# first one is proven to work.
# The guide is looked up by file name instead of a hard-coded full path: the
# dictionary keys are the paths produced by the directory scan, so a fixed
# "resources/study_guide_pdfs/..." key raises KeyError whenever a different
# directory (currently "resources/all_pdfs/") is scanned.
target_guide_name = "COS710StudyGuide-2023.pdf"
target_guide_path = next(
    (path for path in study_guide_text_dict if os.path.basename(path) == target_guide_name),
    None,
)
if target_guide_path is None:
    raise KeyError(f"No study guide named {target_guide_name!r} was loaded")

study_guide_messages = module_info_prompt_template.format_messages(
    study_guide=study_guide_text_dict[target_guide_path],
    format_instructions=format_instructions,
)
# study_guide_messages = module_info_prompt_template.format_messages(study_guide=test_guide, format_instructions=format_instructions)

In [13]:
# response = llm.invoke(study_guide_messages)

In [14]:
# response

Invalid JSON object - create a chain which makes another request to try to fix it

In [15]:
from langchain.chains import SequentialChain

In [16]:
# Prompt for a second LLM pass: it receives a previous answer as
# {unchecked_json} and asks for the fixed JSON object in markdown only.
# Template placeholders: {format_instructions} and {unchecked_json}.
json_fix_template = """\
<<SYS>>

Given the following input, check whether it is given in valid JSON which has been printed in markdown. Return only the fixed JSON object in markdown.

{format_instructions}

<</SYS>>

[INST]
input: {unchecked_json}
[/INST]
"""

# Prompt template object built from the JSON-fixing template above.
json_fix_prompt_template = ChatPromptTemplate.from_template(template=json_fix_template)

In [17]:
# First link: extract the module information. Its raw answer is published under
# "unchecked_json", which is the placeholder the JSON-fixing prompt reads.
initial_info_link = LLMChain(llm=llm, prompt=module_info_prompt_template, output_key="unchecked_json")
# Second link: ask the model to repair the JSON produced by the first link.
# No output_key is given, so the result uses LLMChain's default key ("text").
json_fix_link = LLMChain(llm=llm, prompt=json_fix_prompt_template)

# json_fix_link was previously built but never added to the chain, so the
# repair request was never made and the chain returned "unchecked_json" rather
# than the "text" key that the follow-up cells read. Both links now run in order.
sequential_chain = SequentialChain(
    input_variables=["format_instructions", "study_guide"],
    chains=[initial_info_link, json_fix_link],
    verbose=True,
)

In [18]:
# Inputs for the sequential chain.
# The guide text is looked up by file name instead of a hard-coded full path:
# the dictionary keys are the paths produced by the directory scan, so a fixed
# "resources/study_guide_pdfs/..." key raises KeyError whenever a different
# directory (currently "resources/all_pdfs/") is scanned.
target_guide_name = "COS710StudyGuide-2023.pdf"
target_guide_path = next(
    (path for path in study_guide_text_dict if os.path.basename(path) == target_guide_name),
    None,
)
if target_guide_path is None:
    raise KeyError(f"No study guide named {target_guide_name!r} was loaded")

inputs = {
    "study_guide": study_guide_text_dict[target_guide_path],
    "format_instructions": format_instructions,
}
# response = sequential_chain.invoke(input=inputs)

In [19]:
# response: str = response['text']

In [20]:
# fixed_json_string = response.replace(",\n}\n", "\n}\n")

In [21]:
# test_dict = output_parser.parse(fixed_json_string)
# test_dict

# VectorStore for information gathering
## Steps to do:
- Add PDFs to VectorStore
- Search through VectorStores and answer information about all study guides

In [22]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import LlamaCppEmbeddings

# Embedding model that is later passed to the vector store.
llama_embeddings = LlamaCppEmbeddings(model_path="llama-2-7b.Q5_K_M.gguf")

# Load every listed PDF and gather the documents returned by load_and_split()
# into one flat list.
docs = []
for pdf_path in file_path_list:
    docs.extend(PyPDFLoader(file_path=pdf_path).load_and_split())

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from llama-2-7b.Q5_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32              = 32
ll

In [24]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of at most 1000 characters with a
# 200-character overlap. add_start_index=True records each chunk's offset,
# which shows up as `start_index` in the document metadata printed further down.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

In [27]:
from langchain_community.vectorstores import Chroma

# Build a Chroma vector store from the chunks, with llama_embeddings supplied
# as the embedding model.
vectorstore = Chroma.from_documents(documents=all_splits, embedding=llama_embeddings)


llama_print_timings:        load time =    1427.40 ms
llama_print_timings:      sample time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =    1427.14 ms /     8 tokens (  178.39 ms per token,     5.61 tokens per second)
llama_print_timings:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =    1428.89 ms /     9 tokens

llama_print_timings:        load time =    1427.40 ms
llama_print_timings:      sample time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =    1530.92 ms /     8 tokens (  191.36 ms per token,     5.23 tokens per second)
llama_print_timings:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =    1532.19 ms /     9 tokens

llama_print_timings:     

In [41]:
# Retriever over the vector store, configured with search_type="mmr" and k=5.
retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 5})

In [44]:
# Sanity check of the retriever: print only the metadata (page, source,
# start_index) of each document returned for a sample question.
retrieved_docs = retriever.invoke("When is the exam for COS132?")
for document in retrieved_docs:
    print(document.metadata)


llama_print_timings:        load time =    1427.40 ms
llama_print_timings:      sample time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =     912.96 ms /     8 tokens (  114.12 ms per token,     8.76 tokens per second)
llama_print_timings:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =     913.03 ms /     9 tokens


{'page': 0, 'source': 'resources/study_guide_pdfs/COS 132 study guide.pdf', 'start_index': 0}
{'page': 5, 'source': 'resources/study_guide_pdfs/COS 132 study guide.pdf', 'start_index': 1701}
{'page': 9, 'source': 'resources/study_guide_pdfs/COS216_StudyGuide.pdf', 'start_index': 795}
{'page': 1, 'source': 'resources/study_guide_pdfs/COS 132 study guide.pdf', 'start_index': 1601}
{'page': 5, 'source': 'resources/study_guide_pdfs/COS216_StudyGuide.pdf', 'start_index': 838}


In [34]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Return the ``page_content`` of every document, separated by blank lines."""
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

In [40]:
# Prompt for question answering over retrieved study guide chunks: the
# retrieved text goes into {context} and the user's question into {question}.
study_guide_rag_template = """\
<<SYS>>

Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.

context: {context}

<</SYS>>

[INST]
{question}
[/INST]
"""

custom_rag_prompt = PromptTemplate.from_template(study_guide_rag_template)

# Retrieval-augmented chain. The first step sends the incoming question to the
# retriever (whose documents are joined by format_docs) to fill `context`, and
# passes the question through unchanged as `question`; the filled prompt is
# then sent to the LLM and the result is parsed to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

In [45]:
# Ask the RAG chain for the COS132 exam date and keep the answer string.
exam_132_response = rag_chain.invoke("When is the exam for COS132?")


llama_print_timings:        load time =    1427.40 ms
llama_print_timings:      sample time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =     936.80 ms /     8 tokens (  117.10 ms per token,     8.54 tokens per second)
llama_print_timings:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =     937.46 ms /     9 tokens
Llama.generate: prefix-match hit


Based on the provided context, the exam for COS132 is scheduled to take place during the June examination period. Specifically, Examination Opportunity 3 (EO3) is scheduled to take place in June.


llama_print_timings:        load time =    1313.21 ms
llama_print_timings:      sample time =      20.29 ms /    51 runs   (    0.40 ms per token,  2513.55 tokens per second)
llama_print_timings: prompt eval time =  150813.32 ms /   526 tokens (  286.72 ms per token,     3.49 tokens per second)
llama_print_timings:        eval time =   18315.74 ms /    50 runs   (  366.31 ms per token,     2.73 tokens per second)
llama_print_timings:       total time =  169631.31 ms /   576 tokens


In [46]:
# Ask the RAG chain what COS216 covers.
# NOTE(review): the variable is named `which_lecturer_response`, but the
# question is about the module's subject matter - consider renaming.
which_lecturer_response = rag_chain.invoke("What subject matter does COS216 cover?")


llama_print_timings:        load time =    1427.40 ms
llama_print_timings:      sample time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =     953.79 ms /     8 tokens (  119.22 ms per token,     8.39 tokens per second)
llama_print_timings:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =     953.61 ms /     9 tokens
Llama.generate: prefix-match hit


COS216 covers the following subject matter:

* Goal setting and motivation
* Adjustment to university life
* Test or exam preparation
* Stress management
* Career exploration
* Time management and study methods
* FLY@UP: The Finish Line is Yours (think carefully before dropping modules after the closing date for amendments or cancellation of modules)
* Make responsible choices with your time and work consistently
* Aim for a good semester mark. Don't rely on the examination to pass.


llama_print_timings:        load time =    1313.21 ms
llama_print_timings:      sample time =      54.51 ms /   123 runs   (    0.44 ms per token,  2256.38 tokens per second)
llama_print_timings: prompt eval time =  189426.92 ms /   828 tokens (  228.78 ms per token,     4.37 tokens per second)
llama_print_timings:        eval time =   45678.97 ms /   122 runs   (  374.42 ms per token,     2.67 tokens per second)
llama_print_timings:       total time =  236164.43 ms /   950 tokens
