#### **Output Parser**

In [22]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate
from langchain_community.llms import Ollama

In [11]:
chat = Ollama(model='llama3')

In [20]:
response_schemas = [
    ResponseSchema(name='bad_string', description='This is a poorly formatted user input string'),
    ResponseSchema(name='good_string', description='This is your response, a reformatted response'),
]

output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

In [21]:
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"bad_string": string  // This is a poorly formatted user input string
	"good_string": string  // This is your response, a reformatted response
}
```


In [24]:
template = """
You are will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly

{format_instructions}

%USER INPUT:
{user_input}

YOUR RESPONSE:
"""

prompt = PromptTemplate(
    input_variables=['user_input'],
    partial_variables={"format_instructions" : format_instructions},
    template=template
)

promptValue = prompt.format(user_input="Welcome to Californiya!")
print(promptValue)


You are will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"bad_string": string  // This is a poorly formatted user input string
	"good_string": string  // This is your response, a reformatted response
}
```

%USER INPUT:
Welcome to Californiya!

YOUR RESPONSE:



In [25]:
llm_output = chat.invoke(promptValue)

In [26]:
print(llm_output)

Here is the reformatted string:

```json
{
    "bad_string": "Welcome to Californiya!",
    "good_string": "Welcome to California!"
}
```

Note that I corrected the spelling of "Californya" to "California". Let me know if you have any further requests!


In [29]:
parsed_output = output_parser.parse(llm_output)
print(parsed_output)

{'bad_string': 'Welcome to Californiya!', 'good_string': 'Welcome to California!'}


#### **Text Splitters**
Often times your document is too long (like a book) for your LLM. You need to split it up into chunks. Text splitters help with this.

There are many ways you could split your text into chunks, experiment with [different ones](https://python.langchain.com/en/latest/modules/indexes/text_splitters.html) to see which is best for you.

In [31]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

with open('hp.txt', 'r') as f:
    data = f.read()

print(f'Length of the data : {len([data])}')

Length of the data : 1


In [33]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 100
) 

texts = text_splitter.create_documents([data]) 

In [34]:
print(f'You have {len(texts)} documents.')

You have 532 documents.


#### **Retrievers**
Easy way to combine documents with language models.

There are many different types of retrievers, the most widely supported is the VectoreStoreRetriever

In [40]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
# from langchain.embeddings import OllamaEmbeddings # Depricated 
from langchain_ollama.embeddings import OllamaEmbeddings

loader = TextLoader('hp.txt')
documents = loader.load()

In [44]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 100
)

texts = text_splitter.split_documents(documents)

embeddings = OllamaEmbeddings(model='llama3')

db = FAISS.from_documents(texts[:1], embeddings)

In [48]:
retriever = db.as_retriever()

In [53]:
docs = retriever.get_relevant_documents('What is the place name here')

In [58]:
docs, len(docs)

([Document(id='c97a7e45-5940-41f4-ab84-d9453372387c', metadata={'source': 'hp.txt'}, page_content="Harry Potter and the Sorcerer's Stone\n\n\nCHAPTER ONE\n\nTHE BOY WHO LIVED\n\nMr. and Mrs. Dursley, of number four, Privet Drive, were proud to say\nthat they were perfectly normal, thank you very much. They were the last\npeople you'd expect to be involved in anything strange or mysterious,\nbecause they just didn't hold with such nonsense.\n\nMr. Dursley was the director of a firm called Grunnings, which made\ndrills. He was a big, beefy man with hardly any neck, although he did\nhave a very large mustache. Mrs. Dursley was thin and blonde and had\nnearly twice the usual amount of neck, which came in very useful as she\nspent so much of her time craning over garden fences, spying on the\nneighbors. The Dursleys had a small son called Dudley and in their\nopinion there was no finer boy anywhere.")],
 1)

In [67]:
l = "\n".join([x.page_content for x in docs])
print(l)

Harry Potter and the Sorcerer's Stone


CHAPTER ONE

THE BOY WHO LIVED

Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say
that they were perfectly normal, thank you very much. They were the last
people you'd expect to be involved in anything strange or mysterious,
because they just didn't hold with such nonsense.

Mr. Dursley was the director of a firm called Grunnings, which made
drills. He was a big, beefy man with hardly any neck, although he did
have a very large mustache. Mrs. Dursley was thin and blonde and had
nearly twice the usual amount of neck, which came in very useful as she
spent so much of her time craning over garden fences, spying on the
neighbors. The Dursleys had a small son called Dudley and in their
opinion there was no finer boy anywhere.


#### **Chat Message History**

In [68]:
from langchain.memory import ChatMessageHistory
from langchain_community.llms import Ollama

chat = Ollama(model='llama3')

history = ChatMessageHistory()

history.add_ai_message('Hi')

history.add_user_message('What is a capital for India?')

In [69]:
history.messages

[AIMessage(content='Hi', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='What is a capital for India?', additional_kwargs={}, response_metadata={})]

In [72]:
ai_response = chat.invoke(history.messages)
ai_response

'The capital of India is New Delhi.'

In [73]:
history.add_ai_message(ai_response)

In [75]:
history.messages

[AIMessage(content='Hi', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='What is a capital for India?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='The capital of India is New Delhi.', additional_kwargs={}, response_metadata={})]