***
# LangChain
***

# Imports

In [None]:
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.llms import GPT4All
from langchain.prompts import PromptTemplate

# Use GPT4All

In [None]:
# Step-by-step prompt: the user's question is substituted into {question} and
# the answer is primed with "Let's think step by step."
template = "Question: {question}\n\nAnswer: Let's think step by step."

prompt = PromptTemplate(
    input_variables=["question"],
    template=template,
)


In [None]:
import os

# Location of the local GPT4All model weights (.gguf file).
# Set the GPT4ALL_MODEL_PATH environment variable to point at your own copy;
# when it is unset, the original author's download location is used.
local_path = os.environ.get(
    "GPT4ALL_MODEL_PATH",
    "/Users/christine/Downloads/gpt4all-falcon-q4_0.gguf",
)

In [None]:
# Stream every generated token to stdout as it is produced.
callbacks = [StreamingStdOutCallbackHandler()]

llm = GPT4All(
    model=local_path,
    callbacks=callbacks,
    verbose=True,  # has to be on for the callbacks to be passed through
)


In [None]:
# Combine the local model and the prompt template into one chain.
llm_chain = LLMChain(llm=llm, prompt=prompt)


In [None]:
# Basic check of the model's general ability: ask it to simplify a passage.
# The first entry is the instruction; the remaining entries are the passage,
# one paragraph per line.
question = "\n".join(
    [
        "can you simplify this text for dyslexia people:",
        "When the young people returned to the ballroom, it presented a decidedly changed appearance. Instead of an interior scene, it was a winter landscape.",
        "The floor was covered with snow-white canvas, not laid on smoothly, but rumpled over bumps and hillocks, like a real snow field. The numerous palms and evergreens that had decorated the room, were powdered with flour and strewn with tufts of cotton, like snow. Also diamond dust had been lightly sprinkled on them, and glittering crystal icicles hung from the branches.",
        "At each end of the room, on the wall, hung a beautiful bear-skin rug.",
        "These rugs were for prizes, one for the girls and one for the boys. And this was the game.",
        "The girls were gathered at one end of the room and the boys at the other, and one end was called the North Pole, and the other the South Pole. Each player was given a small flag which they were to plant on reaching the Pole.",
        "This would have been an easy matter, but each traveller was obliged to wear snowshoes.",
    ]
)

# Run the chain; in a notebook the returned text is shown as the cell output.
llm_chain.run(question)


In [None]:
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter


from langchain.document_loaders import WikipediaLoader

# Fetch the Wikipedia content for the "Machine_learning" query.
loader = WikipediaLoader("Machine_learning")
docs = loader.load()

# Cut the articles into overlapping chunks (chunk_size=2000, overlap=200).
# The original author sized the chunks to the model's input limit of
# "2000 and a bit".
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=2000,
)
docs = text_splitter.split_documents(docs)

# Sentence-transformers model used to embed each chunk.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Directory passed to Chroma as its persist_directory.
persist_directory = "db/chroma_3/"

# Embed the chunks and build the vector store from them.
vectordb = Chroma.from_documents(
    docs,
    embeddings,
    persist_directory=persist_directory,
)


In [None]:
# Prompt for the retrieval chain. It exposes two placeholders: {context} and
# {question}. Note that this rebinds the notebook-level `template` and
# `prompt` names defined in an earlier cell.
template = (
    "Please generate a short summary of machine learning\n"
    "for the dyslexic people.\n"
    "\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Human: {question}\n"
    "Assistant:"
)

prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)


In [None]:
#retrieval
from langchain.chains import RetrievalQA
# Retrieval QA over the Wikipedia vector store: the retriever is asked for
# k=5 chunks, and the "stuff" chain type is configured with the custom prompt.
# Source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(search_kwargs=dict(k=5)),
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=prompt, verbose=False),
    verbose=False,
    return_source_documents=True,
)


In [None]:
# Query the retrieval chain and keep its full output in `res`.
res = qa(
    "generate a 100 words simplifiy text of Machine_learning for dyslexic students"
)


# Prompting

In [None]:
from langchain.document_loaders import PyPDFLoader
# Read the dyslexia style-guide PDF into LangChain Documents.
# load() is used instead of load_and_split(): load_and_split() already runs a
# default text splitter, and the following cell applies its own
# RecursiveCharacterTextSplitter, so the PDF would otherwise be split twice.
loader = PyPDFLoader("/Users/chenpeiyu/Desktop/A_Comparative_Study_of_Dyslexia_Style_Guides_in_Im.pdf")
documents = loader.load()

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Split the loaded PDF documents into overlapping chunks
# (chunk_size=1024, chunk_overlap=64).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=64,
    chunk_size=1024,
)
texts = text_splitter.split_documents(documents)

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
# Sentence-transformers model used to embed the PDF chunks.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [None]:
from langchain.vectorstores import Chroma
# Embed the PDF chunks and store them in a Chroma store under "db".
# NOTE(review): a later cell writes the same chunks to the same "db"
# directory; running both may store every chunk twice. Confirm.
db = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    persist_directory="db",
)

In [None]:
from langchain.llms import GPT4All
import os

# Location of the local GPT4All model weights (.gguf file).
# Set the GPT4ALL_MODEL_PATH environment variable to point at your own copy;
# when it is unset, the original author's checkout location is used.
model_path = os.environ.get(
    "GPT4ALL_MODEL_PATH",
    "/Users/chenpeiyu/code/ChristineSi/neuroCraft/raw_data/gpt4all-falcon-q4_0.gguf",
)
llm = GPT4All(model=model_path, verbose=False)

In [None]:
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

# Instruction part of the prompt. The passage to simplify is substituted into
# {text} at call time, so the passage itself is never parsed as template
# syntax and can be swapped for any other text.
# NOTE(review): "patience" is presumably meant to be "patients". It is left
# unchanged so the prompt sent to the model stays the same; confirm before
# rewording.
prompt_template = " please simplify the following text for dyslexic patience :{text}"

# Passage to simplify, joined into a single line of text.
passage = (
    "Mr. Grimes was to come up next morning to Sir John Harthover's, "
    "at the Place, for his old chimney-sweep was gone to prison, "
    "and the chimneys wanted sweeping. And so he rode away, "
    "not giving Tom time to ask what the sweep had gone to prison for, "
    "which was a matter of interest to Tom, as he had been in prison once or twice himself. "
    "Moreover, the groom looked so very neat and clean, with his drab gaiters, drab breeches, "
    "drab jacket, snow-white tie with a smart pin in it, and clean round ruddy face, that Tom was offended and disgusted at his appearance, and considered him a stuck-up fellow, who gave himself airs because he wore smart clothes, and other people paid for them; and went behind the wall to fetch the half-brick after all; but did not, remembering that he had come in the way of business, and was, as it were, under a flag of truce."
)

# Reuse the `llm` created in the previous cell instead of loading the same
# model file a second time.
llm_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt_template))
llm_chain.predict(text=passage)

# PDF reader

In [None]:
from langchain.document_loaders import PyPDFLoader
# Read the dyslexia style-guide PDF into LangChain Documents.
# load() is used instead of load_and_split(): load_and_split() already runs a
# default text splitter, and the following cell applies its own
# RecursiveCharacterTextSplitter, so the PDF would otherwise be split twice.
loader = PyPDFLoader("/Users/chenpeiyu/Desktop/A_Comparative_Study_of_Dyslexia_Style_Guides_in_Im.pdf")
documents = loader.load()

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Split the loaded PDF documents into overlapping chunks
# (chunk_size=1024, chunk_overlap=64).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=64,
    chunk_size=1024,
)
texts = text_splitter.split_documents(documents)

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
# Sentence-transformers model used to embed the PDF chunks.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [None]:
from langchain.vectorstores import Chroma
# Embed the PDF chunks and store them in a Chroma store under "db".
# NOTE(review): an earlier cell writes the same chunks to the same "db"
# directory; running both may store every chunk twice. Confirm.
db = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    persist_directory="db",
)

In [None]:
from langchain.llms import GPT4All
import os

# Location of the local GPT4All model weights (.gguf file).
# Set the GPT4ALL_MODEL_PATH environment variable to point at your own copy;
# when it is unset, the original author's checkout location is used.
model_path = os.environ.get(
    "GPT4ALL_MODEL_PATH",
    "/Users/chenpeiyu/code/ChristineSi/neuroCraft/raw_data/gpt4all-falcon-q4_0.gguf",
)
llm = GPT4All(model=model_path, verbose=False)

In [None]:
from langchain.chains import RetrievalQA
# Retrieval QA over the PDF vector store: the retriever is asked for k=3
# chunks and the "stuff" chain type is used with its default prompt.
# Source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(search_kwargs=dict(k=3)),
    chain_type="stuff",
    verbose=False,
    return_source_documents=True,
)

In [None]:
# Query the PDF retrieval chain and keep its full output in `res`.
# NOTE(review): the query says "this text" but no passage is supplied here;
# confirm this is the intended question.
res = qa(
    "simplify this text for dyslexic patience"
)

In [None]:
# Show the generated answer from the chain output.
res["result"]