In [None]:
!pip install gpt4all langchain==0.0.342

#### Download the model here: https://gpt4all.io/index.html

In [None]:
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.llms import GPT4All
from langchain.prompts import PromptTemplate

In [None]:
# Prompt with a single {question} placeholder that nudges the model to
# reason step by step before answering.
template = (
    "Question: {question}\n"
    "\n"
    "Answer: Let's think step by step."
)

prompt = PromptTemplate(input_variables=["question"], template=template)

In [None]:
# Path of the model file on disk; replace with your desired local file path.
local_path = "./models/mistral-7b-openorca.Q4_0.gguf"

In [None]:
# Stream tokens to stdout one by one as the model generates them.
callbacks = [StreamingStdOutCallbackHandler()]

# Load the local model. verbose=True is kept alongside the callbacks; the
# original note states it is required for them to reach the callback manager.
llm = GPT4All(
    model=local_path,
    verbose=True,
    callbacks=callbacks,
)

In [None]:
# Combine the local model with the question prompt into one chain.
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [None]:
# Check the model's general knowledge (no retrieval involved).
question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"

# Fill the prompt's {question} slot by keyword and run the chain.
llm_chain.run(question=question)

In [None]:
import os

from langchain.document_loaders import WikipediaLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Load content from Wikipedia using WikipediaLoader
loader = WikipediaLoader("Machine_learning")
docs = loader.load()

# Split the pages into chunks of at most 2000 characters with 200 characters
# of overlap between neighbouring chunks.
# NOTE(review): the QA chain below stuffs 5 retrieved chunks into one prompt;
# confirm that 5 x 2000 characters fits the model's context window.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
docs = text_splitter.split_documents(docs)

# Sentence-transformers model used to embed the chunks
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

persist_directory = 'db/chroma_3/'

# Build the vector store only once. Calling Chroma.from_documents again on a
# directory that already holds a store would add the same chunks a second time,
# so re-running this cell would fill the retriever with duplicates.
# Delete the directory to force a rebuild (e.g. after changing the query above).
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    # Reopen the store persisted by an earlier run
    vectordb = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )
else:
    # Create the vector store
    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    # Flush to disk right away so the next run finds the persisted store
    vectordb.persist()

In [None]:
# Show the vector store object as the cell output to confirm it exists.
vectordb

In [None]:
# Prompt for retrieval-based question answering; it expects a `context` and a
# `question` input. Note that this rebinds `template` and `prompt`, replacing
# the question-only prompt defined earlier in the notebook.
# The template is spelled out piece by piece so that the trailing spaces and
# line breaks it contains are visible.
template = (
    "Answer the question in your own words from the \n"
    "context given to you.\n"
    "If questions are asked where there is no relevant context available, please answer from \n"
    "what you know.\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Human: {question}\n"
    "Assistant:"
)

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

In [None]:
# Question-answering chain backed by the vector store.
from langchain.chains import RetrievalQA

# The retriever is asked for k=5 documents per query; the "stuff" chain type
# is given the custom prompt, and the source documents are returned together
# with the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(search_kwargs=dict(k=5)),
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=prompt, verbose=False),
    return_source_documents=True,
    verbose=False,
)

In [None]:
# Ask the retrieval chain a question, passing the input under its "query" key;
# the returned mapping is kept in `res`.
res = qa({"query": "What is overfitting"})