In [1]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import os
from dotenv import load_dotenv
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Local GGUF model file, addressed relative to the notebook's working directory.
llm_local_path = "../models/mistral-7b-openorca.Q4_0.gguf"

# Load variables from a .env file into the process environment; a later cell
# reads OPENAI_API_KEY from there.
load_dotenv()

In [14]:
# Single-variable prompt: the question is inserted and the answer is primed
# with a "think step by step" lead-in.
template = (
    "Question: {question}\n"
    "\n"
    "Answer: Let's think step by step."
)

prompt = PromptTemplate(input_variables=["question"], template=template)

In [15]:
# # Callbacks support token-wise streaming
# callbacks = [StreamingStdOutCallbackHandler()]
# 
# # Verbose is required to pass to the callback manager
# llm_gpt4all = GPT4All(model=llm_local_path, callbacks=callbacks, verbose=True)

In [16]:
# llm_chain_gpt4all = LLMChain(prompt=prompt, llm=llm_gpt4all)

In [17]:
# question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
# 
# llm_chain_gpt4all.run(question)

In [9]:
from langchain.callbacks.manager import CallbackManager
from langchain.llms import LlamaCpp

# Load the local GGUF model through llama.cpp.
#
# The stdout handler is supplied through `callbacks`; LangChain deprecates the
# `callback_manager` keyword in favour of it.
#
# NOTE(review): with streaming=False the stdout handler may never receive any
# tokens — confirm whether token-by-token output is wanted here.
# NOTE(review): the system info recorded under this cell reports BLAS = 0, so
# the requested GPU offload may have no effect on this build — verify.
llm_cpp = LlamaCpp(
    model_path=llm_local_path,
    n_gpu_layers=2,   # number of model layers requested for GPU offload
    n_batch=512,      # batch size used when processing the prompt
    n_ctx=8192,       # context window size, in tokens
    f16_kv=True,      # keep the key/value cache in half precision
    callbacks=[StreamingStdOutCallbackHandler()],
    verbose=True,
    streaming=False,
)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [19]:
# Pair the llama.cpp model with whatever `prompt` is currently bound to.
llm_chain_cpp = LLMChain(llm=llm_cpp, prompt=prompt)

In [20]:
# Quick check of the plain LLM chain; the returned text is shown as the cell output.
llm_chain_cpp.run(
    "Tell me which is better - to live or not to live?"
)

" If you are asking about the quality of life, it depends on individual circumstances. If one does not have happiness and peace, then living becomes a burden rather than a joy. Therefore, living with constant unhappiness is not better. On the other hand, if someone has peace and happiness in their life, then living is better.\n\nNow, if you are asking about the general concept of life vs. no-life (such as living versus being dead), this question doesn't have a simple answer either.\n\nThe concept of life depends on an individual's perspective. Some people believe that to live and enjoy the experiences of life is better than nothing. Even if there are ups and downs, they still think that living gives more value in comparison to no-life (death). Others may choose not to live because they do not wish to experience suffering and pain. Therefore, it depends on one's perspective and personal choice.\n\nIn conclusion, both sides have their pros and cons based on individual circumstances, so i

In [16]:
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate

# System instructions for document-grounded answering. `{context}` is the slot
# for the retrieved documents; the user's question arrives as a separate human
# message through `{question}`.
system_template = """Create an informative and comprehensive answer for a given question based solely on the given documents. You must only use information from the given documents.
Use an unbiased and journalistic tone. Do not repeat text.
Cite the documents using [Document name] notation.
If multiple documents contain the answer, cite those documents like ‘as stated in [Document name 1], [Document name 2], etc.’.
You must include citations in your answer.
If the documents do not contain the answer to the question, say that ‘Answering is not possible given the available information.’
{context}
"""

# Chat prompt made of the system instructions followed by the human question.
# Note: this rebinds the name `prompt`.
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)

In [11]:
import os
from langchain.embeddings import OpenAIEmbeddings

# Directory of the FAISS index on disk, relative to the notebook.
faiss_local_path = "../data/embedded_dataset/faiss/openai_1000/faiss_idx"

# The API key is taken from the environment instead of being written into the notebook.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ.get("OPENAI_API_KEY"))

In [12]:
# Import FAISS from the vectorstores package; the re-export from the `langchain`
# root module is deprecated.
from langchain.vectorstores import FAISS

# Open the FAISS index stored at `faiss_local_path`. The embeddings object is
# handed over as well, presumably so that queries are embedded the same way as
# the indexed documents.
# NOTE(review): load_local reads pickled data from disk — only load index
# directories that come from a trusted source.
db = FAISS.load_local(faiss_local_path, embeddings)

In [13]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Conversational retrieval chain: the local llama.cpp model answers questions
# using documents fetched from the FAISS index, with the chat history buffered
# in memory.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm_cpp,
    # Ask the retriever for k=3 documents per query.
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    # The memory is told which output to record ("answer"), presumably because
    # the chain also returns the source documents.
    memory=ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        return_messages=True,
    ),
    return_source_documents=True,
    combine_docs_chain_kwargs={
        "prompt": prompt,
        # Each retrieved document is rendered with its `Source` metadata as the
        # name, so the prompt's [Document name] citations have something to cite.
        "document_prompt": PromptTemplate(
            template="Document name: {Source}\nContext:\n{page_content}",
            input_variables=["page_content", "Source"],
        ),
    },
)

In [14]:
# Ask one question and keep the whole result mapping so that later cells can
# look at individual entries.
response = conversation_chain(
    {"question": "What are the problems with the military standard IP specification?"}
)

In [18]:
# Show the retrieved documents that were returned alongside the answer
# (the chain was built with return_source_documents=True).
response["source_documents"]

[Document(page_content='RFC 963:  Some problems with the specification of the Military Standard Internet Protocol \n\n \n\n[RFC Home] [TEXT|PDF|HTML] [Tracker] [IPR] [Info page]                                                                                    UNKNOWN Network Working Group                                 Deepinder P. Sidhu\nRequest for Comments: 963                          Iowa State University\n                                                           November 1985\n\n              SOME PROBLEMS WITH THE SPECIFICATION OF THE\n                  MILITARY STANDARD INTERNET PROTOCOL\n\n\nSTATUS OF THIS MEMO\n\n   The purpose of this RFC is to provide helpful information on the\n   Military Standard Internet Protocol (MIL-STD-1777) so that one can\n   obtain a reliable implementation of this protocol standard.\n   Distribution of this note is unlimited.\n\nABSTRACT', metadata={'Source': 'rfc963', 'Title': 'Some problems with the specification of the Military Standard Int