In [1]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import os
from dotenv import load_dotenv
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Load variables from a local .env file into the process environment.
load_dotenv()
# Path to the GGUF model file handed to LlamaCpp later in the notebook
# (relative path, resolved against the notebook's working directory).
llm_local_path = "../models/model_007-70b.Q3_K_M.gguf"  

In [2]:
# Single-question prompt that nudges the model to reason step by step.
template = """Question: {question}

Answer: Let's think step by step."""

# `question` is the only placeholder the template exposes.
prompt = PromptTemplate(
    input_variables=["question"],
    template=template,
)

In [3]:
# # Callbacks support token-wise streaming
# callbacks = [StreamingStdOutCallbackHandler()]
# 
# # Verbose is required to pass to the callback manager
# llm_gpt4all = GPT4All(model=llm_local_path, callbacks=callbacks, verbose=True)

In [4]:
# llm_chain_gpt4all = LLMChain(prompt=prompt, llm=llm_gpt4all)

In [5]:
# question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
# 
# llm_chain_gpt4all.run(question)

In [2]:
from langchain.callbacks.manager import CallbackManager
from langchain.llms import LlamaCpp

# Load the local GGUF model through the llama.cpp wrapper.
llm_cpp = LlamaCpp(
    model_path=llm_local_path,
    n_gpu_layers=20,  # layers offloaded to the GPU; tune to available VRAM
    n_batch=256,
    n_ctx=2048,  # context window, in tokens
    f16_kv=True,
    # Handlers go through `callbacks`; `callback_manager=` is the deprecated
    # spelling and emits a DeprecationWarning on construction.
    callbacks=[StreamingStdOutCallbackHandler()],
    verbose=True,
    # NOTE(review): with streaming disabled the stdout streaming handler
    # presumably never receives tokens — confirm whether this is intended.
    streaming=False,
)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [4]:
# Chain the prompt with the llama.cpp-backed LLM.
# NOTE(review): `prompt` is assigned in more than one cell of this notebook;
# this chain picks up whichever binding is live when the cell runs.
llm_chain_cpp = LLMChain(prompt=prompt, llm=llm_cpp)

In [5]:
# Smoke-test the chain with one free-form question; the cell output is the
# generated answer (assumes `prompt` is the single-variable question template).
llm_chain_cpp.run("Tell me which is better - to live or not to live?")

'\n\nTo determine whether living or not living is "better," we need to first define what we mean by "better." Do we mean it in terms of happiness, success, personal growth, relationship with others and oneself, or something else entirely? Different people have different values and priorities, so the answer will depend on an individual\'s unique perspective.\n\nFor example, if you value happiness highly, then living could be considered "better" because it allows for experiences, connections, and personal growth that can contribute to one\'s happiness. On the other hand, if you value freedom or escape from suffering highly, not living might be considered "better" since it eliminates the potential for pain and hardship.\n\nUltimately, whether living or not living is "better" depends on what each individual considers most important in their life.'

In [3]:
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate

# System instructions for the retrieval-augmented answer: answer only from the
# supplied documents and cite them. `{context}` is where the retrieved
# documents are inserted.
system_template = """Create an informative and comprehensive answer for a given question based solely on the given documents. You must only use information from the given documents.
Use an unbiased and journalistic tone. Do not repeat text.
Cite the documents using [Document name] notation.
If multiple documents contain the answer, cite those documents like ‘as stated in [Document name 1], [Document name 2], etc.’.
You must include citations in your answer.
If the documents do not contain the answer to the question, say that ‘Answering is not possible given the available information.’
{context}
"""

# Two-message chat prompt: the system instructions followed by the user's question.
system_message = SystemMessagePromptTemplate.from_template(system_template)
human_message = HumanMessagePromptTemplate.from_template("{question}")
messages = [system_message, human_message]

# NOTE: this rebinds `prompt`, which an earlier cell assigned to a plain PromptTemplate.
prompt = ChatPromptTemplate.from_messages(messages)

In [4]:
from embeddings import get_local_embeddings
import os
from langchain.embeddings import OpenAIEmbeddings
from langchain import FAISS


# Location of the saved FAISS index (relative to the notebook's working directory).
faiss_local_path = '../data/embedded_dataset/faiss/local_500/faiss_idx'
# Alternative kept for reference: OpenAI-hosted embeddings instead of the local model.
# embeddings = OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'))
# NOTE(review): presumably this has to be the same embedding model the index
# was built with — confirm against the indexing script.
embeddings = get_local_embeddings("sentence-transformers/msmarco-distilbert-dot-v5")
# NOTE(review): load_local deserializes files from disk; only point it at an
# index you produced or otherwise trust.
db = FAISS.load_local(faiss_local_path, embeddings)

bin C:\Users\cubix\PycharmProjects\rag\venv39\lib\site-packages\bitsandbytes\libbitsandbytes_cuda118.dll


In [5]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Retriever over the FAISS store, configured with k=3 results per query.
retriever = db.as_retriever(search_kwargs={"k": 3})

# Conversation memory stored under "chat_history"; output_key="answer" selects
# which chain output is recorded, since the chain also returns source documents.
chat_memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer",
)

# Rendering of each retrieved document before it is placed into the prompt;
# "Source" is read from the document's metadata.
document_prompt = PromptTemplate(
    template="Document name: {Source}\nContext:\n{page_content}",
    input_variables=["page_content", "Source"],
)

# NOTE(review): `prompt` must be the chat prompt containing {context}; the same
# name is bound to a question-only PromptTemplate earlier in the notebook.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm_cpp,
    retriever,
    memory=chat_memory,
    return_source_documents=True,
    combine_docs_chain_kwargs={
        "document_prompt": document_prompt,
        "prompt": prompt,
    },
)

In [6]:
# Sanity-check retrieval on its own, without the LLM: show the documents the
# vector store returns for the question asked below.
db.similarity_search('What are the problems with the military standard IP specification?')

[Document(page_content='3.  Problems with MIL Standard IP', metadata={'Source': 'rfc963', 'Title': 'Some problems with the specification of the Military Standard Internet Protocol ', 'Updates': None, 'Obsoletes': None, 'Category': None, 'ISSN': None, 'Updated by': None, 'NIC': None, 'Obsoleted by': None, 'Related RFCs': None}),
 Document(page_content='RFC 964:  Some problems with the specification of the Military Standard Transmission Control Protocol', metadata={'Source': 'rfc964', 'Title': 'Some problems with the specification of the Military Standard Transmission Control Protocol ', 'Updates': None, 'Obsoletes': None, 'Category': None, 'ISSN': None, 'Updated by': None, 'NIC': None, 'Obsoleted by': None, 'Related RFCs': None}),
 Document(page_content='In our discussion above, we have pointed out several serious problems\n   with the Military Standard IP [MILS83a] specification which must be\n   corrected to produce a running implementation conforming to this\n   standard.  We have pr

In [None]:
# Ask the retrieval chain the same question used for the similarity search
# above and keep the full result for inspection in the following cells.
response = conversation_chain('What are the problems with the military standard IP specification?')

In [None]:
# Generated answer text ("answer" is the output key the chain's memory was configured with).
response['answer']

In [None]:
# Retrieved documents behind the answer (available because the chain was built
# with return_source_documents=True).
response['source_documents']

In [None]:
# Full result object returned by the chain call, shown for debugging.
response