In [1]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.llms import CTransformers
from langchain.prompts import PromptTemplate


import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

# Prefer a CUDA GPU when PyTorch can see one; otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Directory of source documents, relative to the working directory
# (presumably the PDFs the index was built from; not read in the visible cells).
DATA_PATH = "data/"
# Folder holding the saved FAISS index that this notebook loads.
DB_FAISS_PATH = "../vectorstore/db_faiss"

In [3]:
# Sentence-embedding model used to encode queries for the FAISS lookup.
# NOTE: HuggingFaceBgeEmbeddings prepends a BGE-specific retrieval instruction
# to every query by default. all-MiniLM-L6-v2 is not a BGE model, so that prefix
# only distorts the query vector relative to the stored document vectors.
# It is disabled here; document embedding is unaffected because the wrapper's
# default document instruction is already empty.
embeddings = HuggingFaceBgeEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cpu'},  # run the encoder on CPU
    query_instruction='',
)



In [4]:
#embeddings.

In [5]:
# Load the persisted FAISS index from disk; `embeddings` is attached so the
# store can embed incoming queries.
# SECURITY: loading unpickles data from the index folder, which can execute
# arbitrary code. allow_dangerous_deserialization=True is only appropriate for
# index files you created yourself or otherwise fully trust.
db = FAISS.load_local(
    folder_path=DB_FAISS_PATH,
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [6]:
# Sample question reused by the later cells.
query = "What is a stock?"
# Fetch the nearest chunks from the vector store (k=4 is the library default).
docs = db.similarity_search(query, k=4)

In [7]:
# Show the text of the two top-ranked chunks.
for rank in (0, 1):
    print(docs[rank].page_content)

the one who held the largest position in this stock.
P1: a/b P2: c/d QC: e/f T1: g
c15 JWBT310-Fabozzi July 1, 2010 16:18 Printer: Courier Westford, Westford, MA
396 INVESTMENT MANAGEMENT
We can classify stocks by style in many ways. The most common is
in terms of one or more measures of growth and value. Within a growth
and value style, there is a substyle based on some measure of size, such
as market capitalization. The market capitalization of a corporation is the
total market value of its common stock outstanding, which is the product of


In [8]:
# Dump the full list of retrieved Document objects (text plus metadata) for inspection.
print(docs)

[Document(page_content='the one who held the largest position in this stock.', metadata={'source': 'data\\The Nature of Investing.pdf', 'page': 147}, _lc_kwargs={'page_content': 'the one who held the largest position in this stock.', 'metadata': {'source': 'data\\The Nature of Investing.pdf', 'page': 147}}), Document(page_content='P1: a/b P2: c/d QC: e/f T1: g\nc15 JWBT310-Fabozzi July 1, 2010 16:18 Printer: Courier Westford, Westford, MA\n396 INVESTMENT MANAGEMENT\nWe can classify stocks by style in many ways. The most common is\nin terms of one or more measures of growth and value. Within a growth\nand value style, there is a substyle based on some measure of size, such\nas market capitalization. The market capitalization of a corporation is the\ntotal market value of its common stock outstanding, which is the product of', metadata={'source': 'data\\The Basics of Finance An Introduction to Financial Markets, Business Finance, and Portfolio Management.pdf', 'page': 411}, _lc_kwargs={'

In [9]:
# Wrap the vector store in a retriever and run the same query through it.
# Note: this rebinds `docs`, replacing the result of the earlier similarity search.
retriever = db.as_retriever()
docs = retriever.invoke(query)

In [10]:
# Show the text of the two top-ranked chunks returned by the retriever.
for rank in (0, 1):
    print(docs[rank].page_content)

the one who held the largest position in this stock.
P1: a/b P2: c/d QC: e/f T1: g
c15 JWBT310-Fabozzi July 1, 2010 16:18 Printer: Courier Westford, Westford, MA
396 INVESTMENT MANAGEMENT
We can classify stocks by style in many ways. The most common is
in terms of one or more measures of growth and value. Within a growth
and value style, there is a substyle based on some measure of size, such
as market capitalization. The market capitalization of a corporation is the
total market value of its common stock outstanding, which is the product of


In [20]:
# Only the single top-ranked chunk is used as context; the question is the sample query.
# NOTE(review): the remaining retrieved chunks are ignored here — confirm that is intended.
context, question = docs[0].page_content, query

In [21]:
# Prompt template kept as a plain (non-f) string: {context} and {question} remain
# literal placeholders until something formats them.
# NOTE(review): no visible cell reads this variable — set_prompt builds its own
# f-string instead; confirm whether this cell is still needed.
custom_prompt_template = """Use the following pieces of information to answer the user's question.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.

    Context: {context}
    Question: {question}

    Only return the helpful answer below and nothing else. Try to make it short. Maximum of 500 words.
    Helpful answer:
    """

In [24]:
def set_prompt(context: str, question: str) -> str:
    """Build the question-answering prompt text.

    Args:
        context: Passage the answer should be based on.
        question: The user's question.

    Returns:
        The instruction text with ``context`` and ``question`` substituted in.
        Continuation lines keep the four-space indent of the literal below.
    """
    return f"""Use the following pieces of information to answer the user's question.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.

    Context: {context}
    Question: {question}

    Only return the helpful answer below and nothing else. Try to make it short. Maximum of 500 words.
    Helpful answer:
    """

In [25]:
# Fill the prompt with the chosen context and the sample question, then show it.
prompt = set_prompt(context=context, question=question)
print(prompt)

Use the following pieces of information to answer the user's question.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.

    Context: the one who held the largest position in this stock.
    Question: What is a stock?

    Only return the helpful answer below and nothing else. Try to make it short. Maximum of 500 words.
    Helpful answer:
    


def load_llm():
    """Create the Llama-2 chat GGML model through LangChain's CTransformers wrapper.

    Returns:
        A ``CTransformers`` LLM for ``TheBloke/Llama-2-7B-Chat-GGML`` configured
        with ``max_new_tokens=512`` and ``temperature=0.5``.
    """
    # Generation settings have to go through ``config``: the wrapper forwards only
    # that dict to ctransformers, so ``max_new_tokens`` / ``temperature`` passed as
    # direct keyword arguments never reach the underlying model.
    llm = CTransformers(
        model="TheBloke/Llama-2-7B-Chat-GGML",
        model_type="llama",
        config={"max_new_tokens": 512, "temperature": 0.5},
    )
    return llm

# Instantiate the model once and run a short smoke-test completion.
llm = load_llm()

# Use the Runnable ``invoke`` API (as the retriever cell does); calling the LLM
# object directly is deprecated in current LangChain releases.
print(llm.invoke('AI is going to'))

In [29]:
# Hugging Face Hub checkpoint shared by the model and its tokenizer.
LLAMA2_CHECKPOINT = "TinyPixel/Llama-2-7B-bf16-sharded"

# device_map='auto' leaves weight placement to the library.
# Pass cache_dir=... to either call to store the download somewhere specific.
# NOTE(review): no torch_dtype is given, so the library's default dtype is used —
# consider torch_dtype="auto" if memory or latency is a problem; confirm first.
model = AutoModelForCausalLM.from_pretrained(LLAMA2_CHECKPOINT, device_map='auto')

tokenizer = AutoTokenizer.from_pretrained(LLAMA2_CHECKPOINT)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading shards: 100%|██████████| 14/14 [42:49<00:00, 183.55s/it]
Loading checkpoint shards: 100%|██████████| 14/14 [00:19<00:00,  1.39s/it]


In [30]:
def get_llama2_chat_reponse(prompt, max_new_tokens=1000, return_full_text=True):
    """Generate a completion for ``prompt`` with the notebook-level ``model`` and ``tokenizer``.

    Args:
        prompt: Text to feed to the model.
        max_new_tokens: Upper bound on the number of tokens generated after the prompt.
        return_full_text: When True (the default, matching the original behaviour)
            the decoded string contains the prompt followed by the completion;
            when False only the newly generated tokens are decoded.

    Returns:
        The decoded text with special tokens removed.
    """
    # Place the inputs on the device that holds the model's weights. The model is
    # loaded with device_map='auto', so that need not match the global `device`.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Explicit greedy decoding. The earlier temperature=0.00001 had no effect
    # unless sampling was enabled and only produced a warning.
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)

    generated = outputs[0]
    if not return_full_text:
        # The output sequence starts with the prompt tokens; slice them off.
        generated = generated[inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(generated, skip_special_tokens=True)

In [31]:
# Generate an answer for the assembled prompt and display it.
response = get_llama2_chat_reponse(prompt, max_new_tokens=1000)
print(response)



KeyboardInterrupt: 