In [10]:
import os
import getpass
import sqlite3
import pandas as pd
import time
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain
from langchain_chroma import Chroma
import chromadb
import yaml

In [11]:
conn = sqlite3.connect("data/database.db")
tickers = pd.read_sql_query("SELECT distinct Security FROM master_ticker", conn)["Security"].tolist()

In [12]:
with open("keys.yaml") as keys:
    try:
        api_keys = yaml.safe_load(keys)
    except yaml.YAMLError as exc:
        print(exc)

In [13]:
os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_keys["hf_model"]

llm = HuggingFaceEndpoint(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    task="text-generation",
    max_new_tokens=512,
    do_sample=False,
    temperature=0,
    repetition_penalty=1.03,
)

chat_model = ChatHuggingFace(llm=llm)


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\feder\.cache\huggingface\token
Login successful


### PDF Loader

In [14]:
loader = PyPDFDirectoryLoader("data/wikipedia/")
docs = loader.load()

In [15]:
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=HuggingFaceEmbeddings())



In [16]:
from langchain import hub
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
model_name = 'HuggingFaceH4/zephyr-7b-beta'
bnb_config = BitsAndBytesConfig(
 load_in_4bit=True,
 bnb_4bit_use_double_quant=True,
 bnb_4bit_quant_type="nf4",
 bnb_4bit_compute_dtype=torch.bfloat16
)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(model_name)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
`low_cpu_mem_usage` was None, now set to True since model is quantized.
Downloading shards:  12%|█▎        | 1/8 [10:18<1:12:10, 618.68s/it]


KeyboardInterrupt: 

In [None]:
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import pipeline
from langchain.chains import LLMChain

text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=400,
)

llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

prompt_template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:

{context}


<|user|>
{question}

<|assistant|>

 """

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

llm_chain = LLMChain(llm=llm, prompt=prompt)

Please use the `langsmith sdk` instead:
  pip install langsmith
Use the `pull_prompt` method.
  res_dict = client.pull_repo(owner_repo_commit)


HfHubHTTPError: 422 Client Error: Unprocessable Entity for url: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta (Request ID: _lRGloEwA98gfn5XOT0JB)

Input validation error: `temperature` must be strictly positive
Make sure 'text-generation' task is supported by the model.