In [2]:
from torch import cuda, bfloat16
import transformers , torch

model_id = 'meta-llama/Llama-2-7b-chat-hf'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, you need an access token
hf_auth = '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'
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)
print(device)



cuda:0


In [5]:

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)

# enable evaluation mode to allow model inference
model.eval()

print(f"Model loaded on {device}")

tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)


stop_list = ['\nHuman:', '\n```\n']

stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
stop_token_ids

stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
stop_token_ids



ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [4]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    stopping_criteria=stopping_criteria,  # without this model rambles during chat
    temperature=0.1,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

res = generate_text("how can i'm use llama with the langchain?.")
print(res[0]["generated_text"])


NameError: name 'model' is not defined

### Langchain

Iniciando o uso do HF pipeline com o LangChain 


In [3]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=generate_text)

#llm(prompt="how can i'm use llama with the langchain?.")

NameError: name 'generate_text' is not defined

### Iniciando o uso de uma leitura de documentos

Segue o código a baixo

In [24]:
from langchain.document_loaders import WebBaseLoader
from langchain_community.document_loaders import PyPDFLoader

from langchain_community.document_loaders.csv_loader import CSVLoader

#file_path = ("/home/rafael/Desktop/LFA/LFA2.pdf")

loaderCSV = CSVLoader(file_path='/home/rafael/Python_projects/LangChain/bcdata.sgs.11.csv')

dataCSV = loaderCSV.load()

#loader = PyPDFLoader(file_path)
#pages = loader.load_and_split()

#pages[0]

print(dataCSV)



[Document(metadata={'source': '/home/rafael/Python_projects/LangChain/bcdata.sgs.11.csv', 'row': 0}, page_content='data;"valor": 04/06/1986;"0\nNone: 065041"'), Document(metadata={'source': '/home/rafael/Python_projects/LangChain/bcdata.sgs.11.csv', 'row': 1}, page_content='data;"valor": 05/06/1986;"0\nNone: 067397"'), Document(metadata={'source': '/home/rafael/Python_projects/LangChain/bcdata.sgs.11.csv', 'row': 2}, page_content='data;"valor": 06/06/1986;"0\nNone: 066740"'), Document(metadata={'source': '/home/rafael/Python_projects/LangChain/bcdata.sgs.11.csv', 'row': 3}, page_content='data;"valor": 09/06/1986;"0\nNone: 068247"'), Document(metadata={'source': '/home/rafael/Python_projects/LangChain/bcdata.sgs.11.csv', 'row': 4}, page_content='data;"valor": 10/06/1986;"0\nNone: 067041"'), Document(metadata={'source': '/home/rafael/Python_projects/LangChain/bcdata.sgs.11.csv', 'row': 5}, page_content='data;"valor": 11/06/1986;"0\nNone: 066740"'), Document(metadata={'source': '/home/raf

## Load WEB
carregando arquivos da web

In [13]:
from langchain.document_loaders import WebBaseLoader

web_links = ["https://www.databricks.com/","https://help.databricks.com","https://databricks.com/try-databricks","https://help.databricks.com/s/","https://docs.databricks.com","https://kb.databricks.com/","http://docs.databricks.com/getting-started/index.html","http://docs.databricks.com/introduction/index.html","http://docs.databricks.com/getting-started/tutorials/index.html","http://docs.databricks.com/release-notes/index.html","http://docs.databricks.com/ingestion/index.html","http://docs.databricks.com/exploratory-data-analysis/index.html","http://docs.databricks.com/data-preparation/index.html","http://docs.databricks.com/data-sharing/index.html","http://docs.databricks.com/marketplace/index.html","http://docs.databricks.com/workspace-index.html","http://docs.databricks.com/machine-learning/index.html","http://docs.databricks.com/sql/index.html","http://docs.databricks.com/delta/index.html","http://docs.databricks.com/dev-tools/index.html","http://docs.databricks.com/integrations/index.html","http://docs.databricks.com/administration-guide/index.html","http://docs.databricks.com/security/index.html","http://docs.databricks.com/data-governance/index.html","http://docs.databricks.com/lakehouse-architecture/index.html","http://docs.databricks.com/reference/api.html","http://docs.databricks.com/resources/index.html","http://docs.databricks.com/whats-coming.html","http://docs.databricks.com/archive/index.html","http://docs.databricks.com/lakehouse/index.html","http://docs.databricks.com/getting-started/quick-start.html","http://docs.databricks.com/getting-started/etl-quick-start.html","http://docs.databricks.com/getting-started/lakehouse-e2e.html","http://docs.databricks.com/getting-started/free-training.html","http://docs.databricks.com/sql/language-manual/index.html","http://docs.databricks.com/error-messages/index.html","http://www.apache.org/","https://databricks.com/privacy-policy","https://databricks.com/terms-of-use"]

loader1 = WebBaseLoader(web_links)
documents = loader1.load()
     

In [25]:
### CREATING CHINKS USING TEXT SPLITTERS

from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000 , chunk_overlap=20)
#all_splits = text_splitter.split_documents(documents)

csvSplit = text_splitter.split_documents(dataCSV)

#split_PDF = text_splitter.split_documents(pages)

### CREATING EMBEDDINGS

Criando embeddings e armazenando in a vector store


In [27]:
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import FAISS

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device" : "cuda"}

embeddings = HuggingFaceBgeEmbeddings(model_name = model_name , model_kwargs = model_kwargs)

#Armazenando no armazem de vetores

#vectorstore = FAISS.from_documents(all_splits , embeddings)
vectorstore = FAISS.from_documents(csvSplit , embeddings)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

## INICIANDO A CADEIA

In [1]:
from langchain.chains import ConversationalRetrievalChain

chain = ConversationalRetrievalChain.from_llm(llm , vectorstore.as_retriever() , return_source_documents=True)


chat_hist = []

query1 = "Base on the documment give the prediction to the next 5 months of 2024 no excuses only give me the prediction"
result = chain({"question" : query1 , "chat_history" : chat_hist})

print(result['answer'])

NameError: name 'llm' is not defined

In [20]:
print(result['source_documents'])

[Document(metadata={'source': 'http://docs.databricks.com/lakehouse-architecture/index.html', 'title': 'Introduction to the well-architected data lakehouse | Databricks on AWS', 'description': 'Introduction to articles that describe principles and best practices for the implementation and operation of the Databricks lakehouse.', 'language': 'en-US'}, page_content='Articles about lakehouse architecture \n\nThe scope of the lakehouse \nThe first step to designing your data architecture with the Databricks Data Intelligence Platform is understanding its building blocks and how they would integrate with your systems. See The scope of the lakehouse platform.\n\n\nGuiding principles for the lakehouse \nGround rules that define and influence your architecture. They explain the vision behind a lakehouse implementation and form the basis for future decisions on your data, analytics, and AI architecture. See Guiding principles for the lakehouse.\n\n\nDownloadable lakehouse reference architecture

## AGORA PARA O PDF

Vamos lá

In [21]:
vectorstore2 = FAISS.from_documents(split_PDF , embeddings)

chain2 = ConversationalRetrievalChain.from_llm(llm , vectorstore2.as_retriever() , return_source_documents=True)




In [22]:
chat2_hist = []

query3 = "Describe a nonderteministic automata for me?"
result2 = chain2({"question" : query3 , "chat_history" : chat2_hist})

print(result2['answer'])


print(result2['source_documents'])

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

smaller than its deterministic counterpart, or its functioning may be easier to
understand. Nondeterminism in ﬁnite automata is also a good introduction
to nondeterminism in more powerful computational models because ﬁnite au-
tomata are especially easy to understand. Now we turn to several examples of
NFAs.
Copyright 2012 Cengage Learning. All Rights Reserved. May not be copied, scanned, or duplicated, in whole or in part. Due to electronic rights, some third party content may be suppressed from the eBook and/or eChapter(s). Editorial review has deemed that any suppressed content does not materially affect the overall learning experience. Cengage Learning reserves the right to remove additional content at any time if subsequent rights restrictions require it.

unique way from the preceding step. When the machine is in a giv