# Instalação de bibliotecas

In [1]:
!pip install -qU transformers accelerate einops langchain xformers bitsandbytes faiss-gpu sentence_transformers unstructured

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m64.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m76.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.1/109.1 MB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ..

# Carregando o modelo do huggingface

In [2]:
from torch import cuda, bfloat16
import transformers

model_id = 'ianagra/Llama-2-7b-ALLM-virtual-sales-assistant'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, you need an access token
hf_auth = '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'
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)

# enable evaluation mode to allow model inference
model.eval()

print(f"Model loaded on {device}")

Downloading (…)lve/main/config.json:   0%|          | 0.00/630 [00:00<?, ?B/s]



Downloading (…)model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/162 [00:00<?, ?B/s]

Model loaded on cuda:0


##Configurando tokenizer

In [3]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

Downloading (…)okenizer_config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]



Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/437 [00:00<?, ?B/s]

##Definindo critério de parada para a geração de tokens

In [4]:
stop_list = ['\nHuman:', '\n```\n']

stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
stop_token_ids

[[1, 29871, 13, 29950, 7889, 29901], [1, 29871, 13, 28956, 13]]

##Convertendo os tokens de parada em objetos LongTensor

In [5]:
import torch

stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
stop_token_ids

[tensor([    1, 29871,    13, 29950,  7889, 29901], device='cuda:0'),
 tensor([    1, 29871,    13, 28956,    13], device='cuda:0')]

##Criando função que analisa se há algum token critério de parada

In [6]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

##Definição de parâmetros adicionais

In [7]:
generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    stopping_criteria=stopping_criteria,  # without this model rambles during chat
    temperature=0.1,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=512,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

##Testando o funcionamento do modelo

In [8]:
res = generate_text("I have received a damaged product.")
print(res[0]["generated_text"])

I have received a damaged product. nobody has responded to my emails or phone calls. what can i do?
We apologize for any inconvenience you've experienced with your damaged product. Please contact our customer service department at (800) 523-1974 or email us at [support@mygiftcard.com](mailto:support@mygiftcard.com) and we will be happy to assist you. We are available Monday through Friday from 8am to 5pm PST.


#Implementando pipeline do huggingface no Langchain

In [9]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=generate_text)

# checking again that everything is working fine
llm(prompt="Explain me the difference between Data Lakehouse and Data Warehouse.")

" Unterscheidung between data lakehouse and data warehouse? A data lakehouse is a centralized repository that stores all of an organization's data, both structured and unstructured, in a single location. A data warehouse, on the other hand, is a repository that stores structured data in a specific format for easy querying and analysis.\n\n\n"

#Realizando a ingestão dos dados

In [10]:
from google.colab import drive
drive.mount('/drive')

Mounted at /drive


In [11]:
from langchain.document_loaders.csv_loader import CSVLoader

loader = CSVLoader(file_path="/drive/MyDrive/train.csv",
        encoding='utf-8',
        source_column="Order ID",
        csv_args={
            "delimiter": ",",
            "quotechar": '"',
            "fieldnames": ["Row ID", "Order ID", "Order Date", "Ship Date", "Ship Mode", "Customer ID", "Customer Name", "Segment", "Country", "City", "State", "Postal Code", "Region", "Product ID", "Category", "Sub-Category", "Product Name", "Sales"]
    },
)
data = loader.load()

In [None]:
from langchain.document_loaders.csv_loader import UnstructuredCSVLoader

loader = UnstructuredCSVLoader(
    file_path="/drive/MyDrive/train.csv", mode="elements"
)
docs = loader.load()

#Dividindo os dados em chunks

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
all_splits = text_splitter.split_documents(data)

#Criando e armazenando os embeddings

In [12]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cuda"}

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# storing embeddings in the vector store
vectorstore = FAISS.from_documents(data, embeddings)

Downloading (…)a8e1d/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)0bca8e1d/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)e1d/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading (…)a8e1d/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading (…)8e1d/train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)bca8e1d/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

#Inicializando a chain

In [13]:
from langchain.chains import ConversationalRetrievalChain

chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), return_source_documents=True)

#Fazendo perguntas à base de dados

In [14]:
chat_history = []

query = "I received a damaged product."
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

 I apologize for any inconvenience this has caused. Can you please provide me with the Row ID and Order ID associated with this order? This will allow me to look into this matter further and assist you in resolving the issue.


#Incluindo as perguntas e respostas anteriores no histórico

In [16]:
chat_history = [(query, result["answer"])]

query = "The Row ID is 9750, the Order ID is CA-2017-158358 and the damaged product is Logitech G602 Wireless Gaming Mouse."
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

 Yes, you can apply for a warranty replacement by contacting our customer service department and providing your order details and proof of purchase.


#Ver a fonte de informação utilizada na resposta

In [17]:
print(result['source_documents'])

[Document(page_content='Row ID: 8881\nOrder ID: US-2016-107944\nOrder Date: 23/03/2016\nShip Date: 25/03/2016\nShip Mode: First Class\nCustomer ID: AM-10360\nCustomer Name: Alice McCarthy\nSegment: Corporate\nCountry: United States\nCity: Los Angeles\nState: California\nPostal Code: 90008\nRegion: West\nProduct ID: TEC-AC-10000736\nCategory: Technology\nSub-Category: Accessories\nProduct Name: Logitech G600 MMO Gaming Mouse\nSales: 239.97', metadata={'source': 'US-2016-107944', 'row': 8881}), Document(page_content='Row ID: 4036\nOrder ID: US-2017-108777\nOrder Date: 12/12/2017\nShip Date: 12/12/2017\nShip Mode: Same Day\nCustomer ID: JM-15655\nCustomer Name: Jim Mitchum\nSegment: Corporate\nCountry: United States\nCity: Lorain\nState: Ohio\nPostal Code: 44052\nRegion: East\nProduct ID: TEC-AC-10002567\nCategory: Technology\nSub-Category: Accessories\nProduct Name: Logitech G602 Wireless Gaming Mouse\nSales: 383.952', metadata={'source': 'US-2017-108777', 'row': 4036}), Document(page_co