In [1]:
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue
from qdrant_client.models import PointStruct
from qdrant_client.models import Distance, VectorParams
from pprint import pprint
from dotenv import load_dotenv,find_dotenv
import os


In [2]:
from langchain.vectorstores import Qdrant
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.embeddings import SentenceTransformerEmbeddings

In [3]:
from sentence_transformers import SentenceTransformer
import uuid
from groq import Groq

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
load_dotenv()

True

In [96]:
os.getenv("REDIS_PASSWORD")

''

In [5]:
os.getenv("GROQ_API_KEY")

'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'

In [88]:
load_dotenv(find_dotenv())  

True

In [6]:
os.environ.get("GROQ_API_KEY")

'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'

In [5]:
class PyGroq:
    def __init__(self,temperature=1,max_tokens=1024,top_p=1,stream=False,stop=None,seed=10,model="llama3-70b-8192") -> None:
        load_dotenv(find_dotenv())  
        self.KEY =  os.environ.get("GROQ_API_KEY")

        self.client = Groq(
            api_key=self.KEY,
        )

        self.instance_data = {
            "id":uuid.uuid4(),
            "workspace":[
               
            ],

            "hiper_params":{
                "temperature":temperature,
                "max_tokens":max_tokens,
                "top_p":top_p,
                "stream":stream,
                "stop":stop,
                "seed":seed,
                "model":model
            }
        }

    def createWorkspace(self,name="None"):
        id = uuid.uuid4()
        self.instance_data["workspace"].append(
            {
                "id":id,
                "messages":[],
                "config":{
                    "system_paradigma":[],
                    "name":name
                }
            }
        )

        return id
    
    def getWorkspace(self,id):
        workspace = [i for i in self.instance_data["workspace"] if i["id"] == id]

        if len(workspace)>=1:
            return workspace[0]
        else:
            return None
        
    def update_workspace_history(self,payload,workspaceId):
        workspace = self.getWorkspace(workspaceId)
        
        if not workspace:
            return {"message":"workspace not found"}
        
        workspace["messages"].append(payload)

        for i in range(len(self.instance_data["workspace"])):
            if self.instance_data["workspace"][i]["id"] == workspaceId:
                self.instance_data["workspace"][i] = workspace
                break

        return workspace
    
    def update_workspace_paradigma(self,payload,workspaceId):
        workspace = self.getWorkspace(workspaceId)
        
        if not workspace:
            return {"message":"workspace not found"}
        
        workspace["config"]["system_paradigma"].append({"role":"system","content":payload})

        for i in range(len(self.instance_data["workspace"])):
            if self.instance_data["workspace"][i]["id"] == workspaceId:
                self.instance_data["workspace"][i] = workspace
                break

        return workspace

    def quest_something(self,input,workspaceId):
        messages = [j for i in self.instance_data["workspace"] if i["id"] == workspaceId for j in i["config"]["system_paradigma"]]
        
        updated_workspace = self.update_workspace_history({"role":"user","content":input},workspaceId)

        for i in updated_workspace["messages"]:
            messages.append(i)

        chat_completion = self.client.chat.completions.create(
            messages=messages,
            temperature=self.instance_data["hiper_params"]["temperature"],
            max_tokens=self.instance_data["hiper_params"]["max_tokens"],
            top_p=self.instance_data["hiper_params"]["top_p"],
            stream=self.instance_data["hiper_params"]["stream"],
            stop=self.instance_data["hiper_params"]["stop"],
            seed=self.instance_data["hiper_params"]["seed"],
            model=self.instance_data["hiper_params"]["model"],
        )

        response = chat_completion.choices[0].message.content

        updated_workspace = self.update_workspace_history({"role":"assistant","content":response},workspaceId)

        return response
    
        

In [15]:
class QDrantConcrete:
    def __init__(self):
        self.base_url = os.getenv("QDRANT_HOST")
        self.client = QdrantClient(self.base_url)
        self.main_collection_name = "sinaX"
        self.vectordim = 768 
        self.Distance = Distance 
        self.base_model = "all-MiniLM-L6-v2"  
        self.selected_model = None 

    def define_embbedings_model(self, model=None):
        self.model = SentenceTransformer(model)
        self.selected_model = model

    def create_collection(self, collection_name, size=768, distance=Distance.COSINE):
        print(f"Criando a coleção '{collection_name}'...")
        self.client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(size=size, distance=distance),
        )
          
    def insert_vectors(self, collection_name, vectors, ids=None, payloads=None):
        points = []
        for i in range(len(vectors)):
            point = {
                'id': ids[i] if ids else str(uuid.uuid4()),  # Gera UUIDs se não for passado
                'vector': vectors[i],
            }
            if payloads:
                point['payload'] = payloads[i]  # Adiciona payload, se fornecido
            points.append(point)
        
        self.client.upsert(
            collection_name=collection_name,
            points=points
        )

    def generate_embeddings(self, texts):
        embeddings = self.model.encode(texts)
        return embeddings

    def search_vectors(self, collection_name, query_text, limit=5):
        query_vector = self.generate_embeddings([query_text])[0]
        result = self.client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=limit
        )
        return result

    def get_qdrant_vectorstore(self, collection_name):
        embeddings = SentenceTransformerEmbeddings(model=self.model)
        vectorstore = Qdrant(client=self.client, collection_name=collection_name, embeddings=embeddings)
        return vectorstore

    def delete_collection(self, collection_name):
        self.client.delete_collection(collection_name=collection_name)

    def describe_collection(self, collection_name):
        collection_info = self.client.get_collection(collection_name)
        return collection_info

    def get_vector(self, collection_name, vector_id):
        vector_data = self.client.retrieve(
            collection_name=collection_name,
            ids=[vector_id]
        )
        return vector_data

    def update_vectors(self, collection_name, vector_ids, new_vectors, payloads=None):
        points = []
        for i in range(len(new_vectors)):
            point = {
                'id': vector_ids[i],
                'vector': new_vectors[i],
            }
            if payloads:
                point['payload'] = payloads[i]
            points.append(point)
        
        self.client.upsert(
            collection_name=collection_name,
            points=points
        )

    def upsert_vectors(self, collection_name, vectors, ids=None, payloads=None):
        if ids is None:
            ids = [str(uuid.uuid4()) for _ in range(len(vectors))]
        
        self.insert_vectors(collection_name, vectors, ids, payloads)

    def process_document(self, document_text):
        paragraphs = document_text.split("\n")
        return paragraphs

    def store_document_in_qdrant(self, collection_name, document_text):
        document_id = str(uuid.uuid4())
        parts = self.process_document(document_text)
        embeddings = self.generate_embeddings(parts)
        ids = [str(uuid.uuid4()) for _ in range(len(parts))]
        payloads = [{"content": part} for part in parts]
        
        self.upsert_vectors(collection_name, embeddings, ids, payloads)
        return document_id

    def delete_document_from_qdrant(self, collection_name, document_id):
        self.client.delete(
            collection_name=collection_name,
            points_selector={'ids': [document_id]}
        )
        print(f"Documento {document_id} e seus embeddings foram deletados.")

In [16]:
class QDrantImplementation(QDrantConcrete):
    _instance = None 

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(QDrantConcrete, cls).__new__(cls) 
        return cls._instance

    def __init__(self):
        if not hasattr(self, "_initialized"):
            super().__init__()
            self._initialized = True

In [None]:
qdrant_client = QDrantImplementation()

In [6]:
pygro_client =  PyGroq()

## Testes do PyGROQ

In [7]:
pygro_client.instance_data

{'id': UUID('e12aee8f-c239-414b-a7e3-84ad91626366'),
 'workspace': [],
 'hiper_params': {'temperature': 1,
  'max_tokens': 1024,
  'top_p': 1,
  'stream': False,
  'stop': None,
  'seed': 10,
  'model': 'llama3-70b-8192'}}

In [8]:
id = pygro_client.createWorkspace(name="SinaX")

In [9]:
id

UUID('94e9e0ae-6151-457a-a95d-08d9382452b2')

In [17]:
pygro_client.getWorkspace(id)

{'id': UUID('94e9e0ae-6151-457a-a95d-08d9382452b2'),
 'messages': [{'role': 'user', 'content': 'Qual é o seu escopo de respostas?'},
  {'role': 'assistant',
   'content': 'Eu sou treinado para responder a perguntas relacionadas a tecnologia, incluindo, mas não limitado a:\n\n* Programação (linguagens, frameworks, bibliotecas, etc.)\n* Desenvolvimento web e mobile\n* Banco de dados e armazenamento de dados\n* Redes e segurança de rede\n* Hardware e componentes eletrônicos\n* Software e aplicativos\n* Inteligência artificial e machine learning\n* Ciência de dados e análise de dados\n* Internet das coisas (IoT)\n* Segurança de informação e criptografia\n* E muito mais!\n\nSe tiver alguma pergunta relacionada a tecnologia, estou aqui para ajudar!'},
  {'role': 'user', 'content': 'Quem é bolsonaro?'},
  {'role': 'assistant',
   'content': 'Eu não tenho resposta para essa pergunta, pois não tem relação com tecnologia.'},
  {'role': 'user', 'content': 'Quem é bolsonaro?'},
  {'role': 'assista

In [11]:
pygro_client.update_workspace_paradigma("Não responda nenhuma pergunta que não tenha a ver com tecnologia",id)

{'id': UUID('94e9e0ae-6151-457a-a95d-08d9382452b2'),
 'messages': [],
 'config': {'system_paradigma': [{'role': 'system',
    'content': 'Não responda nenhuma pergunta que não tenha a ver com tecnologia'}],
  'name': 'SinaX'}}

In [16]:
pygro_client.quest_something("Quem é bolsonaro?",id)

'Não vou responder a essa pergunta pois não tem relação com tecnologia. Se tiver alguma pergunta sobre tecnologia, estou aqui para ajudar!'

## Testes do QDrant

In [18]:
qdrant_client.main_collection_name

'sinaX'

In [25]:
r = qdrant_client.create_collection(qdrant_client.main_collection_name,size=qdrant_client.vectordim)

Criando a coleção 'sinaX'...


In [19]:
qdrant_client.define_embbedings_model("bert-base-nli-mean-tokens")

In [20]:
qdrant_client.selected_model

'bert-base-nli-mean-tokens'

In [21]:
qdrant_client.describe_collection(qdrant_client.main_collection_name)

CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=None, indexed_vectors_count=0, points_count=138, segments_count=8, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=768, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), qua

In [24]:
r = qdrant_client.delete_collection(qdrant_client.main_collection_name)

In [22]:
with open("procedimento_interno.txt","r",encoding="UTF-8") as file:
    text = file.read()

In [26]:
text

'Procedimento Interno para Registro de Estoque\n\n1. ObjetivoEstabelecer diretrizes para o registro e controle de estoque, garantindo precisão nas informações, rastreabilidade dos produtos e otimização dos processos logísticos.\n\n2. AbrangênciaEste procedimento se aplica a todos os colaboradores responsáveis pelo armazenamento, movimentação e controle de estoque na empresa.\n\n3. Responsabilidades\n\nSetor de Almoxarifado: Registrar a entrada e saída de produtos no sistema de gestão de estoque.\n\nSetor de Compras: Informar ao almoxarifado sobre pedidos de reposição e prazos de entrega.\n\nSetor Financeiro: Acompanhar os registros para controle de custos e inventário.\n\nGestão Operacional: Monitorar e auditar os registros periodicamente.\n\n4. Procedimentos\n\n4.1 Registro de Entrada de Estoque\n\nConferir a Nota Fiscal e verificar se os produtos recebidos correspondem ao pedido realizado.\n\nRegistrar a entrada no sistema de gestão, informando:\n\nCódigo do produto\n\nQuantidade rec

In [70]:
text_process = qdrant_client.process_document(text)

In [27]:
document_id = qdrant_client.store_document_in_qdrant(qdrant_client.main_collection_name, text)

In [28]:
query = "Quais são as responsabilidades?"
search_results = qdrant_client.search_vectors(qdrant_client.main_collection_name, query)
print("Resultados da pesquisa:", search_results)

Resultados da pesquisa: [ScoredPoint(id='d812aec2-04b7-4a4a-acd0-5f3cb65c695a', version=0, score=0.85471547, payload={'content': '6. Revisão e AtualizaçãoEste procedimento deve ser revisado anualmente ou sempre que houver mudanças nos processos de estoque.'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id='8b6b5120-3cad-408c-9eb0-0568041801ba', version=0, score=0.85447884, payload={'content': '2. AbrangênciaEste procedimento se aplica a todos os colaboradores responsáveis pelo armazenamento, movimentação e controle de estoque na empresa.'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id='5bb6d417-5394-4db0-9747-1a9c649268a3', version=0, score=0.84839046, payload={'content': 'Comparar os registros do sistema com os produtos em estoque.'}, vector=None, shard_key=None, order_value=None), ScoredPoint(id='9b468d6d-e67b-410a-a800-d2a320c555be', version=0, score=0.8424587, payload={'content': 'Atualizar o saldo do estoque imediatamente.'}, vector=None, shard_ke

  result = self.client.search(


In [29]:
search_results

[ScoredPoint(id='d812aec2-04b7-4a4a-acd0-5f3cb65c695a', version=0, score=0.85471547, payload={'content': '6. Revisão e AtualizaçãoEste procedimento deve ser revisado anualmente ou sempre que houver mudanças nos processos de estoque.'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id='8b6b5120-3cad-408c-9eb0-0568041801ba', version=0, score=0.85447884, payload={'content': '2. AbrangênciaEste procedimento se aplica a todos os colaboradores responsáveis pelo armazenamento, movimentação e controle de estoque na empresa.'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id='5bb6d417-5394-4db0-9747-1a9c649268a3', version=0, score=0.84839046, payload={'content': 'Comparar os registros do sistema com os produtos em estoque.'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id='9b468d6d-e67b-410a-a800-d2a320c555be', version=0, score=0.8424587, payload={'content': 'Atualizar o saldo do estoque imediatamente.'}, vector=None, shard_key=None, order_value=N