In [50]:
import chromadb
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer
import os

In [51]:
# Initialize Chroma client
chroma_client = chromadb.HttpClient(host="localhost", port=8005, settings=Settings())

# Load sentence transformer (example model)
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Create or get collection
collection = chroma_client.get_or_create_collection(name="code_files")

In [52]:
files_list = ["sample.py", "sample2.py"]
docs = []
embedds = []

for file_name in files_list:
    file_path = os.path.abspath(f"./sample_files/{file_name}")
    
    with open(file_path, "r", encoding="utf-8") as f:
        file_content = f.read()

    # Prepare text to embed (file name + content)
    text_to_embed = f"{file_path} {file_content}"
    docs.append(text_to_embed)

    embedds.append(embedder.encode([text_to_embed])[0].tolist())

In [None]:
collection.upsert(
    ids=["file1", "file2"],  # unique ID
    documents=docs,
    embeddings=embedds,
    metadatas=[{"file_name": files_list[i]} for i in range(len(docs))]
)

In [55]:
results = collection.query(
    query_texts=["saludar(Iker)"], 
    n_results=1 
)

print(results)

{'ids': [['file1']], 'distances': [[1.6022147]], 'embeddings': None, 'metadatas': [[None]], 'documents': [['/Users/ikerochoa/Desktop/Tec/8vo Sementre/kaizen-copilot/chroma_db/test/sample_files/sample.py def saludar(nombre):\n    """Imprime un saludo personalizado."""\n    print(f"Hola, {nombre}!")\n\ndef sumar(a, b):\n    """Devuelve la suma de dos números."""\n    return a + b\n\ndef es_par(numero):\n    """Devuelve True si el número es par, False si es impar."""\n    return numero % 2 == 0\n\ndef factorial(n):\n    """Calcula el factorial de un número de forma recursiva."""\n    if n == 0 or n == 1:\n        return 1\n    else:\n        return n * factorial(n - 1)\n\ndef lista_pares(hasta):\n    """Devuelve una lista de números pares hasta un número dado."""\n    return [x for x in range(hasta + 1) if es_par(x)]\n\n# Código de prueba si ejecutas este archivo directamente\nif __name__ == "__main__":\n    saludar("Iker")\n    print("Suma de 3 + 4:", sumar(3, 4))\n    print("¿Es 10 par?