In [8]:
import os
from langchain.vectorstores import Chroma
from langchain.schema import Document
from langchain_community.embeddings import OllamaEmbeddings

# Embedding backend: OllamaEmbeddings configured with the "nomic-embed-text" model.
embedding = OllamaEmbeddings(model="nomic-embed-text")

# KC entries (presumably "knowledge components" -- confirm with the author):
# six numbered C-programming topic areas, each followed by its sub-skills.
# The whole string, including the leading number, is what gets embedded.
kc_texts = [
    "1. Syntax and Structure: Variable declaration and initialization, Function definition and calling, Control structures (if, else, switch, loops), Array manipulation, Pointer usage",
    "2. Memory Management: Stack vs. heap allocation, Dynamic memory allocation (malloc, free), Memory leaks and dangling pointers",
    "3. Data Types and Operations: Primitive data types (int, float, char, etc.), Type casting and conversion, Bitwise operations",
    "4. Input/Output: Standard I/O functions (printf, scanf), File I/O operations",
    "5. Debugging Techniques: Using print statements for debugging, Debugging tools (gdb, valgrind), Common runtime errors and their causes",
    "6. Code Organization: Header files and #include directives, Modular programming principles, Code documentation and commenting"
]

# One Document per KC entry; each gets its own metadata dict tagging the source as "KC".
kc_docs = [
    Document(page_content=kc_entry, metadata={"source": "KC"})
    for kc_entry in kc_texts
]

# Store the Chroma data in a "kc-retriever" folder under the current working
# directory (joined onto os.getcwd(), so the resulting path is absolute).
persist_directory = os.path.join(os.getcwd(), "kc-retriever")

# Stable, position-based ids ("kc-1", "kc-2", ...). Without explicit ids a new
# random id is generated for every document on every run, so re-running this
# cell against the same persist directory keeps appending duplicate copies of
# the KC documents. With fixed ids a re-run overwrites the same records.
# NOTE(review): a directory already populated by earlier runs of the old cell
# still holds the randomly-keyed duplicates; delete it once to start clean.
kc_ids = [f"kc-{position}" for position in range(1, len(kc_docs) + 1)]

# Build (or refresh) the persistent Chroma vector store from the KC documents.
vectorstore_kc = Chroma.from_documents(
    kc_docs,
    embedding,
    ids=kc_ids,
    persist_directory=persist_directory,
)

# Expose the store through the retriever interface with default search settings.
retriever_kc = vectorstore_kc.as_retriever()

# Sanity check: show where the store lives and what was written there.
# The listing is sorted so the printed output is deterministic across runs.
print(f"Chroma DB is stored in: {persist_directory}")
print(f"Directory exists: {os.path.exists(persist_directory)}")
print(f"Contents of the directory: {sorted(os.listdir(persist_directory))}")

Chroma DB is stored in: /Users/nittinmurthi/Documents/VS_Code/120-AI-TA/kc-retriever
Directory exists: True
Contents of the directory: ['8eef439e-cf2c-4d12-937d-5366c39ffecd', 'chroma.sqlite3']
