# In [1]:
import chromadb
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
import pandas as pd

# Load tech stack data from CSV.
# The ingest step below reads the 'Tech Stack' and 'Portfolio Website' columns.
tech_stack_df = pd.read_csv('tech_stack.csv')

# Initialize ChromaDB client
client = chromadb.Client()
# get_or_create_collection (rather than create_collection) so that re-running
# this cell/script in the same process reuses the existing collection instead
# of raising because the name is already taken.
collection = client.get_or_create_collection(name="tech_stack_portfolio")

# Initialize HuggingFace embeddings
model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Add tech stack data to ChromaDB.
#
# Each CSV row is stored as two documents (its tech stack text and its
# portfolio URL); both carry the same metadata so either hit can be mapped
# back to the full row.
#
# Rows missing either field are skipped: pandas reads empty cells as NaN
# floats, which cannot be embedded as text.
complete_rows = tech_stack_df.dropna(subset=['Tech Stack', 'Portfolio Website'])

documents = []
metadatas = []
ids = []
for index, tech_stack, url in zip(
    complete_rows.index,
    complete_rows['Tech Stack'].astype(str),
    complete_rows['Portfolio Website'].astype(str),
):
    documents.extend([tech_stack, url])
    metadatas.extend([
        {"tech_stack": tech_stack, "url": url},
        {"tech_stack": tech_stack, "url": url},
    ])
    ids.extend([f"{index}_tech_stack", f"{index}_url"])

# Embed and insert everything in one batch instead of invoking the model and
# the collection once per row. Skip the call entirely when there is nothing to
# add, since an insert with empty lists is not meaningful.
if documents:
    collection.add(
        documents=documents,
        embeddings=embeddings.embed_documents(documents),
        metadatas=metadatas,
        ids=ids,
    )

# Query the collection
query = "Python, Django"
# embed_query is the embeddings API for a single search string.
query_embedding = embeddings.embed_query(query)

# Retrieve the most similar documents from ChromaDB
results = collection.query(
    query_embeddings=[query_embedding],
    n_results=5  # Number of top results to return
)

# Output the results.
# The query result holds one inner list per query embedding; only one query
# was sent, so the hits are at position 0. Each hit is a single document
# (either a tech stack text or a URL), so the tech stack / URL pair is read
# from the hit's metadata rather than from two neighbouring documents.
seen = set()
for metadata in results['metadatas'][0]:
    pair = (metadata["tech_stack"], metadata["url"])
    # A row is stored as two documents with identical metadata, so the same
    # pair can match twice; print it only once.
    if pair in seen:
        continue
    seen.add(pair)
    print(f"Tech Stack: {pair[0]}, Portfolio URL: {pair[1]}")