In [3]:
!pip uninstall -y pinecone-client pinecone langchain langchain-community langchain-huggingface

Found existing installation: pinecone-client 2.2.4
Uninstalling pinecone-client-2.2.4:
  Successfully uninstalled pinecone-client-2.2.4
[0mFound existing installation: langchain 0.3.25
Uninstalling langchain-0.3.25:
  Successfully uninstalled langchain-0.3.25
Found existing installation: langchain-community 0.3.25
Uninstalling langchain-community-0.3.25:
  Successfully uninstalled langchain-community-0.3.25
Found existing installation: langchain-huggingface 0.3.0
Uninstalling langchain-huggingface-0.3.0:
  Successfully uninstalled langchain-huggingface-0.3.0


In [1]:
!pip install -U langchain langchain-community langchain-huggingface
!pip install -U sentence-transformers pinecone

Collecting pinecone
  Using cached pinecone-7.0.2-py3-none-any.whl.metadata (9.5 kB)
Using cached pinecone-7.0.2-py3-none-any.whl (516 kB)
Installing collected packages: pinecone
Successfully installed pinecone-7.0.2


Self-Querying Retriever

In [None]:
import os
os.environ["PINECONE_API_KEY"] = "your-key"
api_key = os.environ["PINECONE_API_KEY"]

In [None]:
# Imports
# -----------------------------
import uuid
import os
from pinecone import Pinecone, ServerlessSpec
from langchain.schema import Document
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEndpoint
from huggingface_hub import login

# -----------------------------
# HuggingFace Login + LLM
# -----------------------------
login(token="your-key")  # <-- Replace with your token
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0.6,
    max_new_tokens=200,
)
print("✅ Hugging Face login and LLM ready")

# -----------------------------
# Embeddings
# -----------------------------
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# -----------------------------
# Pinecone Init
# -----------------------------
pc = Pinecone(api_key="your-key")  # <-- Replace with your Pinecone API key
INDEX_NAME = "wine-index"
dimension = 384

if INDEX_NAME in [i['name'] for i in pc.list_indexes().indexes]:
    pc.delete_index(INDEX_NAME)

pc.create_index(
    name=INDEX_NAME,
    dimension=dimension,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1")
)

# Target the index
index = pc.Index(INDEX_NAME)

# -----------------------------
# Documents
# -----------------------------
docs = [
    Document(
        page_content="Complex, layered, rich red with dark fruit flavors",
        metadata={"name":"Opus One", "year": 2018, "rating": 96, "grape": "Cabernet Sauvignon", "color":"red", "country":"USA"},
    ),
    Document(
        page_content="Luxurious, sweet wine with flavors of honey, apricot, and peach",
        metadata={"name":"Château d'Yquem", "year": 2015, "rating": 98, "grape": "Sémillon", "color":"white", "country":"France"},
    ),
    Document(
        page_content="Full-bodied red with notes of black fruit and spice",
        metadata={"name":"Penfolds Grange", "year": 2017, "rating": 97, "grape": "Shiraz", "color":"red", "country":"Australia"},
    ),
    Document(
        page_content="Elegant, balanced red with herbal and berry nuances",
        metadata={"name":"Sassicaia", "year": 2016, "rating": 95, "grape": "Cabernet Franc", "color":"red", "country":"Italy"},
    ),
    Document(
        page_content="Highly sought-after Pinot Noir with red fruit and earthy notes",
        metadata={"name":"Domaine de la Romanée-Conti", "year": 2018, "rating": 100, "grape": "Pinot Noir", "color":"red", "country":"France"},
    ),
    Document(
        page_content="Crisp white with tropical fruit and citrus flavors",
        metadata={"name":"Cloudy Bay", "year": 2021, "rating": 92, "grape": "Sauvignon Blanc", "color":"white", "country":"New Zealand"},
    ),
    Document(
        page_content="Rich, complex Champagne with notes of brioche and citrus",
        metadata={"name":"Krug Grande Cuvée", "year": 2010, "rating": 93, "grape": "Chardonnay blend", "color":"sparkling", "country":"New Zealand"},
    ),
    Document(
        page_content="Intense, dark fruit flavors with hints of chocolate",
        metadata={"name":"Caymus Special Selection", "year": 2018, "rating": 96, "grape": "Cabernet Sauvignon", "color":"red", "country":"USA"},
    ),
    Document(
        page_content="Exotic, aromatic white with stone fruit and floral notes",
        metadata={"name":"Jermann Vintage Tunina", "year": 2020, "rating": 91, "grape": "Sauvignon Blanc blend", "color":"white", "country":"Italy"},
    ),
]

# -----------------------------
# Prepare records and Upsert
# -----------------------------
records = []
for doc in docs:
    vector = embedder.embed_query(doc.page_content)
    doc_id = str(uuid.uuid4())
    record = {
        "id": doc_id,
        "values": vector,
        "metadata": {
            "text": doc.page_content,
            **doc.metadata
        }
    }
    records.append(record)

index.upsert(vectors=records, namespace="wine-namespace")

print("✅ Documents upserted to Pinecone!")
# -----------------------------
# Querying manually (example)
# -----------------------------
query = "Show me red wines with a rating above 95"
query_vector = embedder.embed_query(query)

# Search
results = index.query(
    vector=query_vector,
    top_k=5,
    include_metadata=True,
    namespace="wine-namespace"
)

# Display Results
print("\n🔍 Query:", query)
for match in results.matches:
    metadata = match.metadata
    print(f"- {metadata.get('name')} ({metadata.get('year')}) — {metadata.get('text')}")

✅ Hugging Face login and LLM ready
✅ Documents upserted to Pinecone!

🔍 Query: Show me red wines with a rating above 95


In [7]:
import uuid

# Step 1: Embed the query
query = "Show me red wines with a rating above 95"
query_vector = embedder.embed_query(query)

# Step 2: Query Pinecone
response = index.query(
    vector=query_vector,
    top_k=10,
    include_metadata=True,
    namespace="wine-namespace"
)

# Step 3: Filter results manually
filtered_results = []
for match in response['matches']:
    metadata = match['metadata']
    if (
        metadata.get("color") == "red" and
        metadata.get("rating", 0) > 95
    ):
        filtered_results.append(metadata)

# Step 4: Print results
print("\n🔍 Query:", query)
if not filtered_results:
    print("No matching wines found.")
else:
    for r in filtered_results:
        print(f"- {r['name']} ({r['year']}) — {r['text']}")



🔍 Query: Show me red wines with a rating above 95
- Domaine de la Romanée-Conti (2018.0) — Highly sought-after Pinot Noir with red fruit and earthy notes
- Opus One (2018.0) — Complex, layered, rich red with dark fruit flavors
- Penfolds Grange (2017.0) — Full-bodied red with notes of black fruit and spice
- Caymus Special Selection (2018.0) — Intense, dark fruit flavors with hints of chocolate


In [9]:
# Delete the index
pc.delete_index("wine-index")