Part 1: Load Documents & Execute Reranking Model
1. Install Pinecone libraries

In [16]:
!pip install pinecone==6.0.1 pinecone-notebooks



2. Authenticate with Pinecone

In [17]:
import os

# Only launch the Colab authentication helper when no non-empty API key is
# already present in the environment.
# NOTE(review): presumably Authenticate() stores the key in
# os.environ["PINECONE_API_KEY"], since later cells read it from there — confirm.
if os.environ.get("PINECONE_API_KEY", "") == "":
    from pinecone_notebooks.colab import Authenticate

    Authenticate()

3. Instantiate the Pinecone client


In [18]:
from pinecone import Pinecone

# Fail fast with a KeyError if the authentication step did not provide a key.
api_key = os.environ["PINECONE_API_KEY"]

# Only the API key is handed to the client; no environment/region value is
# passed here.
pc = Pinecone(api_key=api_key)

4. Define your query & documents

In [19]:
# The question the reranker scores every candidate document against.
query = "Tell me about Apple's products"

# Candidate texts: a mix of sentences about the fruit and about Apple devices.
# The strings are sample data and are kept verbatim.
documents = [
    "An apple made Snow-white comatose",
    "My I-Pod broke after I attempted to use is as a hard-drive",
    "My previous Macboook has remained relevant for 10 years",
    "I stopped using I-Phones because of battery issues",
    "I add apricot jam as glazing in my apple pies",
]

5. Call the reranker

In [20]:
from pinecone import RerankModel

# Score each candidate against the query and keep the three best.
# Every document is sent as {"id": <its position as a string>, "text": <the sentence>}.
reranked = pc.inference.rerank(
    model="bge-reranker-v2-m3",
    query=query,
    documents=[
        {"id": str(position), "text": text}
        for position, text in enumerate(documents)
    ],
    top_n=3,
)

6. Inspect reranked results

In [40]:
def show_reranked(query, matches):
    """Print the query, then one numbered line per reranked document.

    Args:
        query: Query text echoed on the first line.
        matches: Object whose ``data`` attribute holds the results; each
            result is read through ``.score`` and ``.document.text``.
    """
    print(f"Query: {query}")
    # Ranks are 1-based so the best document is listed as position 1.
    for rank, hit in enumerate(matches.data, start=1):
        print(f"{rank}, {hit.score}, {hit.document.text}")
# Display the Part 1 ranking; show_reranked reads `.data` from the object it is given.
show_reranked(query, reranked.rerank_result)

Query: Tell me about Apple's products
1, 0.00092538487, My previous Macboook has remained relevant for 10 years
2, 0.0005399804, An apple made Snow-white comatose
3, 2.4682904e-05, I add apricot jam as glazing in my apple pies


Part 2: Set Up A Serverless Index For Medical Notes

1. Install data & model libraries

In [41]:
!pip install pandas torch transformers

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

2. Import modules & define environment settings

In [43]:
import os
import time

import pandas as pd
import torch
from pinecone import Pinecone, ServerlessSpec

# Cloud provider and region that will host the serverless index.
cloud = "aws"
region = "us-east-1"

# A serverless spec is built from cloud + region only (no cpu/memory settings).
spec = ServerlessSpec(cloud=cloud, region=region)

# The client is created from the API key alone (no 'environment' argument).
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Name shared by the create, upsert and query cells below.
index_name = "pinecone-reranker"

3. Create or recreate the index

In [45]:
# Drop any index left over from an earlier run so this cell can be re-executed.
if pc.has_index(index_name):
    pc.delete_index(index_name)

# dimension must equal the length of the embedding vectors stored in the index.
# Kept as the last expression so the notebook displays the index description.
pc.create_index(name=index_name, dimension=384, spec=spec)

{
    "name": "pinecone-reranker",
    "metric": "cosine",
    "host": "pinecone-reranker-n15ek1d.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 384,
    "deletion_protection": "disabled",
    "tags": null
}

Part 3: Load The Sample Data

1. Download & read JSONL

In [46]:
import tempfile

import requests

# Raw GitHub URL of the JSONL file containing the sample notes.
url = "https://raw.githubusercontent.com/pinecone-io/examples/refs/heads/master/docs/data/sample_notes_data.jsonl"

# The download is staged in a temporary directory that is removed as soon as
# the DataFrame has been built.
with tempfile.TemporaryDirectory() as tmpdir:
    file_path = os.path.join(tmpdir, "sample_notes_data.jsonl")

    # A timeout keeps the cell from hanging indefinitely on a stalled connection.
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()

    # Write through a context manager so the handle is flushed and closed
    # before pandas reads the file back.
    with open(file_path, "wb") as fh:
        fh.write(resp.content)

    # One JSON record per line.
    df = pd.read_json(file_path, orient="records", lines=True)

2. Preview the DataFrame

In [47]:
# A bare last expression uses the notebook's rich DataFrame display instead of
# the truncated plain-text form produced by print().
df.head()

     id                                             values  \
0  P011  [-0.2027486265, 0.2769146562, -0.1509393603, 0...   
1  P001  [0.1842793673, 0.4459365904, -0.0770567134, 0....   
2  P002  [-0.2040648609, -0.1739618927, -0.2897160649, ...   
3  P003  [0.1889383644, 0.2924542725, -0.2335938066, -0...   
4  P004  [-0.12171068040000001, 0.1674752235, -0.231888...   

                                            metadata  
0  {'advice': 'rest, hydrate', 'symptoms': 'heada...  
1  {'tests': 'EKG, stress test', 'symptoms': 'che...  
2  {'HbA1c': '7.2', 'condition': 'diabetes', 'med...  
3  {'symptoms': 'cough, wheezing', 'diagnosis': '...  
4  {'referral': 'dermatology', 'condition': 'susp...  


Part 4: Upsert Data Into The Index

1. Instantiate index client & upsert

In [48]:
# Handle used for all data-plane calls (upsert, stats, query) on the index.
index = pc.Index(name=index_name)

# Send every DataFrame row to the index; left as the last expression so the
# notebook shows the returned upsert summary.
index.upsert_from_dataframe(df=df)

sending upsert requests:   0%|          | 0/100 [00:00<?, ?it/s]

{'upserted_count': 100}

2. Wait for availability

In [49]:
def is_ready(idx, min_vectors=1):
    """Report whether the index already holds at least ``min_vectors`` vectors.

    Args:
        idx: Index client exposing ``describe_index_stats()``.
        min_vectors: Smallest ``total_vector_count`` that counts as ready.
            The default of 1 keeps the "any vector is visible" check; pass the
            number of upserted rows to wait until all of them are counted.

    Returns:
        bool: True once the reported total reaches ``min_vectors``.
    """
    stats = idx.describe_index_stats()
    return stats.total_vector_count >= min_vectors

# Poll until the index reports stored vectors, but give up after a deadline
# instead of looping forever if they never show up.
READY_TIMEOUT_SECONDS = 300
POLL_INTERVAL_SECONDS = 5

deadline = time.monotonic() + READY_TIMEOUT_SECONDS
while not is_ready(index):
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Index reported no vectors after {READY_TIMEOUT_SECONDS} seconds"
        )
    time.sleep(POLL_INTERVAL_SECONDS)
print(index.describe_index_stats())

{'dimension': 384,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 100}},
 'total_vector_count': 100,
 'vector_type': 'dense'}


Part 5: Query & Embedding Function

1. Define your embedding function

In [50]:
from functools import lru_cache

from sentence_transformers import SentenceTransformer


@lru_cache(maxsize=None)
def _load_embedding_model(model_name):
    """Instantiate the named SentenceTransformer once and reuse it afterwards."""
    return SentenceTransformer(model_name)


def get_embedding(text, model_name="all-MiniLM-L6-v2"):
    """Encode ``text`` with a cached SentenceTransformer model.

    The model is constructed on the first call only; constructing it inside
    every call would repeat the model loading for each query.

    Args:
        text: Text to embed (passed straight to ``model.encode``).
        model_name: Name of the SentenceTransformer model to use. The length
            of its vectors must match the dimension the index was created
            with (384 in this notebook).

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for ``text``; the query
        cell calls ``.tolist()`` on it.
    """
    return _load_embedding_model(model_name).encode(text)

2. Run a semantic search query

In [53]:
question = "what if my patient has leg pain"

# Embed the question and fetch the five nearest notes, metadata included.
emb = get_embedding(question)
results = index.query(
    vector=emb.tolist(),
    top_k=5,
    include_metadata=True,
)

# Order the hits best-first by similarity score.
matches = list(results.matches)
matches.sort(key=lambda hit: hit.score, reverse=True)

Part 6: Display & Rerank Clinical Notes

1. Display initial search results

In [57]:
def show_results(q, matches):
    """Print the question, then one numbered line per search hit.

    Args:
        q: Question text echoed on the first line.
        matches: Iterable of hits; each hit is read with the subscripts
            ``["id"]``, ``["score"]`` and ``["metadata"]``.
    """
    print(f"Question: {q}")
    # Ranks are 1-based so the first hit is listed as position 1.
    for rank, hit in enumerate(matches, start=1):
        print(f"{rank}, {hit['id']}, {hit['score']}, {hit['metadata']}")
# Print the initial vector-search hits for the question, before any reranking.
show_results(question, matches)

Question: what if my patient has leg pain
1, P0100, 0.518134892, {'advice': 'over-the-counter pain relief, stretching', 'symptoms': 'muscle pain'}
2, P047, 0.50096035, {'symptoms': 'back pain', 'treatment': 'physical therapy'}
3, P095, 0.50096035, {'symptoms': 'back pain', 'treatment': 'physical therapy'}
4, P007, 0.459599316, {'surgery': 'knee arthroscopy', 'symptoms': 'pain, swelling', 'treatment': 'physical therapy'}
5, P028, 0.446535289, {'condition': 'knee pain', 'referral': 'orthopedics'}


2. Prepare documents for reranking



In [58]:
# Flatten each hit's metadata dict into a single "key: value; key: value"
# string, stored under "reranking_field" next to the hit's id.
rerank_docs = [
    {
        "id": hit.id,
        "reranking_field": "; ".join(
            f"{field}: {value}" for field, value in hit.metadata.items()
        ),
    }
    for hit in matches
]

# A narrower follow-up question used to reorder the notes retrieved above.
rerank_query = "what about an articular pain in the knee?"

3. Execute serverless reranking

In [65]:
# Reorder the retrieved notes against the refined question.
reranked = pc.inference.rerank(
    model="bge-reranker-v2-m3",
    query=rerank_query,
    documents=rerank_docs,
    # Rank on the flattened metadata string built for each note.
    rank_fields=["reranking_field"],
    # Number of top reranked notes to return.
    top_n=5,
)

In [79]:
# Dump the raw reranked entries (index, score, document) for inspection.
print(reranked.data)

[{
    index=4,
    score=0.06791668,
    document={
        id='P028',
        reranking_field='condition: knee pain; referral: orthopedics'
    }
  }, {
    index=3,
    score=0.01384861,
    document={
        id='P007',
        reranking_field='surgery: knee arthroscopy; symptoms: pain, swelling; treatment: physical therapy'
    }
  }, {
    index=0,
    score=0.004848533,
    document={
        id='P0100',
        reranking_field='advice: over-the-counter pain relief, stretching; symptoms: muscle pain'
    }
  }, {
    index=2,
    score=0.0042315875,
    document={
        id='P095',
        reranking_field='symptoms: back pain; treatment: physical therapy'
    }
  }, {
    index=1,
    score=0.004166256,
    document={
        id='P047',
        reranking_field='symptoms: back pain; treatment: physical therapy'
    }
  }]


4. Show reranked results

In [82]:
def show_reranked(q, matches):
    """Print the refined query, then a three-line entry per reranked note.

    NOTE(review): this redefines the ``show_reranked`` from Part 1, which took
    an object with a ``.data`` attribute; this version iterates ``matches``
    directly.

    Args:
        q: Refined query text echoed in the header.
        matches: Iterable of results; each is read with ``["score"]`` and
            ``["document"]``, and the document with ``["id"]`` and
            ``["reranking_field"]``.
    """
    print(f"\n🔍 Refined Query: {q}\n")
    # Ranks are 1-based; scores are shown with six decimal places.
    for rank, hit in enumerate(matches, start=1):
        note = hit["document"]
        print(f"{rank}. ID: {note['id']}")
        print(f"   Score: {hit['score']:.6f}")
        print(f"   Fields: {note['reranking_field']}\n")
# Render the reranked notes for the refined query in a readable layout.
show_reranked(rerank_query, reranked.data)


🔍 Refined Query: what about an articular pain in the knee?

1. ID: P028
   Score: 0.067917
   Fields: condition: knee pain; referral: orthopedics

2. ID: P007
   Score: 0.013849
   Fields: surgery: knee arthroscopy; symptoms: pain, swelling; treatment: physical therapy

3. ID: P0100
   Score: 0.004849
   Fields: advice: over-the-counter pain relief, stretching; symptoms: muscle pain

4. ID: P095
   Score: 0.004232
   Fields: symptoms: back pain; treatment: physical therapy

5. ID: P047
   Score: 0.004166
   Fields: symptoms: back pain; treatment: physical therapy

