In [2]:
import pinecone
import pandas as pd
import os
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv, find_dotenv

In [3]:
# Load the `dotenv` IPython extension, then invoke its %dotenv line magic.
# NOTE(review): the load_dotenv(find_dotenv(), ...) call in the next cell also loads the
# .env file, so one of the two mechanisms is presumably redundant — confirm before removing.
%load_ext dotenv
%dotenv

In [4]:
# Locate a .env file with find_dotenv() and load it; override=True asks python-dotenv to
# let values from the file replace variables already present in the process environment.
load_dotenv(find_dotenv(), override = True)

True

In [5]:
# Build the Pinecone client from values held in environment variables, so no secret is
# hardcoded in the notebook. os.environ.get returns None for an unset variable, so a
# missing PINECONE_API_KEY / PINECONE_ENV is passed through to the client as None.
# NOTE(review): confirm the installed client version still uses the `environment` argument.
pc = Pinecone(api_key = os.environ.get("PINECONE_API_KEY"), environment = os.environ.get("PINECONE_ENV"))

In [None]:
# Index configuration shared by the delete / create / connect cells below.
index_name = "medical-symptoms-index"
# Vector size of the index; must equal the length of both the stored embeddings and the
# query embeddings (assumed to be what 'multi-qa-distilbert-cos-v1' produces — TODO confirm).
dimension = 768
# Similarity metric the index is created with.
metric = "cosine"

In [None]:
# Delete any existing index with this name so the create_index cell below starts from a
# clean slate. WARNING: this is destructive — all vectors stored in that index are lost.
existing_index_names = [index.name for index in pc.list_indexes()]
if index_name in existing_index_names:
    pc.delete_index(index_name)
    print(f"{index_name} successfully deleted.")
else:
    print(f"{index_name} not in index list.")

In [None]:
# Create the serverless index (AWS, us-east-1) using the name, dimension and metric
# configured in the cells above.
serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
pc.create_index(
    name=index_name,
    dimension=dimension,
    metric=metric,
    spec=serverless_spec,
)

In [None]:
# Handle to the index named by `index_name`; used by the upsert and query cells below.
medical_symptoms_index = pc.Index(index_name)

In [None]:
# Load symptom embeddings
# Columns read later by upsert_to_pinecone: unique_id, embedding, disease, symptom,
# precautions, metadata, weight.
# NOTE(security): unpickling can execute arbitrary code — only load a pickle file that was
# produced by a trusted step of this project.
symptom_df = pd.read_pickle('../embeddings/symptom_embeddings.pkl')

In [None]:
def upsert_to_pinecone(medical_symptoms_index, symptom_df, batch_size=100):
    """
    Upsert every row of ``symptom_df`` into a Pinecone index, in batches.

    Sending the whole DataFrame in a single request fails once the payload
    exceeds Pinecone's per-request limits, so the vectors are sent in chunks
    of at most ``batch_size`` records.

    Args:
        medical_symptoms_index: Index handle exposing ``upsert(vectors=...)``.
        symptom_df (pd.DataFrame): Must contain the columns ``unique_id``,
            ``embedding``, ``disease``, ``symptom``, ``precautions``,
            ``metadata`` and ``weight``. ``embedding`` may be anything with a
            ``tolist()`` method (e.g. a NumPy array) or a plain sequence.
        batch_size (int): Maximum number of vectors per upsert request.

    Returns:
        None. Success and failures are reported with ``print``; data and
        request errors are caught rather than raised.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    try:
        # Prepare (id, values, metadata) tuples for upsert
        vectors_to_upsert = []
        for _, row in symptom_df.iterrows():
            embedding = row["embedding"]
            # NumPy arrays need converting to plain lists; accept plain sequences too.
            values = embedding.tolist() if hasattr(embedding, "tolist") else list(embedding)
            vectors_to_upsert.append((row["unique_id"], values, {
                "disease": row['disease'],
                "symptom": row['symptom'],
                "precautions": row['precautions'],  # Ensure precautions are included here
                "metadata": row['metadata'],
                "weight": row['weight']
            }))

        # Nothing to send: skip the request instead of posting an empty batch.
        if not vectors_to_upsert:
            print("No rows to upsert; symptom_df is empty.")
            return

        # Perform the upsert in chunks. If a later chunk fails, earlier chunks stay
        # written; re-running overwrites them by id.
        for start in range(0, len(vectors_to_upsert), batch_size):
            batch = vectors_to_upsert[start:start + batch_size]
            medical_symptoms_index.upsert(vectors=batch)
        print("Data successfully upserted to Pinecone index.")

    except KeyError as e:
        print(f"KeyError: Missing expected column in symptom_df: {e}")
    except AttributeError as e:
        print(f"AttributeError: Issue with symptom_df format or content: {e}")
    except ValueError as e:
        print(f"ValueError: Problem with data in symptom_df: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")


In [None]:
# Push every row of symptom_df into the index; data and request errors are printed by
# the function rather than raised.
upsert_to_pinecone(medical_symptoms_index, symptom_df)

In [None]:
# Initialize the model for querying
# NOTE(review): its embedding size must match `dimension` (768) and, presumably, it should
# be the same model that produced symptom_embeddings.pkl — TODO confirm.
model = SentenceTransformer('multi-qa-distilbert-cos-v1')

In [None]:
# Query Pinecone Function
def query_pinecone(query, top_k=10):
    """
    Embed a text query and look up its nearest matches in the Pinecone index.

    Uses the notebook-level ``model`` to encode the query and the notebook-level
    ``medical_symptoms_index`` to run the search.

    Args:
        query (str): The query string (e.g., symptom or disease name).
        top_k (int): Number of top results to retrieve.

    Returns:
        The response from ``medical_symptoms_index.query`` (requested with
        metadata included), or ``None`` if any error occurred; in that case
        the error is printed.
    """
    try:
        # Encode the query text into a plain list of floats.
        encoded = model.encode(query, show_progress_bar=False)
        vector = encoded.tolist()

        # Search the index and hand the response straight back.
        return medical_symptoms_index.query(
            vector=vector,
            top_k=top_k,
            include_metadata=True,
        )
    except Exception as exc:
        print(f"Error querying Pinecone: {exc}")
    return None
