In [1]:
import os
import pinecone
from datetime import datetime, timedelta
from azure.storage.blob import BlobServiceClient, generate_blob_sas, BlobSasPermissions
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.core.credentials import AzureKeyCredential
from openai import OpenAI
from openai import AzureOpenAI
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Pinecone connection settings. Credentials are read from the environment so
# that no secret is stored in the notebook itself; either value is None when
# the corresponding variable is not set.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
PINECONE_ENV = os.environ.get("PINECONE_ENV")

# Name of the Pinecone index that the law records are written to.
PINECONE_INDEX_NAME = "law-kb"

In [7]:
# Fail fast with a clear message when the API key is missing, before any
# client object is created.
if not PINECONE_API_KEY:
    raise ValueError("PINECONE_API_KEY is not set.")

# Pass the value that was just validated instead of re-reading the
# environment, so the key that was checked is the key that is used.
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)
index_name = PINECONE_INDEX_NAME

# Handle to the target index; later cells upsert through `index2`.
index2 = pc.Index(index_name)

In [8]:
# Azure OpenAI client used for the embedding calls below. The endpoint, API
# version and key are all taken from EMBEDDING_* environment variables.
openai_client = AzureOpenAI(
    azure_endpoint=os.environ.get("EMBEDDING_API_ENDPOINT"),
    api_version=os.environ.get("EMBEDDING_API_VERSION"),
    api_key=os.environ.get("EMBEDDING_API_KEY"),
)

def generate_embeddings(text, model="text-embedding-ada-002"):
    """Return the embedding vector for ``text``.

    Sends ``text`` to the embeddings endpoint of the module-level
    ``openai_client`` and returns the embedding of the first result.

    Args:
        text: The input to embed.
        model: Model name passed to the embeddings API. Defaults to
            ``"text-embedding-ada-002"``, the value that used to be
            hard-coded. NOTE(review): with Azure OpenAI this is presumably
            the deployment name -- confirm it matches the resource.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    response = openai_client.embeddings.create(model=model, input=text)
    # Only the first item of the response is returned.
    return response.data[0].embedding


In [None]:
# Seed records for the law knowledge base. Every record carries the same seven
# string fields; "id" is used as the vector id when the record is stored, and
# the whole record is stored alongside the vector as metadata.
laws = [
    dict(
        id="1",
        title="Consumer Protection Act, 2019 - False Advertising",
        description="This law protects consumers against false or misleading advertisements.",
        section="Section 2(28), Section 21",
        penalty="Fine up to ₹10,00,000 or imprisonment up to 2 years.",
        jurisdiction="India",
        source="https://egazette.nic.in",
    ),
    dict(
        id="2",
        title="The Indian Penal Code, 1860 - Cheating",
        description="This law penalizes acts where a person intentionally deceives another.",
        section="Section 415, Section 417, Section 420",
        penalty="Imprisonment up to 7 years and fine.",
        jurisdiction="India",
        source="https://indiacode.nic.in",
    ),
]

In [12]:
def store_laws_in_pinecone(records=None, batch_size=100):
    """Embed law records and upsert them into the Pinecone index ``index2``.

    For each record, the title, description, section, penalty and
    jurisdiction are joined with single spaces and embedded via
    ``generate_embeddings``. The record's ``"id"`` becomes the vector id and
    the whole record dict is stored as the vector's metadata.

    Args:
        records: Iterable of law dicts to store. Defaults to the module-level
            ``laws`` list, which is what the original zero-argument call used.
        batch_size: Maximum number of vectors sent per ``upsert`` request.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        KeyError: If a record lacks one of the fields read here.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1.")

    if records is None:
        records = laws

    pending = []
    for law in records:
        law_text = f"{law['title']} {law['description']} {law['section']} {law['penalty']} {law['jurisdiction']}"

        # Reuse the shared helper instead of repeating the embeddings call.
        pending.append((law["id"], generate_embeddings(law_text), law))

        # Flush as soon as a batch is full, so one request carries many
        # vectors rather than one request per record.
        if len(pending) >= batch_size:
            index2.upsert(pending)
            pending = []

    # Send the final partial batch; nothing is sent when there are no records.
    if pending:
        index2.upsert(pending)

    print("Laws stored in Pinecone")

In [13]:
# Run the ingestion defined above: embed every entry in `laws` and upsert it
# into the Pinecone index.
store_laws_in_pinecone()

Laws stored in Pinecone
