In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

True

### Pinecone Documentation [DOCS](https://docs.pinecone.io/home)
### Get started [Quickstart](https://docs.pinecone.io/guides/get-started/quickstart)

## Establish a connection to MongoDB

In [2]:
from pymongo import MongoClient

client = MongoClient(os.getenv('CONNECTION_STRING'))

db = client['DeepLearningCluster']  
collection = db['Violations']  


## Retrieve all documents from Violations_detected collection

In [3]:
documents = collection.find()

In [4]:
inputs = [d for d in documents]

data = []
for i in range(len(inputs)):
    text = f"A {inputs[i]['vehicle_type']} was detected with this license plate number {inputs['license_plate_number']}{inputs[i]['violation_type']} in {inputs[i]['street_name']} road at {inputs[i]['time']} on {inputs[i]['date']} at these coordinates: {inputs[i]['latitude']}, {inputs[i]['longitude']}"
    data.append({'id': str(i+1), 'text': text})


In [5]:
inputs[0]

{'_id': ObjectId('66f22b85895db7f65810ca78'),
 'date': '2024-09-22',
 'time': '14:38:35',
 'license_plate_number': '1063 ~ad_',
 'vehicle_type': 'car',
 'violation_type': 'overtaking',
 'longitude': 24.921224,
 'latitude': 46.722133,
 'street_name': 'طريق المطار'}

In [6]:
data[0]

{'id': '1',
 'text': 'A car was detected overtaking in طريق المطار road at 14:38:35 on 2024-09-22 at these coordinates: 46.722133, 24.921224'}

In [7]:
print(type(data[0]))
print(type(inputs))

<class 'dict'>
<class 'list'>


## initialize a Pinecone client

In [8]:
from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=os.getenv('pinecone_API_KEY'))

  from tqdm.autonotebook import tqdm


## create new serverless index

In [9]:
# Create Index
index_name = "violation-data4"

if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=3072,
        metric="cosine",
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1'
        )
    )

index = pc.Index(index_name)


## create vector embedings

In [10]:
import openai
openai.api_key = os.getenv('OPENAI_API_KEY') 


def embed(docs: list[str]) -> list[list[float]]:
    res = openai.embeddings.create(
        input=docs,
        model="text-embedding-3-large"
    )
    doc_embeds = [r.embedding for r in res.data] 
    return doc_embeds 


doc_embeds = embed([d["text"] for d in data])

## upsert the data

In [67]:
inputs[0]

{'_id': ObjectId('66eeb404c466dc07daa4dc96'),
 'date': '2024-09-21',
 'time': '14:02:44',
 'license_plate_number': nan,
 'vehicle_type': 'car',
 'violation_type': 'overtaking',
 'longitude': 46.7219,
 'latitude': 24.6877,
 'street_name': 'الحارث بن الحباب'}

In [11]:
vectors = []
# f"A {inputs[i]['vehicle_type']} was detected {inputs[i]['violation_type']}
#  in {inputs[i]['street_name']} road at {inputs[i]['time']} on {inputs[i]['date']} 
#  at these coordinates: {inputs[i]['latitude']}, {inputs[i]['longitude']}
for d, e in zip(inputs, doc_embeds):
    vectors.append({
        "id": str(d['_id']),
        "values": e,
        "metadata": {'vehicle_type': d['vehicle_type'], 'violation_type': d['violation_type'], 'street_name': d['street_name'], 'time': d['time'], 'date': d['date'], 'latitude': d['latitude'], 'longitude': d['longitude']}
    })

index.upsert(
    vectors=vectors,
    namespace="violation-data-day1"
)


{'upserted_count': 74}

## check index status

In [12]:
print(index.describe_index_stats())

{'dimension': 3072,
 'index_fullness': 0.0,
 'namespaces': {'violation-data-day1': {'vector_count': 74}},
 'total_vector_count': 74}


### Query

In [15]:
query = "مخالفة برقم اللوحة 7862"

x = embed([query])

results = index.query(
    namespace="violation-data-day1",
    vector=x[0],
    top_k=3,
    include_values=False,
    include_metadata=True,
    
)

print(results)

{'matches': [{'id': '66f22b85895db7f65810cabb',
              'metadata': {'date': '2024-09-22',
                           'latitude': 46.666884,
                           'longitude': 24.828788,
                           'street_name': 'انس بن مالك',
                           'time': '20:45:09',
                           'vehicle_type': 'car',
                           'violation_type': 'overtaking'},
              'score': 0.356940389,
              'values': []},
             {'id': '66f22b85895db7f65810ca96',
              'metadata': {'date': '2024-09-22',
                           'latitude': 46.666884,
                           'longitude': 24.828788,
                           'street_name': 'انس بن مالك',
                           'time': '20:45:09',
                           'vehicle_type': 'car',
                           'violation_type': 'overtaking'},
              'score': 0.355448663,
              'values': []},
             {'id': '66f22b85895db7f65810ca9d'

In [None]:
#TODO