In [1]:
import pandas as pd
import random
from datetime import datetime, timedelta
from pymongo import MongoClient
from azure.core.exceptions import AzureError
from azure.core.credentials import AzureKeyCredential
import json
import urllib 
from openai import AzureOpenAI
from dotenv import dotenv_values
import os
# Values parsed from the default .env file, held as a mapping (dotenv_values does not modify os.environ).
# NOTE(review): `config` is not referenced in the visible cells — confirm it is still needed.
config = dotenv_values()

In [2]:
from dotenv import load_dotenv

# Read settings from variables.env; override=True lets the file's values replace
# variables that are already set in the process environment.
load_dotenv("variables.env", override=True)

# Connection string for the database; the placeholder text is used when the variable is unset.
MONGO_CONNECTION_STRING = os.getenv(
    "MONGO_CONNECTION_STRING_DISKANN",
    "<YOUR-COSMOS-DB-CONNECTION-STRING>",
)

# Azure OpenAI settings. Each of these is None when the variable is missing.
AOAI_KEY = os.getenv("AOAI_KEY")
AOAI_ENDPOINT = os.getenv("AOAI_ENDPOINT")
API_VERSION = os.getenv("API_VERSION")
AOAI_EMBEDDING_DEPLOYMENT = os.getenv("AOAI_EMBEDDING_DEPLOYMENT")
AOAI_EMBEDDING_DEPLOYMENT_MODEL = os.getenv("AOAI_EMBEDDING_DEPLOYMENT_MODEL")

# Shared Azure OpenAI client used by the embedding helper.
client = AzureOpenAI(
    api_version=API_VERSION,
    api_key=AOAI_KEY,
    azure_endpoint=AOAI_ENDPOINT,
)

In [3]:
def generate_embedding(text):
    """Return the embedding produced for ``text`` by the Azure OpenAI client.

    Sends ``text`` to ``client.embeddings.create`` using the model named by
    ``AOAI_EMBEDDING_DEPLOYMENT_MODEL`` and returns the ``embedding`` attribute
    of the first item in the response's ``data`` list.
    """
    result = client.embeddings.create(
        input=text,
        model=AOAI_EMBEDDING_DEPLOYMENT_MODEL,
    )
    first_item = result.data[0]
    return first_item.embedding

In [4]:
# Connect to the database server and select the database used by this notebook.
mongo_conn = MONGO_CONNECTION_STRING
mongo_client = MongoClient(mongo_conn)
db = mongo_client['Contoso_rental']

# Name of the collection that this notebook works with.
COLLECTION_NAME = "Listings_with_embeddings"

# Create the collection only when the database does not list it yet.
collection_exists = COLLECTION_NAME in db.list_collection_names()
if collection_exists:
    print("Using collection: '{}'.\n".format(COLLECTION_NAME))
else:
    db.create_collection(COLLECTION_NAME)
    print("Created collection '{}'.\n".format(COLLECTION_NAME))

# Handle used for subsequent operations on the collection.
collection = db[COLLECTION_NAME]

Created collection 'Listings_with_embeddings'.



In [6]:
# Create the DiskANN vector index on the collection selected via COLLECTION_NAME.
# A hard-coded collection name here can drift from COLLECTION_NAME and place the
# index on a collection other than the one bound to `collection`.
db.command({
  'createIndexes': COLLECTION_NAME,
  'indexes': [
    {
      'name': 'listingIndex',
      'key': {
        # Field to index; presumably where each document's embedding vector is stored — verify against the insert code.
        "Embedding": "cosmosSearch"
      },
      'cosmosSearchOptions': {
        'kind': 'vector-diskann',
        # NOTE(review): numLists is documented as an IVF index option; confirm whether it has any effect for DiskANN.
        'numLists': 1,
        # Cosine similarity.
        'similarity': 'COS',
        # NOTE(review): indexed vectors must have this length; generate_embedding does not
        # request a specific size — confirm the model's output dimension matches.
        'dimensions': 2000
      }
    }
  ]
})

{'raw': {'defaultShard': {'numIndexesBefore': 1,
   'numIndexesAfter': 2,
   'createdCollectionAutomatically': False,
   'ok': 1}},
 'ok': 1}

In [15]:
# Load JSON data from file
with open("datasets without embeddings\small_for_testing.json", 'r') as file:
    data = json.load(file)

print(data[0])

{'id': '360', 'listing_url': 'https://www.airbnb.com/rooms/360', 'source': 'city scrape', 'name': 'Sit in the Peaceful Garden of the Chickadee Cottage in LoHi', 'description': "Enjoy the famous Colorado weather and unplug in indoor & outdoor living. <br />Our charming cottage has a serene ambiance throughout every area. <br />Spend a sunny afternoon out on the hammock or enjoy the garden parlor sofa relax, read, or play a game, sink into the shared hot tub, practice yoga on the deck.<br />We are located next to downtown and in the neighborhood of lower highlands, <br />Short walks to superb coffee shops, restaurants, microbrews, distilleries, dispensaries & downtown. We are 420 outdoors only. LGBT Friendly, allergy-free, fragrance-free & pet-free. Ozone sterilized.<br /><br />Chickadee Cottage is the largest of our guest cottages.<br /><br />LOCATION: <br />The cottage is located in the center of Lower Highlands (LOHI) next to the Navajo Street Arts District along with the Bug Theater.