In [8]:
from urllib.parse import quote_plus
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from pymongo.errors import CollectionInvalid, DuplicateKeyError
from pymongo.operations import SearchIndexModel
from sentence_transformers import SentenceTransformer
from PIL import Image
from io import BytesIO
from tqdm import tqdm
from dotenv import load_dotenv
from typing import Literal
import os
import json
import requests

load_dotenv()

mongo_db_user = quote_plus(os.getenv('MONGO_DB_USER'))
mongo_db_password = quote_plus(os.getenv('MONGO_DB_PASSWORD'))
mongo_db_name = os.getenv('MONGO_DB_NAME')
# collection_name = os.environ.get('MONGO_COLLECTION_NAME')
collection_name = "dining_products_cat"

uri = f"mongodb+srv://{mongo_db_user}:{mongo_db_password}@cluster0.eld31uu.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
print(uri)

# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))
db = client.get_database(mongo_db_name)
collection = db.get_collection(collection_name)

model = SentenceTransformer("clip-ViT-L-14")

def create_collections(collection_name, db=db):
    try:
        db.create_collection(collection_name)
    except CollectionInvalid:
        # This is raised when the collection already exists.
        print("Images collection already exists")

def load_json_data_to_mongo(json_file_path, final_category : Literal["Center_Piece", "Glass_Set", "Dinner_Table", "Cutlery"]):
    with open(json_file_path, 'r') as file:
        data = json.load(file)

    
    for item in data:
        item_id = item['itemId']
        thumbnail_images = item['thumbnailImages']

        embeddings = []
        for image_info in tqdm(thumbnail_images): # tqdm to see if there are more than one thumbnail images
            image_url = image_info['imageUrl']
            
            # Download the image
            response = requests.get(image_url)
            if response.status_code == 200:
                image = Image.open(BytesIO(response.content))
                image = image.convert('RGB')
                image_embedding = model.encode([image])[0]
                embeddings.append(image_embedding.tolist())
            else:
                print(f"Failed to download {image_url}")

        # Add the embeddings to the item
        item['embedding'] = embeddings
        item['final_category'] = final_category

        # Insert the item into MongoDB
        result = collection.insert_one(item)
        print(f"Document inserted with ID: {result.inserted_id}")

    print("All images have been processed and documents inserted into MongoDB.")

mongodb+srv://chatbot:ChatBot%407823@cluster0.eld31uu.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0


In [9]:
load_json_data_to_mongo('data/ebay_browse_search_results_centerpieces_for_dining_table.json', final_category="Center_Piece")

100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Document inserted with ID: 6678b60fad4c61fdd5316390


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]


Document inserted with ID: 6678b611ad4c61fdd5316391


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]


Document inserted with ID: 6678b613ad4c61fdd5316392


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]


Document inserted with ID: 6678b614ad4c61fdd5316393


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]


Document inserted with ID: 6678b616ad4c61fdd5316394


100%|██████████| 1/1 [00:01<00:00,  1.34s/it]


Document inserted with ID: 6678b617ad4c61fdd5316395


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]


Document inserted with ID: 6678b619ad4c61fdd5316396


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]


Document inserted with ID: 6678b61aad4c61fdd5316397


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]


Document inserted with ID: 6678b61bad4c61fdd5316398


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]


Document inserted with ID: 6678b61dad4c61fdd5316399


100%|██████████| 1/1 [00:01<00:00,  1.64s/it]


Document inserted with ID: 6678b61fad4c61fdd531639a


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]


Document inserted with ID: 6678b620ad4c61fdd531639b


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]


Document inserted with ID: 6678b622ad4c61fdd531639c


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]


Document inserted with ID: 6678b623ad4c61fdd531639d


100%|██████████| 1/1 [00:01<00:00,  1.33s/it]


Document inserted with ID: 6678b625ad4c61fdd531639e


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]


Document inserted with ID: 6678b626ad4c61fdd531639f


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]


Document inserted with ID: 6678b628ad4c61fdd53163a0


100%|██████████| 1/1 [00:01<00:00,  1.37s/it]


Document inserted with ID: 6678b629ad4c61fdd53163a1


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]


Document inserted with ID: 6678b62bad4c61fdd53163a2


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]

Document inserted with ID: 6678b62cad4c61fdd53163a3
All images have been processed and documents inserted into MongoDB.





In [10]:
load_json_data_to_mongo('data/ebay_browse_search_results_Dining_Table.json', final_category="Dinner_Table")

100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Document inserted with ID: 6678b671ad4c61fdd53163a4


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]


Document inserted with ID: 6678b673ad4c61fdd53163a5


100%|██████████| 1/1 [00:01<00:00,  1.61s/it]


Document inserted with ID: 6678b674ad4c61fdd53163a6


100%|██████████| 1/1 [00:02<00:00,  2.04s/it]


Document inserted with ID: 6678b676ad4c61fdd53163a7


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]


Document inserted with ID: 6678b678ad4c61fdd53163a8


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]


Document inserted with ID: 6678b679ad4c61fdd53163a9


100%|██████████| 1/1 [00:01<00:00,  1.47s/it]


Document inserted with ID: 6678b67bad4c61fdd53163aa


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]


Document inserted with ID: 6678b67cad4c61fdd53163ab


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]


Document inserted with ID: 6678b67ead4c61fdd53163ac


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]


Document inserted with ID: 6678b67fad4c61fdd53163ad


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]


Document inserted with ID: 6678b681ad4c61fdd53163ae


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]


Document inserted with ID: 6678b682ad4c61fdd53163af


100%|██████████| 1/1 [00:01<00:00,  1.39s/it]


Document inserted with ID: 6678b684ad4c61fdd53163b0


100%|██████████| 1/1 [00:01<00:00,  1.47s/it]


Document inserted with ID: 6678b685ad4c61fdd53163b1


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]


Document inserted with ID: 6678b687ad4c61fdd53163b2


100%|██████████| 1/1 [00:01<00:00,  1.45s/it]


Document inserted with ID: 6678b688ad4c61fdd53163b3


100%|██████████| 1/1 [00:01<00:00,  1.57s/it]


Document inserted with ID: 6678b68bad4c61fdd53163b4


100%|██████████| 1/1 [00:01<00:00,  1.55s/it]


Document inserted with ID: 6678b68dad4c61fdd53163b5


100%|██████████| 1/1 [00:01<00:00,  1.80s/it]


Document inserted with ID: 6678b68ead4c61fdd53163b6


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]

Document inserted with ID: 6678b690ad4c61fdd53163b7
All images have been processed and documents inserted into MongoDB.





In [12]:
load_json_data_to_mongo('data/ebay_browse_search_results_Glass_Set.json', final_category="Glass_Set")

100%|██████████| 1/1 [00:01<00:00,  1.65s/it]


Document inserted with ID: 6678b81ead4c61fdd53163cc


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]


Document inserted with ID: 6678b81fad4c61fdd53163cd


100%|██████████| 1/1 [00:01<00:00,  1.47s/it]


Document inserted with ID: 6678b821ad4c61fdd53163ce


100%|██████████| 1/1 [00:01<00:00,  1.86s/it]


Document inserted with ID: 6678b823ad4c61fdd53163cf


100%|██████████| 1/1 [00:01<00:00,  1.85s/it]


Document inserted with ID: 6678b825ad4c61fdd53163d0


100%|██████████| 1/1 [00:01<00:00,  1.86s/it]


Document inserted with ID: 6678b827ad4c61fdd53163d1


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]


Document inserted with ID: 6678b828ad4c61fdd53163d2


100%|██████████| 1/1 [00:02<00:00,  2.20s/it]


Document inserted with ID: 6678b82bad4c61fdd53163d3


100%|██████████| 1/1 [00:01<00:00,  1.46s/it]


Document inserted with ID: 6678b82cad4c61fdd53163d4


100%|██████████| 1/1 [00:01<00:00,  1.40s/it]


Document inserted with ID: 6678b82ead4c61fdd53163d5


100%|██████████| 1/1 [00:01<00:00,  1.57s/it]


Document inserted with ID: 6678b82fad4c61fdd53163d6


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]


Document inserted with ID: 6678b831ad4c61fdd53163d7


100%|██████████| 1/1 [00:01<00:00,  1.59s/it]


Document inserted with ID: 6678b833ad4c61fdd53163d8


100%|██████████| 1/1 [00:01<00:00,  1.57s/it]


Document inserted with ID: 6678b835ad4c61fdd53163d9


100%|██████████| 1/1 [00:01<00:00,  1.45s/it]


Document inserted with ID: 6678b836ad4c61fdd53163da


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]


Document inserted with ID: 6678b838ad4c61fdd53163db


100%|██████████| 1/1 [00:01<00:00,  1.62s/it]


Document inserted with ID: 6678b839ad4c61fdd53163dc


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]


Document inserted with ID: 6678b83bad4c61fdd53163dd


100%|██████████| 1/1 [00:01<00:00,  1.66s/it]


Document inserted with ID: 6678b83dad4c61fdd53163de


100%|██████████| 1/1 [00:02<00:00,  2.02s/it]

Document inserted with ID: 6678b83fad4c61fdd53163df
All images have been processed and documents inserted into MongoDB.





In [11]:
load_json_data_to_mongo('data/ebay_browse_search_results_Dinner_Set.json', final_category="Cutlery")

100%|██████████| 1/1 [00:01<00:00,  1.79s/it]


Document inserted with ID: 6678b78fad4c61fdd53163b8


100%|██████████| 1/1 [00:01<00:00,  1.80s/it]


Document inserted with ID: 6678b791ad4c61fdd53163b9


100%|██████████| 1/1 [00:02<00:00,  2.04s/it]


Document inserted with ID: 6678b793ad4c61fdd53163ba


100%|██████████| 1/1 [00:01<00:00,  1.44s/it]


Document inserted with ID: 6678b794ad4c61fdd53163bb


100%|██████████| 1/1 [00:01<00:00,  1.35s/it]


Document inserted with ID: 6678b796ad4c61fdd53163bc


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]


Document inserted with ID: 6678b797ad4c61fdd53163bd


100%|██████████| 1/1 [00:01<00:00,  1.43s/it]


Document inserted with ID: 6678b799ad4c61fdd53163be


100%|██████████| 1/1 [00:01<00:00,  1.36s/it]


Document inserted with ID: 6678b79bad4c61fdd53163bf


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]


Document inserted with ID: 6678b79cad4c61fdd53163c0


100%|██████████| 1/1 [00:01<00:00,  1.90s/it]


Document inserted with ID: 6678b79ead4c61fdd53163c1


100%|██████████| 1/1 [00:01<00:00,  1.50s/it]


Document inserted with ID: 6678b7a0ad4c61fdd53163c2


100%|██████████| 1/1 [00:01<00:00,  1.52s/it]


Document inserted with ID: 6678b7a1ad4c61fdd53163c3


100%|██████████| 1/1 [00:01<00:00,  1.38s/it]


Document inserted with ID: 6678b7a3ad4c61fdd53163c4


100%|██████████| 1/1 [00:01<00:00,  1.48s/it]


Document inserted with ID: 6678b7a4ad4c61fdd53163c5


100%|██████████| 1/1 [00:01<00:00,  1.42s/it]


Document inserted with ID: 6678b7a6ad4c61fdd53163c6


100%|██████████| 1/1 [00:01<00:00,  1.53s/it]


Document inserted with ID: 6678b7a7ad4c61fdd53163c7


100%|██████████| 1/1 [00:01<00:00,  1.56s/it]


Document inserted with ID: 6678b7a9ad4c61fdd53163c8


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Document inserted with ID: 6678b7abad4c61fdd53163c9


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]


Document inserted with ID: 6678b7acad4c61fdd53163ca


100%|██████████| 1/1 [00:01<00:00,  1.49s/it]

Document inserted with ID: 6678b7aead4c61fdd53163cb
All images have been processed and documents inserted into MongoDB.



