### Import libraries

In [1]:
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance

import pandas as pd
import openai

### Read sample dataset with Amazon inventory data

In [2]:
# The sample file is JSON Lines: one product record per line, hence lines=True.
items_path = "../../data/meta_Electronics_2022_2023_with_category_rating_100_sample_1000.jsonl"
df_items = pd.read_json(items_path, lines=True)

In [3]:
df_items.head()

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
0,Computers,ASUS Vivobook Go 12 L210 11.6‚Äù Ultra-Thin Lapt...,3.9,444,[Efficient Intel Celeron N4020 Processor 1.1 G...,[The ASUS Laptop L210MA is designed to help yo...,185.0,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'ASUS Vivobook Go 11.6"" - Full Revi...",ASUS,"[Electronics, Computers & Accessories, Compute...",{'Standing screen display size': '11.6 Inches'...,B0B9JJ1D8Y,,,
1,Computers,G.Skill Trident Z5 NEO RGB Series (AMD Expo) 3...,4.8,145,"[Trident Z5 Neo RGB Series, designed for AMD X...",[],104.99,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Trident Z5 & Trident Z5 RGB Series...,G.Skill,"[Electronics, Computers & Accessories, Compute...","{'RAM': '32 GB DDR5', 'Brand': 'G.Skill', 'Ser...",B0BF6ZQ8MY,,,
2,Computers,CORSAIR VENGEANCE SODIMM DDR5 RAM 8GB (1x8GB) ...,4.5,234,[Upgrade Your DDR5 Gaming or Performance Lapto...,[Upgrade your DDR5 gaming or performance lapto...,36.99,[{'thumb': 'https://m.media-amazon.com/images/...,"[{'title': 'Corsair Vengeance DDR5 32GB', 'url...",Corsair,"[Electronics, Computers & Accessories, Compute...","{'RAM': '8 GB DDR5', 'Memory Speed': '4800 MHz...",B09YF1L6Y2,,,
3,All Electronics,UGREEN M.2 NVMe SSD Enclosure 10Gbps USB 3.2 E...,4.5,349,[High-speed Transmission: This NVMe M.2 SSD en...,[],24.99,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'Taking A Look At The ORICO M.2 Dri...,UGREEN,"[Electronics, Computers & Accessories, Compute...",{'Product Dimensions': '4.92 x 1.61 x 0.55 inc...,B0BQ6SYQWL,,,
4,,Slim & Expandable Laptop Backpack 15 15.6 16 I...,4.6,3663,[Slim & Expandable Design: Slim style for ligh...,[],29.99,[{'thumb': 'https://m.media-amazon.com/images/...,[{'title': 'I got a Grey one A really slim bag...,ZINZ,"[Electronics, Computers & Accessories, Laptop ...","{'Brand': 'ZINZ', 'Item Weight': '1.59 pounds'...",B097CYFH1M,,,


In [4]:
list(df_items['features'].items())[0]

(0,
 ['Efficient Intel Celeron N4020 Processor 1.1 GHz (4M Cache, up to 2.8 GHz).Voltage:19.0 volts',
  '11.6” HD (1366 x 768) Slim Display',
  '64GB eMMC Flash Storage and 4GB LPDDR4 RAM',
  'Windows 11 in S mode with One Year of Office 365 Personal',
  'Slim and Portable: 0.7” thin and weighs only 2.31 lbs (battery included)',
  'USB 3.2 Gen 1 Type-C, USB 3.2 Gen 1 Type-A, HDMI (*USB Transfer speed may vary. Learn more at ASUS website)',
  '802.11ac Wi-Fi for speeds up to three times faster than 802.11n',
  "Windows 11 in S mode is a 100% app based version of Windows where applications are verified and tested for quality on the Microsoft store. If you want to install an app that isn't available in the Microsoft Store, you'll need to switch out of S mode for free, which is easy and fast."])

In [5]:
from typing import Any, Hashable


# Peek at the first (index, value) pair of the 'images' column, the same way the
# 'features' column was inspected above. A plain list() call builds the same list
# as calling the subscripted generic alias did.
list(df_items['images'].items())[0]

(0,
 [{'thumb': 'https://m.media-amazon.com/images/I/41qj8g0hJiL._AC_US40_.jpg',
   'large': 'https://m.media-amazon.com/images/I/41qj8g0hJiL._AC_.jpg',
   'variant': 'MAIN',
   'hi_res': 'https://m.media-amazon.com/images/I/810BY5U9baL._AC_SL1500_.jpg'},
  {'thumb': 'https://m.media-amazon.com/images/I/312AFRapXjL._AC_US40_.jpg',
   'large': 'https://m.media-amazon.com/images/I/312AFRapXjL._AC_.jpg',
   'variant': 'PT01',
   'hi_res': 'https://m.media-amazon.com/images/I/71j97swOyqL._AC_SL1500_.jpg'},
  {'thumb': 'https://m.media-amazon.com/images/I/41szszVfKvL._AC_US40_.jpg',
   'large': 'https://m.media-amazon.com/images/I/41szszVfKvL._AC_.jpg',
   'variant': 'PT02',
   'hi_res': 'https://m.media-amazon.com/images/I/81cC7+BLJPL._AC_SL1500_.jpg'},
  {'thumb': 'https://m.media-amazon.com/images/I/31Pk4cdXs4L._AC_US40_.jpg',
   'large': 'https://m.media-amazon.com/images/I/31Pk4cdXs4L._AC_.jpg',
   'variant': 'PT03',
   'hi_res': 'https://m.media-amazon.com/images/I/71dLM1NqYgL._AC_SL1

### Preprocess titles and features

In [10]:
def preprocess_description(row):
    """Build the text used for embedding: the title followed by the feature strings.

    Args:
        row: Mapping-like record with a 'title' entry and an iterable of
            strings under 'features'.

    Returns:
        A single string: the title, one space, then the features joined by spaces.
    """
    feature_text = ' '.join(row['features'])
    return '{} {}'.format(row['title'], feature_text)

In [12]:
def extract_first_large_image(row):
    """Return the 'large' URL of the first image listed for a product row.

    Args:
        row: Mapping-like record (for example a DataFrame row) with an
            'images' entry.

    Returns:
        The value stored under 'large' in the first image dict, or '' when the
        row has no images (empty list, None or NaN) or the first image has no
        'large' key. If 'images' already holds a string, it is returned as is.
    """
    images = row['images']
    # This notebook writes the function's result back into the 'images' column,
    # so on a re-run the value is already a URL string; pass it through instead
    # of indexing into it.
    if isinstance(images, str):
        return images
    # A product with no images used to raise IndexError on images[0].
    if images is None or isinstance(images, float) or len(images) == 0:
        return ''
    return images[0].get('large', '')

In [13]:
# Compute both derived columns first, then write them back over the raw ones.
# 'description' becomes title + features text; 'images' becomes a single URL string.
item_descriptions = df_items.apply(preprocess_description, axis=1)
item_image_urls = df_items.apply(extract_first_large_image, axis=1)
df_items['description'] = item_descriptions
df_items['images'] = item_image_urls


In [14]:
df_items.head()

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
0,Computers,ASUS Vivobook Go 12 L210 11.6‚Äù Ultra-Thin Lapt...,3.9,444,[Efficient Intel Celeron N4020 Processor 1.1 G...,ASUS Vivobook Go 12 L210 11.6‚Äù Ultra-Thin Lapt...,185.0,https://m.media-amazon.com/images/I/41qj8g0hJi...,"[{'title': 'ASUS Vivobook Go 11.6"" - Full Revi...",ASUS,"[Electronics, Computers & Accessories, Compute...",{'Standing screen display size': '11.6 Inches'...,B0B9JJ1D8Y,,,
1,Computers,G.Skill Trident Z5 NEO RGB Series (AMD Expo) 3...,4.8,145,"[Trident Z5 Neo RGB Series, designed for AMD X...",G.Skill Trident Z5 NEO RGB Series (AMD Expo) 3...,104.99,https://m.media-amazon.com/images/I/41kXk+Tm4l...,[{'title': 'Trident Z5 & Trident Z5 RGB Series...,G.Skill,"[Electronics, Computers & Accessories, Compute...","{'RAM': '32 GB DDR5', 'Brand': 'G.Skill', 'Ser...",B0BF6ZQ8MY,,,
2,Computers,CORSAIR VENGEANCE SODIMM DDR5 RAM 8GB (1x8GB) ...,4.5,234,[Upgrade Your DDR5 Gaming or Performance Lapto...,CORSAIR VENGEANCE SODIMM DDR5 RAM 8GB (1x8GB) ...,36.99,https://m.media-amazon.com/images/I/412r8L-mAe...,"[{'title': 'Corsair Vengeance DDR5 32GB', 'url...",Corsair,"[Electronics, Computers & Accessories, Compute...","{'RAM': '8 GB DDR5', 'Memory Speed': '4800 MHz...",B09YF1L6Y2,,,
3,All Electronics,UGREEN M.2 NVMe SSD Enclosure 10Gbps USB 3.2 E...,4.5,349,[High-speed Transmission: This NVMe M.2 SSD en...,UGREEN M.2 NVMe SSD Enclosure 10Gbps USB 3.2 E...,24.99,https://m.media-amazon.com/images/I/41quzN6SDu...,[{'title': 'Taking A Look At The ORICO M.2 Dri...,UGREEN,"[Electronics, Computers & Accessories, Compute...",{'Product Dimensions': '4.92 x 1.61 x 0.55 inc...,B0BQ6SYQWL,,,
4,,Slim & Expandable Laptop Backpack 15 15.6 16 I...,4.6,3663,[Slim & Expandable Design: Slim style for ligh...,Slim & Expandable Laptop Backpack 15 15.6 16 I...,29.99,https://m.media-amazon.com/images/I/51KN-Q5WXp...,[{'title': 'I got a Grey one A really slim bag...,ZINZ,"[Electronics, Computers & Accessories, Laptop ...","{'Brand': 'ZINZ', 'Item Weight': '1.59 pounds'...",B097CYFH1M,,,


In [15]:
# First (index, value) pair of the rebuilt 'description' column.
list(df_items['description'].items())[0]

(0,
 "ASUS Vivobook Go 12 L210 11.6‚Äù Ultra-Thin Laptop, 2022 Version, Intel Celeron N4020, 4GB RAM, 64GB eMMC, Win 11 Home in S Mode with One Year of Office 365 Personal, L210MA-DS02 Efficient Intel Celeron N4020 Processor 1.1 GHz (4M Cache, up to 2.8 GHz).Voltage:19.0 volts 11.6‚Äù HD (1366 x 768) Slim Display 64GB eMMC Flash Storage and 4GB LPDDR4 RAM Windows 11 in S mode with One Year of Office 365 Personal Slim and Portable: 0.7‚Äù thin and weighs only 2.31 lbs (battery included) USB 3.2 Gen 1 Type-C, USB 3.2 Gen 1 Type-A, HDMI (*USB Transfer speed may vary. Learn more at ASUS website) 802.11ac Wi-Fi for speeds up to three times faster than 802.11n Windows 11 in S mode is a 100% app based version of Windows where applications are verified and tested for quality on the Microsoft store. If you want to install an app that isn't available in the Microsoft Store, you'll need to switch out of S mode for free, which is easy and fast.")

### Sample 50 from the dataset

In [16]:
# Fixed random_state keeps the 50 sampled rows the same on every run.
df_sample = df_items.sample(n=50, random_state=42)

In [17]:
len(df_sample)

50

In [19]:
# Columns kept for each record; the whole record is later stored as the point payload.
payload_columns = ['description', 'images', 'rating_number', 'price', 'average_rating', 'parent_asin']
data_to_embed = df_sample[payload_columns].to_dict(orient='records')

In [21]:
data_to_embed

[{'description': 'BOVKE Travel Carrying Case for Samaung T7 Shield / T7 / T7 Touch Portable SSD 500GB 1TB 2TB USB 3.2 External Solid State Drives, Extra Mesh Pocket for USB Cables and More Accessories, Black Case Only! ( samsung ssd & accessories not included )This external hard drive case perfectly fit for: Samsung T7 Shield Portable SSD, Samsung T7 Portable SSD, Samsung T7 Touch 500GB 1TB 2TB USB 3.2 Portable Solid State Drives, provide excellent protection, keeps the solid state external hard drives safe from damage and prolongs the service life of your T7 external ssd. This samsung t7 case bag is featured with 2 elastic bands inside, securely store the samsung t7 shield portable ssd, and the T7 Carrying Case can prevent the Samsung external ssd from falling, protects your external solid state drives from tossing around while on the go, gives your portable ssd great protection. This T7 shield hard drive carrying case comes with a mesh pocket to accommodate the USB cables of the Sams

### Define the embedding function

In [26]:
# One-off probe call; the next cell reads the vector length from this response.
response = openai.embeddings.create(model="text-embedding-3-small", input="Random text")



In [29]:
len(response.data[0].embedding)

1536

In [30]:
def get_embeddings(text, model="text-embedding-3-small"):
    """Request an embedding for `text` and return the vector.

    Args:
        text: Input forwarded as `input` to openai.embeddings.create.
        model: Embedding model name forwarded to the same call.

    Returns:
        The `embedding` of the first item in the response's `data`.
        Only that first item is returned, whatever else the response holds.
    """
    embedding_response = openai.embeddings.create(model=model, input=text)
    first_item = embedding_response.data[0]
    return first_item.embedding


In [32]:
get_embeddings("Hanlin")

[-0.038700953125953674,
 -0.013030647300183773,
 0.02125566639006138,
 0.03176264837384224,
 0.003122592344880104,
 -0.06665322184562683,
 -0.04404686018824577,
 0.03656827658414841,
 -0.017388414591550827,
 -0.04151608422398567,
 -0.02040259540081024,
 0.03756352514028549,
 -0.013066192157566547,
 -0.003236335003748536,
 0.02253527194261551,
 -0.010563853196799755,
 -0.05544956400990486,
 0.014303144067525864,
 0.001699920161627233,
 0.07450146228075027,
 -0.004005875438451767,
 0.016961880028247833,
 0.031193934381008148,
 0.026615791022777557,
 -0.009220266714692116,
 0.026047077029943466,
 0.025592105463147163,
 -0.045838307589292526,
 0.049876172095537186,
 -0.034748394042253494,
 0.07649195939302444,
 -0.029629971832036972,
 0.06255847960710526,
 0.013535380363464355,
 -0.00831032544374466,
 0.05107047036290169,
 -0.0035597907844930887,
 -0.02849254570901394,
 -0.015298392623662949,
 -0.04208479821681976,
 0.034009065479040146,
 0.0076705231331288815,
 -0.0158955417573452,
 -0.02

### Create Qdrant collection

In [33]:
# Client for the Qdrant server at http://localhost:6333; every collection call below goes through it.
qdrant_client = QdrantClient(url="http://localhost:6333")

In [34]:
# Create the collection only when it is missing, so this cell can be re-run against
# a Qdrant server that already holds it (the collection outlives a kernel restart).
# size=1536 matches the embedding length measured for text-embedding-3-small above.
if not qdrant_client.collection_exists("Amazon-items-collection-00"):
    qdrant_client.create_collection(
        collection_name="Amazon-items-collection-00",
        vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
    )


True

### Embed data

#### Test

In [35]:
# Smoke test: embed one short string and wrap it in a PointStruct before
# processing the real records.
test_text = "Test text"
pointstruct = PointStruct(
    id=0,
    vector=get_embeddings(test_text),
    payload={"text": test_text, "model": "text-embedding-3-small"},
)

In [42]:
pointstruct

PointStruct(id=0, vector=[-0.020057253539562225, 0.006970119196921587, 0.037700485438108444, -0.040323127061128616, -0.01916317082941532, -0.0343029722571373, 0.0005480913096107543, -0.02425944246351719, 0.03871377930045128, 0.0015953787369653583, 0.030667034909129143, 0.012792832218110561, -0.010863103903830051, 0.011287793517112732, 0.02639034017920494, 0.04303517937660217, -0.046820126473903656, -0.00690678833052516, -0.02174110896885395, 0.05108192190527916, 0.0070856050588190556, 0.013113211840391159, 0.01130269467830658, -0.032544609159231186, -0.00393396383151412, -0.039041608572006226, -0.036955416202545166, 0.004965884145349264, 0.05841339752078056, -0.07462609559297562, 0.031382299959659576, -0.044018667191267014, -0.00025891143013723195, -0.010974864475429058, 0.006005255039781332, 0.03814752772450447, 0.03030940145254135, 0.037640880793333054, 0.010274499654769897, -0.029355714097619057, -0.014916278421878815, -0.010386260226368904, 0.021279167383909225, 0.01814987696707248

### Amazon data

In [43]:
# One point per record: the id is the record's position in data_to_embed, the
# vector is the embedding of its description, and the payload is the record itself.
pointstructs = [
    PointStruct(
        id=point_id,
        vector=get_embeddings(record['description']),
        payload=record,
    )
    for point_id, record in enumerate(data_to_embed)
]

In [44]:
pointstructs

[PointStruct(id=0, vector=[0.007334774360060692, -0.02041487954556942, -0.006503597367554903, 0.009614435955882072, -0.04187966510653496, -0.0316527858376503, 0.024750610813498497, 0.01931636407971382, 0.04705143719911575, -0.02737538143992424, -0.010071340017020702, -0.0002015665522776544, -0.009001989848911762, -0.03433588147163391, 0.0029334237333387136, 0.00753406248986721, 0.02568386308848858, 0.016468003392219543, -0.05206766352057457, 0.024945037439465523, 0.022495253011584282, 0.03112783096730709, 0.0629555955529213, 0.03272213414311409, 0.02302020601928234, -0.014232086949050426, 0.01780955120921135, -0.0034486562944948673, 0.008933939971029758, -0.03631904348731041, 0.05109552666544914, -0.022359153255820274, 0.013512705452740192, -0.05996141582727432, -0.00812220573425293, 0.030797302722930908, 0.028483618050813675, -0.019724661484360695, -0.02583940513432026, 0.025022808462381363, 0.012647503986954689, 0.022670237347483635, 0.03431643918156624, 0.014669548720121384, 0.00442

In [45]:
len(pointstructs)

50

### Write embedded data to Qdrant

In [46]:
# Send every PointStruct to the collection in a single upsert call.
qdrant_client.upsert(
    points=pointstructs,
    collection_name="Amazon-items-collection-00",
    wait=True,
)

UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

### Define a function for data retrieval

In [47]:
def retrieve_data(query, k=5, collection_name="Amazon-items-collection-00"):
    """Embed a text query and fetch its nearest points from a Qdrant collection.

    Args:
        query: Text to embed with get_embeddings and search for.
        k: Maximum number of points to request (passed as `limit`).
        collection_name: Collection to search. Defaults to the collection
            created and populated earlier in this notebook, so existing
            calls keep working unchanged.

    Returns:
        The object returned by qdrant_client.query_points; the matches are
        read from its `.points` attribute.
    """
    query_embedding = get_embeddings(query)
    return qdrant_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        limit=k,
    )

### Test Retrieval

In [48]:
retrieve_data("What kind of charging cords do you offer?", k=10).points

[ScoredPoint(id=48, version=1, score=0.4413395, payload={'description': 'iPhone Charger Lightning Cable 3Pack Quick Charger Rapid Cord Apple MFi Certified Compatible iPhone 11 Pro X XR XS MAX 8 Plus 7 6s 5s 5c Air Mini iPod ', 'images': 'https://m.media-amazon.com/images/I/01RmK+J4pJL.gif', 'rating_number': 1096, 'price': None, 'average_rating': 4.3, 'parent_asin': 'B096BND451'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=4, version=1, score=0.4217459, payload={'description': 'BLACKSYNCZE USB C to Lightning Cable 2Pack 6FT, [MFi Certified] iPhone Charger Cord USB C Nylon Braided Type C to Lightning Cable Fast Charging for iPhone 13 12 11 Pro Max XR XS X 8 7 6s, SE2020 [MFi Certified iPhone Cable] This USB C to Lightning Cable features an upgraded C94 connector and original MFi chip for seamless compatibility and no pop-up error messages; protects your devices from overcurrent and overvoltage while charging. [20W PD Fast Charging Cable] With 20W PD fast charging, th