In [58]:
import os
import openai
import json
import pandas as pd
from datetime import datetime
from tqdm.notebook import tqdm
import time


# Connect to ChatGpt Api

In [59]:
openai.organization = os.getenv("CHATGPTORG")
openai.api_key = os.getenv("CHATGPTKEY")
openai.Model.list()

<OpenAIObject list at 0x12a4396d0> JSON: {
  "data": [
    {
      "created": 1677532384,
      "id": "whisper-1",
      "object": "model",
      "owned_by": "openai-internal",
      "parent": null,
      "permission": [
        {
          "allow_create_engine": false,
          "allow_fine_tuning": false,
          "allow_logprobs": true,
          "allow_sampling": true,
          "allow_search_indices": false,
          "allow_view": true,
          "created": 1683912666,
          "group": null,
          "id": "modelperm-KlsZlfft3Gma8pI6A8rTnyjs",
          "is_blocking": false,
          "object": "model_permission",
          "organization": "*"
        }
      ],
      "root": "whisper-1"
    },
    {
      "created": 1649358449,
      "id": "babbage",
      "object": "model",
      "owned_by": "openai",
      "parent": null,
      "permission": [
        {
          "allow_create_engine": false,
          "allow_fine_tuning": false,
          "allow_logprobs": true,
         

## Check if the connection and saving is working


In [60]:
openai.ChatCompletion.create(
  model="gpt-4",
  messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who won the world series in 2020?"},
        {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
        {"role": "user", "content": "Where was it played?"}
    ]
)

<OpenAIObject chat.completion id=chatcmpl-7a2PFzl56rGla9xiBDhMUpr0D3nyt at 0x12a0cab30> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "The 2020 World Series was played in Arlington, Texas at the Globe Life Field, which was a neutral location chosen due to the COVID-19 pandemic.",
        "role": "assistant"
      }
    }
  ],
  "created": 1688823413,
  "id": "chatcmpl-7a2PFzl56rGla9xiBDhMUpr0D3nyt",
  "model": "gpt-4-0613",
  "object": "chat.completion",
  "usage": {
    "completion_tokens": 32,
    "prompt_tokens": 53,
    "total_tokens": 85
  }
}

## Start testing

In [61]:
# open our test dataframe from the csv
df = pd.read_csv('data/test_datasets/random/random_sample_100_v2.csv')
df.head()

Unnamed: 0,id_left,brand_left,title_left,description_left,price_left,priceCurrency_left,specTableContent_left,cluster_id_left,id_right,brand_right,...,priceCurrency_right,specTableContent_right,cluster_id_right,pair_id,label,is_hard_negative,roberta-base_logits,roberta-base_prediction,rsupcon-base_logits,rsupcon-base_prediction
0,70108616,Maxxis,Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead,The new incarnation of the Minion DHR. Ready to shred any line.,110.0,CAD,,1084991,45040021,Maxxis,...,USD,,1084991,70108616#45040021,1,False,"[-4.3633804321, 4.6556377411]",1,[1.0],1
1,67977820,,Canon CLI251XL High Yield Black Inkjet Cartridge Remanufactured,"Crazy Inkjets is a leading supplier of high quality printing supplies for your Canon CLI251XL printer cartridge. CrazyInkjets products are guaranteed to meet or exceed the quality, reliability and yield standards of the original equipment remanufacturer. The units are tested to ensure compliance...",6.95,USD,,767463,69787972,Samsung,...,AUD,,1467531,67977820#69787972,0,True,"[3.8366084099, -4.2942962646]",0,[0.0],0
2,79859336,Crucial,Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266,,34.99,CAD,,1892167,65646040,Crucial,...,TRY,,1892167,79859336#65646040,1,False,"[-4.363576889, 4.6508393288]",1,[1.0],1
3,30368884,,"Crucial MX500 250GB 2.5"" SATA III","MX500 250GB SATA 2.5-inch, SATA 6.0Gb/s, 560 MB/s Read, 510 MB/s Write",43.59,EUR,,672125,86893846,,...,ZAR,,1409979,30368884#86893846,0,False,"[3.8347194195, -4.2961273193]",0,[0.0],0
4,82078171,,Intel Core I3 7th Gen 7100 3.90 Ghz; 2 Core 4 Thread; 3 Mb Smartcache; 51 W Tdp; Lga 1151 S R35 C,"ntel i3-7100, Core. Processor family: 7th gen Intel® Core™ i3, Processor frequency: 3.9 GHz, Processor socket: LGA 1151 (Socket H4). Memory channels: Dual, Maximum internal memory supported by processor: 64 GB, Memory types supported by processor: DDR3L-SDRAM,DDR4-SDRAM. On-board graphics adapte...",2899.0,ZAR,,443612,6914049,WESTERN DIGITAL,...,EUR,,549556,82078171#6914049,0,False,"[3.8386721611, -4.3010449409]",0,[0.0],0


In [62]:
df.shape

(100, 23)

In [63]:
# create a new column for the chatbot response
df['chatbot_response'] = ''
df['chatbot_response_raw'] = ''
df['chatbot_question'] = ''

In [64]:
# loop through the dataframe and send the messages to the chatbot
for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    message = [
        {"role": "system", "content": "Please match these two products"},
        {"role": "user",
            "content": f" product 1: {row['title_left']}, product 2: {row['title_right']}"},
    ]
    error = ""
    for i in range(3):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=message,
                max_tokens=3,
                temperature=0.0,
            )
            # if the above call succeeds, break out of the loop and continue
            break
        except Exception as e:
            error = e
            print(f"Error: {e}")
            # if the call fails, wait for one minute before retrying
            time.sleep(60)

    # check if the response was set successfully
    if response:
        # your code here
        df.at[index, 'chatbot_response_raw'] = response
        df.at[index, 'chatbot_response'] = response['choices'][0]['message']['content']
        df.at[index, 'chatbot_question'] = message
        print(index, response['choices'][0]['message']['content'])
    else:
        df.at[index, 'chatbot_response_raw'] = error
        df.at[index, 'chatbot_response'] = -1
        df.at[index, 'chatbot_question'] = message
        print("Error: response was not set successfully")

  0%|          | 0/100 [00:00<?, ?it/s]

0 product 1: Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead: Cycling Equipment, product 2: Maxxis Minion DHR II 3
1 product 1: Office Supplies, product 2: Office Supplies
2 product 1: Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266: Electronics
product 2: CRUCIAL CT4
3 product 1: Crucial MX500 250GB 2.5" SATA III: Electronics
product 2: JABARA Evolve 65 with Link 370 USB - Mono:
4 product 1: Intel Core I3 7th Gen 7100 3.90 Ghz; 2 Core 4 Thread; 3 Mb Smartcache; 51 W Tdp
5 Product 1: Tissot Seastar 1000 T120.417.11.051.00: Watches
Product 2: Samsung Portable SSD T7 Touch MU-PC1
6 product 1: Samsung 970 EVO SSD M.2 2280 - 1TB SSD - 1 TB - M.2 2280 (80mm) - PCI Express 
7 product 1: Ilford Ilford FP4+ 125 120 B&W Film - Single Roll: Photography
product 2: Canon EF-S 18-200mm f3.5
8 product 1: iiyama 27' ETE, ULTRA SLIM LINE, 2560x1440 WQHD, IPS, 5ms, FreeSync, 13
9 product 1: Cycling Equipment, product 2: Cycling Equipment
10 product 1: Kingston Technology DataTraveler SE9 G

In [65]:
df.head()

Unnamed: 0,id_left,brand_left,title_left,description_left,price_left,priceCurrency_left,specTableContent_left,cluster_id_left,id_right,brand_right,...,pair_id,label,is_hard_negative,roberta-base_logits,roberta-base_prediction,rsupcon-base_logits,rsupcon-base_prediction,chatbot_response,chatbot_response_raw,chatbot_question
0,70108616,Maxxis,Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead,The new incarnation of the Minion DHR. Ready to shred any line.,110.0,CAD,,1084991,45040021,Maxxis,...,70108616#45040021,1,False,"[-4.3633804321, 4.6556377411]",1,[1.0],1,"product 1: Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead: Cycling Equipment, product 2: Maxxis Minion DHR II 3","{'id': 'chatcmpl-7a2PJXD30RMVmVvOJrSzcqSRJ5ouz', 'object': 'chat.completion', 'created': 1688823417, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead: Cycling Equipment, product 2: Maxxis Minion DHR II 3"",  ""ro...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1..."
1,67977820,,Canon CLI251XL High Yield Black Inkjet Cartridge Remanufactured,"Crazy Inkjets is a leading supplier of high quality printing supplies for your Canon CLI251XL printer cartridge. CrazyInkjets products are guaranteed to meet or exceed the quality, reliability and yield standards of the original equipment remanufacturer. The units are tested to ensure compliance...",6.95,USD,,767463,69787972,Samsung,...,67977820#69787972,0,True,"[3.8366084099, -4.2942962646]",0,[0.0],0,"product 1: Office Supplies, product 2: Office Supplies","{'id': 'chatcmpl-7a2PNNQOnhJRFKKMVzuqggncuJGVG', 'object': 'chat.completion', 'created': 1688823421, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: Office Supplies, product 2: Office Supplies"",  ""role"": ""assistant"" }, 'finish_reason': 'stop'}], 'usage': {'...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1..."
2,79859336,Crucial,Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266,,34.99,CAD,,1892167,65646040,Crucial,...,79859336#65646040,1,False,"[-4.363576889, 4.6508393288]",1,[1.0],1,product 1: Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266: Electronics\nproduct 2: CRUCIAL CT4,"{'id': 'chatcmpl-7a2POoegCmNDoLsZag4KOIztZj4mg', 'object': 'chat.completion', 'created': 1688823422, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266: Electronics\nproduct 2: CRUCIAL CT4"",  ""role"":...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1..."
3,30368884,,"Crucial MX500 250GB 2.5"" SATA III","MX500 250GB SATA 2.5-inch, SATA 6.0Gb/s, 560 MB/s Read, 510 MB/s Write",43.59,EUR,,672125,86893846,,...,30368884#86893846,0,False,"[3.8347194195, -4.2961273193]",0,[0.0],0,"product 1: Crucial MX500 250GB 2.5"" SATA III: Electronics\nproduct 2: JABARA Evolve 65 with Link 370 USB - Mono:","{'id': 'chatcmpl-7a2PSC2hJ9ojRg1tvYYBagWh0Fcuh', 'object': 'chat.completion', 'created': 1688823426, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: Crucial MX500 250GB 2.5\"" SATA III: Electronics\nproduct 2: JABARA Evolve 65 with Link 370 USB - Mono:"",  ""r...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1..."
4,82078171,,Intel Core I3 7th Gen 7100 3.90 Ghz; 2 Core 4 Thread; 3 Mb Smartcache; 51 W Tdp; Lga 1151 S R35 C,"ntel i3-7100, Core. Processor family: 7th gen Intel® Core™ i3, Processor frequency: 3.9 GHz, Processor socket: LGA 1151 (Socket H4). Memory channels: Dual, Maximum internal memory supported by processor: 64 GB, Memory types supported by processor: DDR3L-SDRAM,DDR4-SDRAM. On-board graphics adapte...",2899.0,ZAR,,443612,6914049,WESTERN DIGITAL,...,82078171#6914049,0,False,"[3.8386721611, -4.3010449409]",0,[0.0],0,product 1: Intel Core I3 7th Gen 7100 3.90 Ghz; 2 Core 4 Thread; 3 Mb Smartcache; 51 W Tdp,"{'id': 'chatcmpl-7a2PVnttBxPEhBOnQ6toIGkXLEg1y', 'object': 'chat.completion', 'created': 1688823429, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: Intel Core I3 7th Gen 7100 3.90 Ghz; 2 Core 4 Thread; 3 Mb Smartcache; 51 W Tdp"",  ""role"": ""assistant"" }, 'f...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1..."


In [66]:
# get the number of rows were the chatbot respionse is not empty
df[df['chatbot_response'] != ''].shape

(100, 26)

In [67]:
df.tail()

Unnamed: 0,id_left,brand_left,title_left,description_left,price_left,priceCurrency_left,specTableContent_left,cluster_id_left,id_right,brand_right,...,pair_id,label,is_hard_negative,roberta-base_logits,roberta-base_prediction,rsupcon-base_logits,rsupcon-base_prediction,chatbot_response,chatbot_response_raw,chatbot_question
95,53525303,Ryze,Ryze Tello Battery Charging Hub G1CH,,129,DKK,,1555405,79449686,,...,53525303#79449686,0,True,"[3.6632392406000003, -4.1180138588]",0,[0.0],0,"product 1: Ryze Tello Battery Charging Hub G1CH: Electronics, product 2: AirPods with Wireless Charging Case: Electronics","{'id': 'chatcmpl-7a2U4IyKM563DbZAJkfvcy2LMdScH', 'object': 'chat.completion', 'created': 1688823712, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: Ryze Tello Battery Charging Hub G1CH: Electronics, product 2: AirPods with Wireless Charging Case: Electronic...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1..."
96,81415527,,2MP 25X Network IR PTZ Camera,"1/2.8\"" progressive scan CMOSUp to 1920 × 1080@30fps resolutionMin. illumination: Color: 0.005 Lux @(F1.6, AGC ON) B/W: 0.001 Lux @(F1.6, AGC ON) 0 Lux with IR25× optical zoom, 16× digital zoomWDR, HLC, BLC, 3D DNR, Defog, EIS, Regional Exposure, Regional FocusUp to 150 m IR distance24 VAC & Hi-...","Incl. BTW€1.035,76",EUR,,2694265,46810114,,...,81415527#46810114,1,False,"[-4.1793274879, 4.4736194611]",1,[1.0],1,product 1: 2MP 25X Network IR PTZ Camera: Electronics\nproduct 2: HIKVISION 2MP 25X NETWORK IR SPEED DOME CAMERA DS-2,"{'id': 'chatcmpl-7a2U7eSkdNrFqE25zBYz9zzBAxjJu', 'object': 'chat.completion', 'created': 1688823715, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: 2MP 25X Network IR PTZ Camera: Electronics\nproduct 2: HIKVISION 2MP 25X NETWORK IR SPEED DOME CAMERA DS-2"", ...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1..."
97,53053823,,Canon EF-S 18-135mm f/3.5-5.6 IS USM Zoom Lens,"The Canon EF-S 18-135mm f/3.5-5.6 IS USM Zoom Lens is great for photos and movies. With improved Image Stabilisation, it allows hand held shooting even in lower light conditions.Features 16 elements from 12 groups18-135mm Focal LengthBuilt-in Image StabiliserNano USM technologyMaximum magnificat...",799,AUD,,390091,45511817,,...,53053823#45511817,0,True,"[3.7480974197, -4.2194314003]",0,[0.0],0,product 1: Canon EF-S 18-135mm f/3.5-5.6 IS USM Zoom Lens: Electronics\nproduct 2: Nikon Nikkor AF-S 24,"{'id': 'chatcmpl-7a2UAdB9lNQJjjIT15dxMykW1DHn6', 'object': 'chat.completion', 'created': 1688823718, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: Canon EF-S 18-135mm f/3.5-5.6 IS USM Zoom Lens: Electronics\nproduct 2: Nikon Nikkor AF-S 24"",  ""role"": ""ass...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1..."
98,72039756,,"Cooler Master Chassis, MasterBox Lite 5 Tower",,64.52,USD,,1006147,43470746,,...,72039756#43470746,1,False,"[-4.3535614014, 4.6426811218]",1,[1.0],1,"product 1: Cooler Master Chassis, MasterBox Lite 5 Tower: Computer Hardware\nproduct 2: CASE MID-TOWER NO PSU MASTERBOX LITE 5 3USB3 BLACK","{'id': 'chatcmpl-7a2UEQFZQ5peIqDXOWceYq1w3g06j', 'object': 'chat.completion', 'created': 1688823722, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: Cooler Master Chassis, MasterBox Lite 5 Tower: Computer Hardware\nproduct 2: CASE MID-TOWER NO PSU MASTERBOX ...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1..."
99,79205401,,Crucial BX500 120GB 2.5″ SSD,"Ever wonder why your phone responds faster than your computer? It’s because your phone runs on flash memory. Add flash memory to your laptop or desktop computer with the Crucial BX500 120GB 2.5″ SSD, the easiest way to get all the speed of a new computer without the price. This SSD offers sequen...",499.0,ZAR,,744155,31394689,Crucial,...,79205401#31394689,0,True,"[3.8113934994000003, -4.2730379105]",0,[0.0],0,product 1: Crucial BX500 120GB 2.5″ SSD: Electronics\nproduct 2: Crucial Crucial SSD MX500 500GB M.2 2280,"{'id': 'chatcmpl-7a2UHbSgeB781qspK8V2c7sTL4clA', 'object': 'chat.completion', 'created': 1688823725, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: Crucial BX500 120GB 2.5\u2033 SSD: Electronics\nproduct 2: Crucial Crucial SSD MX500 500GB M.2 2280"",  ""role...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1..."


In [68]:
def clean_response(response):
    if "yes" in response.lower():
        return 1
    elif "no" in response.lower():
        return 0
    else:
        return -1

In [83]:
# add a coloumn with a clean version of the chatbot response if the chatresponse contains Yes/yes it will be 1 else if it contains No/no it will be 0 if it is 0 or 1 
df['chatbot_response_clean'] = df['chatbot_response'].apply(lambda x: clean_response(x))

In [73]:
# get the current date and time
now = datetime.now()

# save the dataframe as a json file
df.to_json(f'data/results/chat_gpt/medium/{now}_simple_promt_all.json')

In [74]:
# look at the wrong answers
df[df['chatbot_response_clean'] != df['label']].head()

Unnamed: 0,id_left,brand_left,title_left,description_left,price_left,priceCurrency_left,specTableContent_left,cluster_id_left,id_right,brand_right,...,label,is_hard_negative,roberta-base_logits,roberta-base_prediction,rsupcon-base_logits,rsupcon-base_prediction,chatbot_response,chatbot_response_raw,chatbot_question,chatbot_response_clean
0,70108616,Maxxis,Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead,The new incarnation of the Minion DHR. Ready to shred any line.,110.0,CAD,,1084991,45040021,Maxxis,...,1,False,"[-4.3633804321, 4.6556377411]",1,[1.0],1,"product 1: Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead: Cycling Equipment, product 2: Maxxis Minion DHR II 3","{'id': 'chatcmpl-7a2PJXD30RMVmVvOJrSzcqSRJ5ouz', 'object': 'chat.completion', 'created': 1688823417, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead: Cycling Equipment, product 2: Maxxis Minion DHR II 3"",  ""ro...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1...",
1,67977820,,Canon CLI251XL High Yield Black Inkjet Cartridge Remanufactured,"Crazy Inkjets is a leading supplier of high quality printing supplies for your Canon CLI251XL printer cartridge. CrazyInkjets products are guaranteed to meet or exceed the quality, reliability and yield standards of the original equipment remanufacturer. The units are tested to ensure compliance...",6.95,USD,,767463,69787972,Samsung,...,0,True,"[3.8366084099, -4.2942962646]",0,[0.0],0,"product 1: Office Supplies, product 2: Office Supplies","{'id': 'chatcmpl-7a2PNNQOnhJRFKKMVzuqggncuJGVG', 'object': 'chat.completion', 'created': 1688823421, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: Office Supplies, product 2: Office Supplies"",  ""role"": ""assistant"" }, 'finish_reason': 'stop'}], 'usage': {'...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1...",
2,79859336,Crucial,Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266,,34.99,CAD,,1892167,65646040,Crucial,...,1,False,"[-4.363576889, 4.6508393288]",1,[1.0],1,product 1: Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266: Electronics\nproduct 2: CRUCIAL CT4,"{'id': 'chatcmpl-7a2POoegCmNDoLsZag4KOIztZj4mg', 'object': 'chat.completion', 'created': 1688823422, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266: Electronics\nproduct 2: CRUCIAL CT4"",  ""role"":...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1...",
3,30368884,,"Crucial MX500 250GB 2.5"" SATA III","MX500 250GB SATA 2.5-inch, SATA 6.0Gb/s, 560 MB/s Read, 510 MB/s Write",43.59,EUR,,672125,86893846,,...,0,False,"[3.8347194195, -4.2961273193]",0,[0.0],0,"product 1: Crucial MX500 250GB 2.5"" SATA III: Electronics\nproduct 2: JABARA Evolve 65 with Link 370 USB - Mono:","{'id': 'chatcmpl-7a2PSC2hJ9ojRg1tvYYBagWh0Fcuh', 'object': 'chat.completion', 'created': 1688823426, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: Crucial MX500 250GB 2.5\"" SATA III: Electronics\nproduct 2: JABARA Evolve 65 with Link 370 USB - Mono:"",  ""r...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1...",
4,82078171,,Intel Core I3 7th Gen 7100 3.90 Ghz; 2 Core 4 Thread; 3 Mb Smartcache; 51 W Tdp; Lga 1151 S R35 C,"ntel i3-7100, Core. Processor family: 7th gen Intel® Core™ i3, Processor frequency: 3.9 GHz, Processor socket: LGA 1151 (Socket H4). Memory channels: Dual, Maximum internal memory supported by processor: 64 GB, Memory types supported by processor: DDR3L-SDRAM,DDR4-SDRAM. On-board graphics adapte...",2899.0,ZAR,,443612,6914049,WESTERN DIGITAL,...,0,False,"[3.8386721611, -4.3010449409]",0,[0.0],0,product 1: Intel Core I3 7th Gen 7100 3.90 Ghz; 2 Core 4 Thread; 3 Mb Smartcache; 51 W Tdp,"{'id': 'chatcmpl-7a2PVnttBxPEhBOnQ6toIGkXLEg1y', 'object': 'chat.completion', 'created': 1688823429, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {  ""content"": ""product 1: Intel Core I3 7th Gen 7100 3.90 Ghz; 2 Core 4 Thread; 3 Mb Smartcache; 51 W Tdp"",  ""role"": ""assistant"" }, 'f...","[{'role': 'system', 'content': 'You are a helpful assistant that assigns products to product categories. Please provide them in this format: product 1: category product 2: category They should be at a high level so things like electronics, outdoors etc.'}, {'role': 'user', 'content': ' product 1...",


In [75]:
# Check the accuracy of the chatbot 
df[df['chatbot_response_clean'] == df['label']].shape[0]
print(f"Accuracy: {df[df['chatbot_response_clean'] == df['label']].shape[0] / df.shape[0]}")

Accuracy: 0.0


In [76]:
# Check the number of -1 responses
df[df['chatbot_response_clean'] == -1].shape[0]

0

In [77]:
# Look at 5 complete chatbot responses dont limit the output length
# set the max_rows and max_columns options to None
# set the max_colwidth option to None
pd.set_option('display.max_colwidth', 300)

# display the first 5 chatbot responses where chatbot_response_raw is not -1
df['chatbot_response_raw'].head(3)

0    {'id': 'chatcmpl-7a2PJXD30RMVmVvOJrSzcqSRJ5ouz', 'object': 'chat.completion', 'created': 1688823417, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {
  "content": "product 1: Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead: Cycling Equipment, product 2: Maxxis Minion DHR II 3",
  "ro...
1    {'id': 'chatcmpl-7a2PNNQOnhJRFKKMVzuqggncuJGVG', 'object': 'chat.completion', 'created': 1688823421, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {
  "content": "product 1: Office Supplies, product 2: Office Supplies",
  "role": "assistant"
}, 'finish_reason': 'stop'}], 'usage': {'...
2    {'id': 'chatcmpl-7a2POoegCmNDoLsZag4KOIztZj4mg', 'object': 'chat.completion', 'created': 1688823422, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {
  "content": "product 1: Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266: Electronics\nproduct 2: CRUCIAL CT4",
  "role":...
Name: chatbot_response_raw, dtype: object

In [78]:
# Lets look at all mistakes only show the title_left, title_right, label and chatbot_response_clean
df[df['chatbot_response_clean'] != df['label']][['title_left', 'title_right', 'label', 'chatbot_response_clean']]


Unnamed: 0,title_left,title_right,label,chatbot_response_clean
0,Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead,"Maxxis Minion DHR II 3C MaxxTerra/DD TR 29\"" Tire - 29 x 2.3\"" (Folding Bead)",1,
1,Canon CLI251XL High Yield Black Inkjet Cartridge Remanufactured,"Samsung MLT-D103L, High Yield Black Toner/Drum for ML-2950ND / 2955ND / 2955DW, SCX-4728FD / 4729FD / 4729FW (2,500 pages)",0,
2,Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266,CRUCIAL CT4G4SFS8266 4Gb 2666Mhz DDR4 Notebook RAM SODIMM CL19 1.2V (By Micron),1,
3,"Crucial MX500 250GB 2.5"" SATA III",JABARA Evolve 65 with Link 370 USB - Mono,0,
4,Intel Core I3 7th Gen 7100 3.90 Ghz; 2 Core 4 Thread; 3 Mb Smartcache; 51 W Tdp; Lga 1151 S R35 C,DISCO DURO 2.5SSD 1TB SATA3 WD BLUE 3D NAND,0,
...,...,...,...,...
95,Ryze Tello Battery Charging Hub G1CH,AirPods with Wireless Charging Case,0,
96,2MP 25X Network IR PTZ Camera,HIKVISION 2MP 25X NETWORK IR SPEED DOME CAMERA DS-2DE5225IW-AE,1,
97,Canon EF-S 18-135mm f/3.5-5.6 IS USM Zoom Lens,0000071495| Nikon Nikkor AF-S 24-85mm f/3.5-4.5G ED VR Lens,0,
98,"Cooler Master Chassis, MasterBox Lite 5 Tower",CASE MID-TOWER NO PSU MASTERBOX LITE 5 3USB3 BLACK WINDOW PANEL,1,


In [79]:
# how many currencies are there in the dataset
df['priceCurrency_left'].unique()

array(['CAD', 'USD', 'EUR', 'ZAR', 'DKK', 'GBP', nan, 'AED', 'MYR', 'AUD',
       'PLN', 'KYD', 'NOK', 'CZK', 'INR', 'HRK', 'SEK', 'CHF', 'NZD',
       'RUB'], dtype=object)

In [80]:
# How often does priceCurrency_left and priceCurrency_right match
df[df['priceCurrency_left'] == df['priceCurrency_right']].shape[0]

13