In [36]:
import os
import json
import pandas as pd
from datetime import datetime
from tqdm.notebook import tqdm
import time
import requests


# Connect to Aleph Alpha

In [37]:
def get_answer(question, model="luminous-base", maximum_tokens=6, timeout=60):
    """Send a prompt to the Aleph Alpha completion endpoint and return the parsed JSON reply.

    Parameters
    ----------
    question : str
        Prompt text sent as the ``prompt`` field of the request.
    model : str, optional
        Model name sent as the ``model`` field (default ``"luminous-base"``).
    maximum_tokens : int, optional
        Value sent as the ``maximum_tokens`` field (default ``6``).
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    dict
        The decoded JSON body of the response. The sample call in this
        notebook returned a dict with ``completions`` and ``model_version`` keys.

    Raises
    ------
    RuntimeError
        If the ``ALEPHALPHATOKEN`` environment variable is not set.
    requests.exceptions.RequestException
        On connection problems, timeouts, or a 4xx/5xx HTTP status.
    """
    # Fail with a clear message instead of sending the literal header "Bearer None".
    token = os.getenv('ALEPHALPHATOKEN')
    if not token:
        raise RuntimeError("Environment variable ALEPHALPHATOKEN is not set")

    url = "https://api.aleph-alpha.com/complete"

    payload = json.dumps({
        "model": model,
        "prompt": question,
        "maximum_tokens": maximum_tokens,
    })
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'Authorization': f"Bearer {token}",
    }

    reply = requests.post(url, headers=headers, data=payload, timeout=timeout)
    # Turn HTTP error statuses into exceptions so callers can retry,
    # rather than handing back an error payload as if it were a completion.
    reply.raise_for_status()
    return reply.json()

# Smoke test: send one short prompt and print the parsed reply to confirm the API call works.
print(get_answer("An apple a day"))

{'completions': [{'completion': ' keeps the doctor away, but', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}


## Start testing

In [38]:
# Load the test sample of product pairs from CSV and preview the first five rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which suggests the CSV was
# written together with its index — consider read_csv(..., index_col=0); confirm first.
df = pd.read_csv('data/test/random_sample_100_v2.csv')
df.head()

Unnamed: 0,id_left,brand_left,title_left,description_left,price_left,priceCurrency_left,specTableContent_left,cluster_id_left,id_right,brand_right,...,priceCurrency_right,specTableContent_right,cluster_id_right,pair_id,label,is_hard_negative,roberta-base_logits,roberta-base_prediction,rsupcon-base_logits,rsupcon-base_prediction
0,70108616,Maxxis,Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead,The new incarnation of the Minion DHR. Ready to shred any line.,110.0,CAD,,1084991,45040021,Maxxis,...,USD,,1084991,70108616#45040021,1,False,"[-4.3633804321, 4.6556377411]",1,[1.0],1
1,67977820,,Canon CLI251XL High Yield Black Inkjet Cartridge Remanufactured,"Crazy Inkjets is a leading supplier of high quality printing supplies for your Canon CLI251XL printer cartridge. CrazyInkjets products are guaranteed to meet or exceed the quality, reliability and yield standards of the original equipment remanufacturer. The units are tested to ensure compliance with original specifications and performance criteria and offer a sound and economical alternative to the expensive brand-name products.",6.95,USD,,767463,69787972,Samsung,...,AUD,,1467531,67977820#69787972,0,True,"[3.8366084099, -4.2942962646]",0,[0.0],0
2,79859336,Crucial,Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266,,34.99,CAD,,1892167,65646040,Crucial,...,TRY,,1892167,79859336#65646040,1,False,"[-4.363576889, 4.6508393288]",1,[1.0],1
3,30368884,,"Crucial MX500 250GB 2.5"" SATA III","MX500 250GB SATA 2.5-inch, SATA 6.0Gb/s, 560 MB/s Read, 510 MB/s Write",43.59,EUR,,672125,86893846,,...,ZAR,,1409979,30368884#86893846,0,False,"[3.8347194195, -4.2961273193]",0,[0.0],0
4,82078171,,Intel Core I3 7th Gen 7100 3.90 Ghz; 2 Core 4 Thread; 3 Mb Smartcache; 51 W Tdp; Lga 1151 S R35 C,"ntel i3-7100, Core. Processor family: 7th gen Intel® Core™ i3, Processor frequency: 3.9 GHz, Processor socket: LGA 1151 (Socket H4). Memory channels: Dual, Maximum internal memory supported by processor: 64 GB, Memory types supported by processor: DDR3L-SDRAM,DDR4-SDRAM. On-board graphics adapter model: Intel® HD Graphics 630, Maximum on-board graphics adapter memory: 64 GB, On-board graphics adapter outputs supported: DisplayPort,Embedded DisplayPort (eDP),HDMI. Thermal Design Power (TDP): 51 W. PCI Express configurations: 1x16,1x8+2x4,2x8, Supported instruction sets: AVX 2.0,SSE4.1,SSE4.2, Scalability: 1S",2899.0,ZAR,,443612,6914049,WESTERN DIGITAL,...,EUR,,549556,82078171#6914049,0,False,"[3.8386721611, -4.3010449409]",0,[0.0],0


In [39]:
# Pre-create the three result columns, each filled with empty strings:
# the cleaned-up answer text, the full raw API reply, and the prompt that was sent.
for new_column in ('chatbot_response', 'chatbot_response_raw', 'chatbot_question'):
    df[new_column] = ''

In [40]:
# Ask the model about every product pair and store the prompt, the raw reply
# and the completion text on the corresponding row.
MAX_ATTEMPTS = 3           # tries per row before the row is marked as failed
RETRY_DELAY_SECONDS = 60   # pause between failed attempts

for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    message = f"Do the following two product decriptions match. Answer with yes or no. product 1: {row['title_left']}, product 2: {row['title_right']}"

    # Reset the per-row state so a failed row can never pick up the
    # response that was left over from the previous row.
    response = None
    completion = None
    error = ""

    for attempt in range(MAX_ATTEMPTS):
        try:
            candidate = get_answer(message)
            # Extract the completion inside the try block: a reply without
            # 'completions' (e.g. an error payload) counts as a failed attempt.
            completion = candidate['completions'][0]['completion']
            response = candidate
            break
        except Exception as e:
            error = e
            print(f"Error: {e}")
            # Wait before retrying, but do not sleep after the last attempt.
            if attempt < MAX_ATTEMPTS - 1:
                time.sleep(RETRY_DELAY_SECONDS)

    df.at[index, 'chatbot_question'] = message
    if response is not None:
        df.at[index, 'chatbot_response_raw'] = response
        df.at[index, 'chatbot_response'] = completion
    else:
        # Store the error as text so the frame stays serialisable; -1 marks the failed row.
        df.at[index, 'chatbot_response_raw'] = str(error)
        df.at[index, 'chatbot_response'] = -1
        print("Error: response was not set successfully")

  0%|          | 0/100 [00:00<?, ?it/s]

In [41]:
# Preview the first five rows, which now include the chatbot_* columns.
df.head()

Unnamed: 0,id_left,brand_left,title_left,description_left,price_left,priceCurrency_left,specTableContent_left,cluster_id_left,id_right,brand_right,...,pair_id,label,is_hard_negative,roberta-base_logits,roberta-base_prediction,rsupcon-base_logits,rsupcon-base_prediction,chatbot_response,chatbot_response_raw,chatbot_question
0,70108616,Maxxis,Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead,The new incarnation of the Minion DHR. Ready to shred any line.,110.0,CAD,,1084991,45040021,Maxxis,...,70108616#45040021,1,False,"[-4.3633804321, 4.6556377411]",1,[1.0],1,\n\nA:\n\n,"{'completions': [{'completion': ' A: ', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead, product 2: Maxxis Minion DHR II 3C MaxxTerra/DD TR 29\"" Tire - 29 x 2.3\"" (Folding Bead)"
1,67977820,,Canon CLI251XL High Yield Black Inkjet Cartridge Remanufactured,"Crazy Inkjets is a leading supplier of high quality printing supplies for your Canon CLI251XL printer cartridge. CrazyInkjets products are guaranteed to meet or exceed the quality, reliability and yield standards of the original equipment remanufacturer. The units are tested to ensure compliance with original specifications and performance criteria and offer a sound and economical alternative to the expensive brand-name products.",6.95,USD,,767463,69787972,Samsung,...,67977820#69787972,0,True,"[3.8366084099, -4.2942962646]",0,[0.0],0,\n\nA:\n\n,"{'completions': [{'completion': ' A: ', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: Canon CLI251XL High Yield Black Inkjet Cartridge Remanufactured, product 2: Samsung MLT-D103L, High Yield Black Toner/Drum for ML-2950ND / 2955ND / 2955DW, SCX-4728FD / 4729FD / 4729FW (2,500 pages)"
2,79859336,Crucial,Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266,,34.99,CAD,,1892167,65646040,Crucial,...,79859336#65646040,1,False,"[-4.363576889, 4.6508393288]",1,[1.0],1,"\n\nA: Yes,","{'completions': [{'completion': ' A: Yes,', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266, product 2: CRUCIAL CT4G4SFS8266 4Gb 2666Mhz DDR4 Notebook RAM SODIMM CL19 1.2V (By Micron)"
3,30368884,,"Crucial MX500 250GB 2.5"" SATA III","MX500 250GB SATA 2.5-inch, SATA 6.0Gb/s, 560 MB/s Read, 510 MB/s Write",43.59,EUR,,672125,86893846,,...,30368884#86893846,0,False,"[3.8347194195, -4.2961273193]",0,[0.0],0,.\n\nA: Yes,"{'completions': [{'completion': '. A: Yes', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: Crucial MX500 250GB 2.5"" SATA III, product 2: JABARA Evolve 65 with Link 370 USB - Mono"
4,82078171,,Intel Core I3 7th Gen 7100 3.90 Ghz; 2 Core 4 Thread; 3 Mb Smartcache; 51 W Tdp; Lga 1151 S R35 C,"ntel i3-7100, Core. Processor family: 7th gen Intel® Core™ i3, Processor frequency: 3.9 GHz, Processor socket: LGA 1151 (Socket H4). Memory channels: Dual, Maximum internal memory supported by processor: 64 GB, Memory types supported by processor: DDR3L-SDRAM,DDR4-SDRAM. On-board graphics adapter model: Intel® HD Graphics 630, Maximum on-board graphics adapter memory: 64 GB, On-board graphics adapter outputs supported: DisplayPort,Embedded DisplayPort (eDP),HDMI. Thermal Design Power (TDP): 51 W. PCI Express configurations: 1x16,1x8+2x4,2x8, Supported instruction sets: AVX 2.0,SSE4.1,SSE4.2, Scalability: 1S",2899.0,ZAR,,443612,6914049,WESTERN DIGITAL,...,82078171#6914049,0,False,"[3.8386721611, -4.3010449409]",0,[0.0],0,"; 2.5"" SA","{'completions': [{'completion': '; 2.5"" SA', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: Intel Core I3 7th Gen 7100 3.90 Ghz; 2 Core 4 Thread; 3 Mb Smartcache; 51 W Tdp; Lga 1151 S R35 C, product 2: DISCO DURO 2.5SSD 1TB SATA3 WD BLUE 3D NAND"


In [42]:
# Shape (rows, columns) of the subset of rows where the chatbot response is not an empty string.
has_response = df['chatbot_response'].ne('')
df.loc[has_response].shape

(100, 26)

In [43]:
# Preview the last five rows to check that the end of the frame was filled in as well.
df.tail()

Unnamed: 0,id_left,brand_left,title_left,description_left,price_left,priceCurrency_left,specTableContent_left,cluster_id_left,id_right,brand_right,...,pair_id,label,is_hard_negative,roberta-base_logits,roberta-base_prediction,rsupcon-base_logits,rsupcon-base_prediction,chatbot_response,chatbot_response_raw,chatbot_question
95,53525303,Ryze,Ryze Tello Battery Charging Hub G1CH,,129,DKK,,1555405,79449686,,...,53525303#79449686,0,True,"[3.6632392406000003, -4.1180138588]",0,[0.0],0,.\n\nA:\n,"{'completions': [{'completion': '. A: ', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: Ryze Tello Battery Charging Hub G1CH, product 2: AirPods with Wireless Charging Case"
96,81415527,,2MP 25X Network IR PTZ Camera,"1/2.8\"" progressive scan CMOSUp to 1920 × 1080@30fps resolutionMin. illumination: Color: 0.005 Lux @(F1.6, AGC ON) B/W: 0.001 Lux @(F1.6, AGC ON) 0 Lux with IR25× optical zoom, 16× digital zoomWDR, HLC, BLC, 3D DNR, Defog, EIS, Regional Exposure, Regional FocusUp to 150 m IR distance24 VAC & Hi-PoESupport H.265+/H.265 video compression","Incl. BTW€1.035,76",EUR,,2694265,46810114,,...,81415527#46810114,1,False,"[-4.1793274879, 4.4736194611]",1,[1.0],1,\n\nA:\n\n,"{'completions': [{'completion': ' A: ', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: 2MP 25X Network IR PTZ Camera, product 2: HIKVISION 2MP 25X NETWORK IR SPEED DOME CAMERA DS-2DE5225IW-AE"
97,53053823,,Canon EF-S 18-135mm f/3.5-5.6 IS USM Zoom Lens,"The Canon EF-S 18-135mm f/3.5-5.6 IS USM Zoom Lens is great for photos and movies. With improved Image Stabilisation, it allows hand held shooting even in lower light conditions.Features 16 elements from 12 groups18-135mm Focal LengthBuilt-in Image StabiliserNano USM technologyMaximum magnification of 0.28x7 blade circular aperture diaphragmThe Canon EF-S 18-135mm f/3.5-5.6 IS USM Zoom Lens is compact and lightweight making it easy to carry during travel. It covers a large zoom range allowing you to take photos without changing lens in a wide range of situations. The Nano USM technology allows you to focus with fantastic speed silently so as",799,AUD,,390091,45511817,,...,53053823#45511817,0,True,"[3.7480974197, -4.2194314003]",0,[0.0],0,.\n\nA:\n,"{'completions': [{'completion': '. A: ', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: Canon EF-S 18-135mm f/3.5-5.6 IS USM Zoom Lens, product 2: 0000071495| Nikon Nikkor AF-S 24-85mm f/3.5-4.5G ED VR Lens"
98,72039756,,"Cooler Master Chassis, MasterBox Lite 5 Tower",,64.52,USD,,1006147,43470746,,...,72039756#43470746,1,False,"[-4.3535614014, 4.6426811218]",1,[1.0],1,.\n\nA: Yes,"{'completions': [{'completion': '. A: Yes', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: Cooler Master Chassis, MasterBox Lite 5 Tower, product 2: CASE MID-TOWER NO PSU MASTERBOX LITE 5 3USB3 BLACK WINDOW PANEL"
99,79205401,,Crucial BX500 120GB 2.5″ SSD,"Ever wonder why your phone responds faster than your computer? It’s because your phone runs on flash memory. Add flash memory to your laptop or desktop computer with the Crucial BX500 120GB 2.5″ SSD, the easiest way to get all the speed of a new computer without the price. This SSD offers sequential read speeds of up to 540MB/s and sequential write speeds of up to 500MB/s, about 300% faster than a typical HDD.Improve PerformanceBoot up faster, load files quicker and improve overall system responsiveness for all your computing needs.Tools for Easy InstallationThis SSD comes with simple instructions, cloning software,",499.0,ZAR,,744155,31394689,Crucial,...,79205401#31394689,0,True,"[3.8113934994000003, -4.2730379105]",0,[0.0],0,G1BX500\n,"{'completions': [{'completion': 'G1BX500 ', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: Crucial BX500 120GB 2.5″ SSD, product 2: Crucial Crucial SSD MX500 500GB M.2 2280 SATA 3.0 CT500MX500SSD4"


In [44]:
def clean_response(response):
    """Map a free-text model answer to a numeric label.

    Parameters
    ----------
    response : object
        The completion text. Non-string values (for example the ``-1``
        failure marker or a missing value) are treated as unparseable.

    Returns
    -------
    int
        ``1`` if the text contains the word "yes", ``0`` if it contains the
        word "no", and ``-1`` otherwise. "yes" takes precedence when both occur.
    """
    import re  # local import: the notebook's import cell does not include `re`

    if not isinstance(response, str):
        return -1

    text = response.lower()
    # Match whole words only, so "Notebook", "Nokia" or "not" are not read as
    # "no" and "eyes" is not read as "yes".
    if re.search(r"\byes\b", text):
        return 1
    if re.search(r"\bno\b", text):
        return 0
    return -1

In [45]:
# Add a column with the parsed answer: clean_response turns each chatbot response
# into 1 (yes), 0 (no) or -1 (neither found).
df['chatbot_response_clean'] = df['chatbot_response'].map(clean_response)

In [46]:
# Timestamp the output so each run is written to its own file.
now = datetime.now()

# Create the target directory first: DataFrame.to_json does not create missing directories.
results_dir = 'data/results/aleph_alpha/random_sample_v2'
os.makedirs(results_dir, exist_ok=True)

# Save the dataframe as a JSON file.
# NOTE(review): str(now) contains a space and ':' characters, which are not valid in
# Windows file names; the format is kept unchanged to match existing result files.
df.to_json(f'{results_dir}/{now}_simple_promt.json')

In [47]:
# Preview the first five rows, including the new chatbot_response_clean column.
df.head()

Unnamed: 0,id_left,brand_left,title_left,description_left,price_left,priceCurrency_left,specTableContent_left,cluster_id_left,id_right,brand_right,...,label,is_hard_negative,roberta-base_logits,roberta-base_prediction,rsupcon-base_logits,rsupcon-base_prediction,chatbot_response,chatbot_response_raw,chatbot_question,chatbot_response_clean
0,70108616,Maxxis,Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead,The new incarnation of the Minion DHR. Ready to shred any line.,110.0,CAD,,1084991,45040021,Maxxis,...,1,False,"[-4.3633804321, 4.6556377411]",1,[1.0],1,\n\nA:\n\n,"{'completions': [{'completion': ' A: ', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: Maxxis Maxxis Minion DHR2 29 x 2.3 Folding Bead, product 2: Maxxis Minion DHR II 3C MaxxTerra/DD TR 29\"" Tire - 29 x 2.3\"" (Folding Bead)",-1
1,67977820,,Canon CLI251XL High Yield Black Inkjet Cartridge Remanufactured,"Crazy Inkjets is a leading supplier of high quality printing supplies for your Canon CLI251XL printer cartridge. CrazyInkjets products are guaranteed to meet or exceed the quality, reliability and yield standards of the original equipment remanufacturer. The units are tested to ensure compliance with original specifications and performance criteria and offer a sound and economical alternative to the expensive brand-name products.",6.95,USD,,767463,69787972,Samsung,...,0,True,"[3.8366084099, -4.2942962646]",0,[0.0],0,\n\nA:\n\n,"{'completions': [{'completion': ' A: ', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: Canon CLI251XL High Yield Black Inkjet Cartridge Remanufactured, product 2: Samsung MLT-D103L, High Yield Black Toner/Drum for ML-2950ND / 2955ND / 2955DW, SCX-4728FD / 4729FD / 4729FW (2,500 pages)",-1
2,79859336,Crucial,Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266,,34.99,CAD,,1892167,65646040,Crucial,...,1,False,"[-4.363576889, 4.6508393288]",1,[1.0],1,"\n\nA: Yes,","{'completions': [{'completion': ' A: Yes,', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: Crucial Crucial Memory 4GB DDR4 2666 Unbuffered CT4G4SFS8266, product 2: CRUCIAL CT4G4SFS8266 4Gb 2666Mhz DDR4 Notebook RAM SODIMM CL19 1.2V (By Micron)",1
3,30368884,,"Crucial MX500 250GB 2.5"" SATA III","MX500 250GB SATA 2.5-inch, SATA 6.0Gb/s, 560 MB/s Read, 510 MB/s Write",43.59,EUR,,672125,86893846,,...,0,False,"[3.8347194195, -4.2961273193]",0,[0.0],0,.\n\nA: Yes,"{'completions': [{'completion': '. A: Yes', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: Crucial MX500 250GB 2.5"" SATA III, product 2: JABARA Evolve 65 with Link 370 USB - Mono",1
4,82078171,,Intel Core I3 7th Gen 7100 3.90 Ghz; 2 Core 4 Thread; 3 Mb Smartcache; 51 W Tdp; Lga 1151 S R35 C,"ntel i3-7100, Core. Processor family: 7th gen Intel® Core™ i3, Processor frequency: 3.9 GHz, Processor socket: LGA 1151 (Socket H4). Memory channels: Dual, Maximum internal memory supported by processor: 64 GB, Memory types supported by processor: DDR3L-SDRAM,DDR4-SDRAM. On-board graphics adapter model: Intel® HD Graphics 630, Maximum on-board graphics adapter memory: 64 GB, On-board graphics adapter outputs supported: DisplayPort,Embedded DisplayPort (eDP),HDMI. Thermal Design Power (TDP): 51 W. PCI Express configurations: 1x16,1x8+2x4,2x8, Supported instruction sets: AVX 2.0,SSE4.1,SSE4.2, Scalability: 1S",2899.0,ZAR,,443612,6914049,WESTERN DIGITAL,...,0,False,"[3.8386721611, -4.3010449409]",0,[0.0],0,"; 2.5"" SA","{'completions': [{'completion': '; 2.5"" SA', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-04'}","Do the following two product decriptions match. Answer with yes or no. product 1: Intel Core I3 7th Gen 7100 3.90 Ghz; 2 Core 4 Thread; 3 Mb Smartcache; 51 W Tdp; Lga 1151 S R35 C, product 2: DISCO DURO 2.5SSD 1TB SATA3 WD BLUE 3D NAND",-1


In [48]:
# Accuracy of the chatbot: share of rows whose parsed answer equals the gold label.
# Unparseable answers (-1) count as misses, assuming labels are 0/1 as in the preview above.
n_correct = int((df['chatbot_response_clean'] == df['label']).sum())
print(f"Accuracy: {n_correct / df.shape[0]}")

Accuracy: 0.08


In [49]:
# Number of rows whose response could not be parsed (clean value -1).
int(df['chatbot_response_clean'].eq(-1).sum())

82

In [50]:
# Load the results of an earlier run (saved 2023-04-24) into a dataframe.
# This rebinds `df`, so the frame built in the cells above is no longer reachable under that name.
# NOTE(review): this file sits directly under data/results/aleph_alpha/, not in the
# random_sample_v2/ directory the save cell writes to — confirm it is the intended run.
df = pd.read_json('data/results/aleph_alpha/2023-04-24 22:42:34.718717_simple_promt.json')

In [51]:
# Look at 5 complete chatbot responses without truncating long cell contents.
# Setting display.max_colwidth to None removes the per-column width limit.
# Note: this changes the pandas display option for the rest of the session.
pd.set_option('display.max_colwidth', None)

# Display the raw responses of the first 5 rows (no filtering is applied).
df['chatbot_response_raw'].head(5)

0    {'completions': [{'completion': ', product 3: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 4: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 5: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 6: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 7: TAG Heuer Monaco', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-01'}
1                                                                                                                                                                                                                           {'completions': [{'completion': '

A:

Yes, they are the same.
The only difference is the color.

', 'finish_reason': 'end_of_text'}], 'model_version': '2023-01'}
2                             {'completions': [{'completion': ', I am looking for the black frame.

A:

The black frame is a separate product.
The black frame is a plastic frame that you can use to hold the film in place.
The black fr