In [38]:
import os
import json
import pandas as pd
from datetime import datetime
from tqdm.notebook import tqdm
import time
import requests


# Connect to Aleph Alpha

In [39]:
def get_answer(question):
    url = "https://api.aleph-alpha.com/complete"

    payload = json.dumps({
    "model": "luminous-base",
    "prompt": question,
    "maximum_tokens": 64
    })
    headers = {
    'Content-Type': 'application/json',
    'Accept': 'application/json',
    'Authorization': f"Bearer {os.getenv('ALEPHALPHATOKEN')}"
    }

    return requests.request("POST", url, headers=headers, data=payload).json()

print(get_answer("An apple a day"))

{'error': 'Account does not have enough credits left to process this request', 'code': 'OUT_OF_CREDITS'}


## Start testing

In [40]:
# open our test dataframe from the csv
df = pd.read_csv('data/test/random_sample_100.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,id_left,brand_left,title_left,description_left,price_left,priceCurrency_left,specTableContent_left,cluster_id_left,id_right,...,priceCurrency_right,specTableContent_right,cluster_id_right,pair_id,label,is_hard_negative,roberta-base_logits,roberta-base_prediction,rsupcon-base_logits,rsupcon-base_prediction
0,1639,58378163,,Brother HL-L6200DW Wireless High Speed Mono La...,WIRELESS HIGH SPEED MONO LASER PRINTER WITH2-S...,617.38,AUD,,5712817,44664998,...,USD,,408446,58378163#44664998,0,True,"[3.8065164089, -4.2674832344]",0,[0.0],0
1,460,91664569,,Double Socket Arm - Long - RAM-B-201U-C,"Long Arm is 6\"" for applications requiring mor...",23.49,USD,,1366684,17589843,...,USD,,2490160,91664569#17589843,0,True,"[3.8207249641, -4.2826638222]",0,[0.0],0
2,1760,1575940,,Corsair Vengeance LPX RAM 16GB 2400MHz DDR4 UD...,CORSAIR Vengeance LPX 16GB DDR4 memory is desi...,5499.0,INR,,150984,31345669,...,,,150984,1575940#31345669,1,False,"[-4.3838095665, 4.6694221497]",1,[1.0],1
3,4145,7125489,,"HIK 8CH TURBO HD DVR, DS-7208HUHI-K1","Hikvision 8-Channel Turbo HD DVR, 1080p, 3MP, ...",0.0,,,1069672,50492036,...,EUR,,56750,7125489#50492036,0,True,"[3.8441700935, -4.3036942482]",0,[0.0],0
4,2753,2004031,,TAG Heuer Men's Special Edition Heuer Monaco W...,TAG Heuer Monaco Stainless Steel Blue Mens Wat...,5250.0,GBP,,556904,6187863,...,EUR,,793378,2004031#6187863,0,False,"[3.8281166553, -4.2899880409]",0,[0.0],0


In [41]:
# create a new column for the chatbot response
df['chatbot_response'] = ''
df['chatbot_response_raw'] = ''
df['chatbot_question'] = ''

In [42]:
# loop through the dataframe and send the messages to the chatbot
for index, row in tqdm(df.iterrows(), total=df.shape[0]):
    message = f"Are these two products the same? product 1: {row['title_left']}, product 2: {row['title_right']}"
    error = ""
    for i in range(3):
        try:
            response = get_answer(message)
            # if the above call succeeds, break out of the loop and continue
            break
        except Exception as e:
            error = e
            print(f"Error: {e}")
            # if the call fails, wait for one minute before retrying
            time.sleep(60)

    # check if the response was set successfully
    if response:
        # your code here
        df.at[index, 'chatbot_response_raw'] = response
        df.at[index, 'chatbot_response'] = response['completions'][0]['completion']
        df.at[index, 'chatbot_question'] = message
    else:
        df.at[index, 'chatbot_response_raw'] = error
        df.at[index, 'chatbot_response'] = -1
        df.at[index, 'chatbot_question'] = message
        print("Error: response was not set successfully")

  0%|          | 0/100 [00:00<?, ?it/s]

KeyError: 'completions'

In [None]:
# get the current date and time
now = datetime.now()

# save the dataframe as a json file
df.to_json(f'data/results/aleph_alpha/{now}_simple_promt.json')

In [None]:
df.head()

Unnamed: 0.1,Unnamed: 0,id_left,brand_left,title_left,description_left,price_left,priceCurrency_left,specTableContent_left,cluster_id_left,id_right,...,pair_id,label,is_hard_negative,roberta-base_logits,roberta-base_prediction,rsupcon-base_logits,rsupcon-base_prediction,chatbot_response,chatbot_response_raw,chatbot_question
0,1840,98621092,,TAG Heuer Monaco Chronograph Calibre 11 Automatic,Design inspired by the watch worn by Steve McQ...,"£4,195.00",,,556904,2004031,...,98621092#2004031,1,False,"[3.7184028625, -4.1772656441]",0,[1.0],1,", product 3: TAG Heuer Monaco Chronograph Cali...","{'completions': [{'completion': ', product 3: ...",Are these two products the same? product 1: TA...
1,710,47887901,,Apple AirPods (2nd Generation) Bluetooth Earbu...,Staples.com: Apple AirPods MRXJ2AM/A In the Ea...,199,USD,,1994510,49097388,...,47887901#49097388,0,True,"[-1.7439045906000001, 1.717010498]",1,[0.0],0,"\n\nA:\n\nYes, they are the same.\nThe only di...","{'completions': [{'completion': ' A: Yes, th...",Are these two products the same? product 1: Ap...
2,2065,60950283,,Fujifilm Instax Square Instant Film - 10 Photos,The Fujifilm Instax Square Instant Film is for...,11.99,GBP,,1457721,25935656,...,60950283#25935656,0,True,"[-2.1758391857, 2.2021055222]",1,[0.0],0,", I am looking for the black frame.\n\nA:\n\nT...","{'completions': [{'completion': ', I am lookin...",Are these two products the same? product 1: Fu...
3,3869,435472,,Evans EMAD Clear Bass Drum Head 20 Inch,"20"" drum head made using a single ply of 10mil...",46.06,USD,,800940,55131100,...,435472#55131100,1,False,"[3.2268047333, -3.6520571709]",0,[1.0],1,"Drum Head 20 Inch, Evans EMAD Clear Bass Drum...",{'completions': [{'completion': ' Drum Head 20...,Are these two products the same? product 1: Ev...
4,1787,88641225,,"8GB (2x4GB) HyperX Fury Blue DDR3, 1600MHz, CL...","8GB (2x4GB) HyperX Fury Blue DDR3, 1600MHz, CL...",74.99,CAD,,288081,86250508,...,88641225#86250508,0,True,"[-2.2421159744, 2.2641017437]",1,[0.0],0,- 1.5V - 240-pin - Kit de 2 - DDR3 - HyperX -...,{'completions': [{'completion': ' - 1.5V - 240...,Are these two products the same? product 1: 8G...


In [None]:
# get the number of rows were the chatbot respionse is not empty
df[df['chatbot_response'] != ''].shape

(100, 27)

In [None]:
df.tail()

Unnamed: 0.1,Unnamed: 0,id_left,brand_left,title_left,description_left,price_left,priceCurrency_left,specTableContent_left,cluster_id_left,id_right,...,pair_id,label,is_hard_negative,roberta-base_logits,roberta-base_prediction,rsupcon-base_logits,rsupcon-base_prediction,chatbot_response,chatbot_response_raw,chatbot_question
95,2820,31581824,SHIMANO,SPD-SL SH 11 - tacchette,The Shimano SM-SH11 cleats serve as a connecti...,15.33,EUR,,532781,75286119,...,31581824#75286119,0,True,"[-4.3649616241, 4.6570587158]",1,[0.0],0,",CN-HG901-11,QL 11-SPD(RD/MTB/E-BIKE)QUICK-LNK...","{'completions': [{'completion': ',CN-HG901-11,...",Are these two products the same? product 1: SP...
96,4223,6002122,,LOGITECH - STEREO HEADSET H151 ANALOG - EMEAIN...,LOGITECH STEREO HEADSET H151 ANALOG - EMEAIN A...,,,,122031,31273059,...,6002122#31273059,0,True,"[-4.3757534027, 4.6587324142]",1,[0.0],0,"Stereo Headset with Mic for PC, Mac, PS4, PS3...",{'completions': [{'completion': ' Stereo Heads...,Are these two products the same? product 1: LO...
97,2073,393407,Audemars Piguet,Audemars Piguet Royal Oak Chronograph,Audemars Piguet Royal Oak,36560.0,USD,,607611,98604797,...,393407#98604797,0,True,"[-4.1834125519, 4.471534729]",1,[0.0],0,.\n\nA:\n\nThe product 1 is the Royal Oak Chro...,{'completions': [{'completion': '. A: The pr...,Are these two products the same? product 1: Au...
98,3917,89292552,,Shimano SPD SL Cleat Set,Shimano cleats come in 3 different colours the...,39.95,AUD,,431550,75045785,...,89292552#75045785,0,True,"[-3.1417820454, 3.3016252518]",1,[0.0],0,"SET, Black, Size: M, Color: Black, Gender: Me...","{'completions': [{'completion': ' SET, Black, ...",Are these two products the same? product 1: Sh...
99,1910,10830960,,SANDISK EXTREME SDXC 128GB150/70 MB/s V30 UHS-...,,151.98,PLN,,2427670,6664354,...,10830960#6664354,0,True,"[-1.4274802208000001, 1.4668471813]",1,[0.0],0,"- SDSDXV5-128G-GNCIN, I have a Sandisk Extrem...",{'completions': [{'completion': ' - SDSDXV5-12...,Are these two products the same? product 1: SA...


In [50]:
def clean_response(response):
    if "yes" in response.lower():
        return 1
    elif "no" in response.lower():
        return 0
    else:
        return -1

In [51]:
# add a coloumn with a clean version of the chatbot response if the chatresponse contains Yes/yes it will be 1 else if it contains No/no it will be 0 if it is 0 or 1 
df['chatbot_response_clean'] = df['chatbot_response'].apply(lambda x: clean_response(x))

In [52]:
df.head()

Unnamed: 0.1,Unnamed: 0,id_left,brand_left,title_left,description_left,price_left,priceCurrency_left,specTableContent_left,cluster_id_left,id_right,...,label,is_hard_negative,roberta-base_logits,roberta-base_prediction,rsupcon-base_logits,rsupcon-base_prediction,chatbot_response,chatbot_response_raw,chatbot_question,chatbot_response_clean
0,1840,98621092,,TAG Heuer Monaco Chronograph Calibre 11 Automatic,"Design inspired by the watch worn by Steve McQueen in the 1970 film 'Le Mans' Series: Monaco Chronograph, Model: Calibre 11, Gender: Men's Watches, Movement: Automatic, Features: , Dial: Blue, Strap: Black Perforated Leather, Clasp:Deployment/ Folding,Case: Brushed & Polished Stainless Steel/ Glass Back, Case Size: 39mm, Case Thickness: mm, Water Resistant:100m","£4,195.00",,,556904,2004031,...,1,False,"[3.7184028625, -4.1772656441]",0,[1.0],1,", product 3: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 4: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 5: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 6: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 7: TAG Heuer Monaco","{'completions': [{'completion': ', product 3: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 4: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 5: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 6: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 7: TAG Heuer Monaco', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-01'}","Are these two products the same? product 1: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 2: TAG Heuer Men's Special Edition Heuer Monaco Watch",0
1,710,47887901,,"Apple AirPods (2nd Generation) Bluetooth Earbuds w/ Wireless Charging Case, White (MRXJ2AM/A)","Staples.com: Apple AirPods MRXJ2AM/A In the Ear Bluetooth Earbuds, White with fast and free shipping on select orders.",199,USD,,1994510,49097388,...,0,True,"[-1.7439045906000001, 1.717010498]",1,[0.0],0,"\n\nA:\n\nYes, they are the same.\nThe only difference is the color.\n\n","{'completions': [{'completion': ' A: Yes, they are the same. The only difference is the color. ', 'finish_reason': 'end_of_text'}], 'model_version': '2023-01'}","Are these two products the same? product 1: Apple AirPods (2nd Generation) Bluetooth Earbuds w/ Wireless Charging Case, White (MRXJ2AM/A), product 2: Apple In-Ear Wireless Airpods with Charging Case - White-MV7N2ZM/A",1
2,2065,60950283,,Fujifilm Instax Square Instant Film - 10 Photos,The Fujifilm Instax Square Instant Film is for use with the Fuji Instax Square SQ10 Camera. This pack of Fuji Instax Square film contains 10 photos and will produce fantastic mini images at 62x62mm on a film size of 86x72mm. You can also write on the white border with non water based pens.,11.99,GBP,,1457721,25935656,...,0,True,"[-2.1758391857, 2.2021055222]",1,[0.0],0,", I am looking for the black frame.\n\nA:\n\nThe black frame is a separate product.\nThe black frame is a plastic frame that you can use to hold the film in place.\nThe black frame is not included with the film.\nThe black frame is not included with the film.","{'completions': [{'completion': ', I am looking for the black frame. A: The black frame is a separate product. The black frame is a plastic frame that you can use to hold the film in place. The black frame is not included with the film. The black frame is not included with the film.', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-01'}","Are these two products the same? product 1: Fujifilm Instax Square Instant Film - 10 Photos, product 2: Fujifilm Instax Square Film Black Frame 10 Shots",0
3,3869,435472,,Evans EMAD Clear Bass Drum Head 20 Inch,"20"" drum head made using a single ply of 10mil film. Externally Mounted Adjustable Damping system allows the player to adjust attack and focus. Very versatile head for all music genres; a true working drummer's choice. Also available in a coated version. All Evans drum heads are designed, engineered and manufactured in the USA. .",46.06,USD,,800940,55131100,...,1,False,"[3.2268047333, -3.6520571709]",0,[1.0],1,"Drum Head 20 Inch, Evans EMAD Clear Bass Drum Head 20 Inch, Evans EMAD Clear Bass Drum Head 20 Inch, Evans EMAD Clear Bass Drum Head 20 Inch, Evans EMAD Clear Bass Drum Head 20 Inch, Evans EMAD Clear Bass Drum Head 20 Inch, Evans EMAD Clear Bass Drum Head 20 Inch","{'completions': [{'completion': ' Drum Head 20 Inch, Evans EMAD Clear Bass Drum Head 20 Inch, Evans EMAD Clear Bass Drum Head 20 Inch, Evans EMAD Clear Bass Drum Head 20 Inch, Evans EMAD Clear Bass Drum Head 20 Inch, Evans EMAD Clear Bass Drum Head 20 Inch, Evans EMAD Clear Bass Drum Head 20 Inch', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-01'}","Are these two products the same? product 1: Evans EMAD Clear Bass Drum Head 20 Inch, product 2: Evans Evans EMAD Clear Bass",-1
4,1787,88641225,,"8GB (2x4GB) HyperX Fury Blue DDR3, 1600MHz, CL10, 1.5V, 240-pin DIMM, kit of 2","8GB (2x4GB) HyperX Fury Blue DDR3, 1600MHz, CL10, 1.5V, 240-pin DIMM, kit of 2",74.99,CAD,,288081,86250508,...,0,True,"[-2.2421159744, 2.2641017437]",1,[0.0],0,- 1.5V - 240-pin - Kit de 2 - DDR3 - HyperX - Fury - Blue - DDR3 - 1600 - MHz - CAS 10 - 1.5V - 240-pin - Kit de 2 - DDR3 - HyperX - Fury - Blue - DDR3 - 1600 - MHz -,"{'completions': [{'completion': ' - 1.5V - 240-pin - Kit de 2 - DDR3 - HyperX - Fury - Blue - DDR3 - 1600 - MHz - CAS 10 - 1.5V - 240-pin - Kit de 2 - DDR3 - HyperX - Fury - Blue - DDR3 - 1600 - MHz -', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-01'}","Are these two products the same? product 1: 8GB (2x4GB) HyperX Fury Blue DDR3, 1600MHz, CL10, 1.5V, 240-pin DIMM, kit of 2, product 2: DDR3 HyperX Fury Blue - 4 Go 1600 MHz - CAS 10",-1


In [53]:
# Check the accuracy of the chatbot 
df[df['chatbot_response_clean'] == df['label']].shape[0]
print(f"Accuracy: {df[df['chatbot_response_clean'] == df['label']].shape[0] / df.shape[0]}")

Accuracy: 0.18


In [None]:
# Check the number of -1 responses
df[df['chatbot_response_clean'] == -1].shape[0]

9

In [47]:
# load data/results/aleph_alpha/2023-04-24 22:42:34.718717_simple_promt.json into a dataframe
df = pd.read_json('data/results/aleph_alpha/2023-04-24 22:42:34.718717_simple_promt.json')

In [49]:
# Look at 5 complete chatbot responses dont limit the output length
# set the max_rows and max_columns options to None
# set the max_colwidth option to None
pd.set_option('display.max_colwidth', None)

# display the first 5 chatbot responses where chatbot_response_raw is not -1
df['chatbot_response_raw'].head(5)

0    {'completions': [{'completion': ', product 3: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 4: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 5: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 6: TAG Heuer Monaco Chronograph Calibre 11 Automatic, product 7: TAG Heuer Monaco', 'finish_reason': 'maximum_tokens'}], 'model_version': '2023-01'}
1                                                                                                                                                                                                                           {'completions': [{'completion': '

A:

Yes, they are the same.
The only difference is the color.

', 'finish_reason': 'end_of_text'}], 'model_version': '2023-01'}
2                             {'completions': [{'completion': ', I am looking for the black frame.

A:

The black frame is a separate product.
The black frame is a plastic frame that you can use to hold the film in place.
The black fr