# Trying different ways of extracting food parts, their edibility and their removability

In [None]:
# imports
import torch
import torchtext
import gensim
import openai
import json
import os

In [None]:
# food items, possible parts and the mapping that connects them
parts = ['core', 'shell', 'peel', 'stem']

# all foods that get queried, stored in alphabetical order
foods = sorted([
    'bread', 'citron', 'kumquat', 'cucumber', 'pepper',
    'olive', 'lemon', 'tomato', 'orange', 'lime',
    'apple', 'bean', 'coconut', 'banana', 'almond',
    'pineapple', 'pumpkin', 'cherry', 'strawberry', 'squash',
])

# parts listed per food; 'bread' and 'bean' appear in `foods` but have no entry here
food_parts = dict(
    almond=['shell'],
    apple=['core', 'peel'],
    banana=['peel'],
    cherry=['core'],
    citron=['peel'],
    coconut=['shell'],
    cucumber=['peel', 'stem'],
    kumquat=['peel'],
    lemon=['peel'],
    lime=['peel'],
    olive=['core'],
    orange=['peel'],
    pepper=['stem'],
    pineapple=['core', 'peel'],
    pumpkin=['core', 'peel'],
    squash=['core', 'peel'],
    strawberry=['stem'],
    tomato=['peel', 'stem'],
)

In [None]:
# Location of the JSON credentials file; the LLM cells read its
# "organization" and "api_key" entries.
cred_path = "your/path/here"

# Chat model that the LLM cells query (the commented line is the ChatGPT alternative).
# model = "gpt-3.5-turbo-0613"
model = "gpt-4-0613"

## Food Part Extraction using LLMs (ChatGPT & GPT-4)

In [None]:
# Food part extraction: ask the model which of the four candidate parts each food has.
# The credentials file is opened in a context manager so its handle is closed
# as soon as the JSON has been parsed.
with open(cred_path, encoding="utf-8") as cred_file:
    json_text = json.load(cred_file)
openai.organization = json_text["organization"]
openai.api_key = json_text["api_key"]
print(model)

# One request per food. The system instruction follows the user question in the
# message list; that order is left unchanged so the prompt stays the same.
for f in foods:
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "user", "content": f"Which of the following four food parts do you think are part of a {f}: A core, a shell, a peel and a stem"},
            {"role": "system", "content": "Can you please answer only with the existing parts and without any additional text."},
        ],
        temperature=0,
    )
    # only the text of the first returned choice is used
    gen_parts = response['choices'][0]['message']['content']
    print(f"{f}: {gen_parts}")

## Edibility and Removal Tool Extraction using LLMs (ChatGPT & GPT-4)

In [None]:
# Part Edibility: ask the model to classify every (food, part) pair from food_parts.
# The credentials file is opened in a context manager so its handle is closed
# as soon as the JSON has been parsed.
with open(cred_path, encoding="utf-8") as cred_file:
    json_text = json.load(cred_file)
openai.organization = json_text["organization"]
openai.api_key = json_text["api_key"]
print(model)

# One request per (food, part) pair. The system instruction follows the user
# question in the message list; that order is left unchanged so the prompt stays the same.
for f, f_parts in food_parts.items():
    for p in f_parts:
        response = openai.ChatCompletion.create(
            model=model,
            messages=[
                {"role": "user", "content": f"What is the edibility for the {p} of a/an {f}? You can choose between three possibilies: Edible, Must Be Avoided or Should Be Avoided"},
                {"role": "system", "content": "Can you please answer only with the chosen edibility and without any additional text."},
            ],
            temperature=0,
        )
        # only the text of the first returned choice is used
        edibility = response['choices'][0]['message']['content']
        print(f"{f} {p}: {edibility}")

In [None]:
# Tool for part removal (open question): the model may name any tool.
# The credentials file is opened in a context manager so its handle is closed
# as soon as the JSON has been parsed.
with open(cred_path, encoding="utf-8") as cred_file:
    json_text = json.load(cred_file)
openai.organization = json_text["organization"]
openai.api_key = json_text["api_key"]
print(model)

# One request per (food, part) pair. The system instruction follows the user
# question in the message list; that order is left unchanged so the prompt stays the same.
for f, f_parts in food_parts.items():
    for p in f_parts:
        response = openai.ChatCompletion.create(
            model=model,
            messages=[
                {"role": "user", "content": f"What tool would you use to remove the {p} of a/an {f}?"},
                {"role": "system", "content": "Can you please answer only with the 1 tool you think works best and without any additional text."},
            ],
            temperature=0,
        )
        # only the text of the first returned choice is used
        tool = response['choices'][0]['message']['content']
        print(f"{f} {p}: {tool}")

In [None]:
# Tool for part removal (selection of tools provided): the prompt lists the five allowed tools.
# The credentials file is opened in a context manager so its handle is closed
# as soon as the JSON has been parsed.
with open(cred_path, encoding="utf-8") as cred_file:
    json_text = json.load(cred_file)
openai.organization = json_text["organization"]
openai.api_key = json_text["api_key"]
print(model)

# One request per (food, part) pair. The system instruction follows the user
# question in the message list; that order is left unchanged so the prompt stays the same.
for f, f_parts in food_parts.items():
    for p in f_parts:
        response = openai.ChatCompletion.create(
            model=model,
            messages=[
                {"role": "user", "content": f"What tool would you use to remove the {p} of a/an {f} if you would have to choose between a knife, a nutcracker, a hand, a spoon or a peeler?"},
                {"role": "system", "content": "Can you please answer only with the 1 tool you think works best and without any additional text."},
            ],
            temperature=0,
        )
        # only the text of the first returned choice is used
        tool = response['choices'][0]['message']['content']
        print(f"{f} {p}: {tool}")

## Food Part Extraction using Embeddings (GloVe, Numberbatch & NASARI)

In [None]:
# load ConceptNet Numberbatch from its text-format (binary=False) word2vec file
numberbatch_file = '/home/jan-philipp/Word Embeddings/numberbatch-en.txt'
numberbatch = gensim.models.KeyedVectors.load_word2vec_format(
    numberbatch_file,
    binary=False,
)

In [None]:
# load NASARI from its text-format (binary=False) word2vec file
nasari_file = '/home/jan-philipp/Word Embeddings/NASARI_embed_english.txt'
nasari = gensim.models.KeyedVectors.load_word2vec_format(
    nasari_file,
    binary=False,
)

In [None]:
# define function for finding the key based on the given concept name
def find_key(concept, keys=None):
    """Return the first vocabulary key that belongs to ``concept``.

    Keys are searched in order and compared in lower case. How a key matches
    depends on the kind of concept:

    * If ``concept`` contains ``"bn:"`` it is treated as a synset id and the
      first key containing it is returned.
    * Otherwise the segment between the first and second ``"__"`` of the key
      must equal ``concept`` (presumably keys look like
      ``<synset id>__<label>`` -- verify against the embedding file).

    Keys without a ``"__"`` separator are skipped for label matching instead
    of raising ``IndexError``.

    Args:
        concept: Concept name or synset id to look up. It is compared as given
            against lower-cased keys.
        keys: Optional iterable of vocabulary keys to search. Defaults to
            ``nasari.index_to_key``.

    Returns:
        The matching key, or ``concept`` itself when no key matches.
    """
    if keys is None:
        keys = nasari.index_to_key
    concept_is_synset = "bn:" in concept

    # Single pass with an early exit: no intermediate list of candidates is built.
    for key in keys:
        lowered = key.lower()
        if concept not in lowered:
            continue
        if concept_is_synset:
            # a synset id only has to occur somewhere in the key
            return key
        segments = lowered.split('__')
        if len(segments) > 1 and segments[1] == concept:
            return key
    return concept

In [None]:
# cosine similarity between GloVe embeddings (6B tokens, 50 dimensions)
glove = torchtext.vocab.GloVe(name="6B", dim=50)

for f in foods:
    # look the food vector up once and give it a leading axis for cosine_similarity
    food_vec = glove[f].unsqueeze(0)
    for p in parts:
        part_vec = glove[p].unsqueeze(0)
        sim = torch.cosine_similarity(food_vec, part_vec).item()
        # report only the pairs that reach the 0.5 threshold
        if sim >= 0.5:
            print(f'Similarity for {p} in {f}: {sim}')

In [None]:
# cosine similarity between ConceptNet Numberbatch embeddings
# every (food, part) combination, in the same order as two nested loops
food_part_pairs = ((f, p) for f in foods for p in parts)
for f, p in food_part_pairs:
    sim = numberbatch.similarity(f, p)
    # report only the pairs that reach the 0.3 threshold
    if sim >= 0.3:
        print(f'Similarity for {p} in {f}: {sim}')

In [None]:
# cosine similarity between NASARI embeddings
# Sadly, the BabelNet synset for 'core' (bn:04772260n) does not exist in the NASARI embeddings, and
# for 'shell' we need to look up the concrete synset (bn:00071005n) instead.
# Only `parts_nasari` is assigned here: also binding this list to `parts` would replace
# the part list that the GloVe and Numberbatch cells iterate over.
parts_nasari = ['bn:00071005n', 'peel_(fruit)', 'plant_stem']

# find_key() walks the whole vocabulary, so each part is resolved once up front
# instead of once per food; parts without a vocabulary entry are dropped here.
# `key_to_index` is a dict, so the membership tests do not scan the key list.
part_keys = [(p, find_key(p)) for p in parts_nasari]
part_keys = [(p, p_key) for p, p_key in part_keys if p_key in nasari.key_to_index]

for f in foods:
    f_key = find_key(f)
    if f_key not in nasari.key_to_index:
        # no embedding for this food, nothing to compare
        continue
    for p, p_key in part_keys:
        sim = nasari.similarity(f_key, p_key)
        # report only the pairs that reach the 0.75 threshold
        if sim >= 0.75:
            print(f'Similarity for {p} and {f}: {sim}')