In [None]:
import spacy
import en_core_web_sm
!pip install sentence-transformers
from sentence_transformers import SentenceTransformer

# Shared sentence-embedding model: every model.encode(...) call in this notebook
# (exemplar lists and incoming user requests) goes through this single instance.
model = SentenceTransformer('bert-base-nli-mean-tokens')

from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Message templates exchanged between the user, the assistant and the engines.
# Every field starts out as an empty string.

# What the user sends: free text and/or an image.
users_input = dict(text='', image='')

# What the assistant hands to the selected engine: the engine name plus the
# request (same 'text'/'image' layout as users_input).
output_to_engine = dict(
    engine='',
    request=dict(text='', image=''),
)

# What an engine sends back.
# ... waiting for engines output formats to be decided
engine_output = dict(text='', link='')

# What is finally shown to the user.
output_to_user = dict(answer='')

# Engine Selection - Step 1
# (format-based)

In [None]:
def user_input_format(user_input):
  """Classify a user input by which of its fields are filled in.

  Args:
    user_input: dict that may carry a 'text' and/or an 'image' entry. A field
      counts as provided when it is present and is neither None nor ''.

  Returns:
    'text + image' when both fields are provided, 'image' when only the image
    is, 'text' when only the text is, and None when neither is.
  """
  # .get() keeps a missing key from raising KeyError; a missing field is simply
  # treated as "not provided".
  text = user_input.get('text')
  image = user_input.get('image')
  has_text = text is not None and text != ''
  has_image = image is not None and image != ''

  if has_image:
    return 'text + image' if has_text else 'image'
  if has_text:
    return 'text'
  # Nothing usable was supplied.
  return None

# Engine Selection 2a

In [None]:
# Exemplar requests for the two engines that accept text together with an image.
# engine_selection_2a compares an incoming request against both sets.
SbI_list = [
    'Show me images similar to this one.',
    'Find me images that look like this one.',
]
vQA_list = [
    'How many vessels does this image show?',
    'Is this a rural or an urban area?',
]

# Encode the exemplars once, up front, so that only the incoming request has to
# be embedded at selection time.
SbI_embeddings = model.encode(SbI_list)
vQA_embeddings = model.encode(vQA_list)

In [None]:
def engine_selection_2a(text):
  """Pick the engine for a request that comes with both text and an image.

  The request is embedded with the shared `model` and compared, by cosine
  similarity, with the SbI and the vQA exemplar embeddings.

  Args:
    text: the textual part of the user's request.

  Returns:
    'SbI' when the best SbI similarity is strictly greater than the best vQA
    similarity, otherwise 'vQA' (ties therefore go to 'vQA').
  """
  # cosine_similarity expects a 2-D input, hence the one-element list.
  query = [model.encode(text)]
  best_sbi = cosine_similarity(query, SbI_embeddings).max()
  best_vqa = cosine_similarity(query, vQA_embeddings).max()
  return 'SbI' if best_sbi > best_vqa else 'vQA'

# Engine Selection 2b

In [None]:
def request_disambiguation(text):
  """Decide whether a textual request must be clarified before it is routed.

  Currently a single rule is applied: the word 'near' (surrounded by spaces)
  is considered too vague and the user is asked for an explicit distance.

  Args:
    text: the textual part of the user's request; None or '' never needs
      clarification.

  Returns:
    dict with 'need' (bool) and 'message' (the question to send back to the
    user, or '' when no clarification is needed).
  """
  disambiguation = {'need': False, 'message': ''}

  # Inspect the argument itself rather than any module-level state, so the
  # answer always refers to the request that was passed in.
  if text and ' near ' in text:
    disambiguation['need'] = True
    disambiguation['message'] = "Can you repeat your question replacing 'near' with a specific distance, please?"

  return disambiguation

In [None]:
# spaCy entity labels treated as geographical objects (geopolitical entities,
# facilities and other locations).
_GEO_ENTITY_LABELS = frozenset({'GPE', 'FAC', 'LOC'})

# spaCy pipeline, loaded on first use and then reused by every later call.
_geo_nlp = None


def _load_geo_nlp():
  """Return the en_core_web_sm pipeline, loading it only on the first call."""
  global _geo_nlp
  if _geo_nlp is None:
    _geo_nlp = en_core_web_sm.load()
  return _geo_nlp


def existence_of_geographical_object(textual_input):
  """Tell whether the text mentions at least one geographical entity.

  Args:
    textual_input: the text to analyse.

  Returns:
    True when spaCy recognises at least one entity labelled 'GPE', 'FAC' or
    'LOC' in the text, False otherwise.
  """
  # Loading the pipeline is expensive, so it is done once instead of per call.
  doc = _load_geo_nlp()(textual_input)
  return any(entity.label_ in _GEO_ENTITY_LABELS for entity in doc.ents)

In [None]:
def engine_selection_2b(text=None):
  """Pick the engine for a text-only request.

  Args:
    text: the textual request to route. When omitted (None), the text of the
      module-level `users_input` is used, which keeps zero-argument calls
      working.

  Returns:
    'EarthQA' when the text mentions a geographical object, otherwise 'SbT'.
  """
  if text is None:
    text = users_input['text']
  return 'EarthQA' if existence_of_geographical_object(text) else 'SbT'

## chat/textual engine decision

In [None]:
# Exemplars of plain conversation (no engine is needed to answer these).
chat_list = [
    'Thank you!',
    'This was all I wanted',
]

# Exemplars of requests that must be forwarded to a textual engine.
# NOTE(review): the second exemplar ends with an unmatched ')' - looks like a
# typo; left untouched here because editing it would change its embedding.
engine_list = [
    'Show me images containing vessels',
    'Find me Sentinel-2 satellite images that show Mount Etna, have been taken in February 2021 and have cloud cover less than 10%).',
]

In [None]:
# Encode both exemplar sets once with the shared model; request_to_textual_engine
# compares each incoming request against these two sets of embeddings.
chat_embeddings = model.encode(chat_list)
engine_embeddings = model.encode(engine_list)

In [None]:
def request_to_textual_engine(text):
  """Tell whether a text-only input is an engine request rather than chat.

  The input is embedded with the shared `model` and compared, by cosine
  similarity, with the chat exemplars and with the engine-request exemplars.
  (Once a dataset of user requests exists, this can be replaced by a binary
  classifier.)

  Args:
    text: the textual part of the user's input.

  Returns:
    True when the best engine-exemplar similarity is strictly greater than the
    best chat-exemplar similarity, otherwise False.
  """
  # cosine_similarity expects a 2-D input, hence the one-element list.
  query = [model.encode(text)]
  best_chat = cosine_similarity(query, chat_embeddings).max()
  best_engine = cosine_similarity(query, engine_embeddings).max()
  return bool(best_chat < best_engine)

# Response enhancement

In [None]:
def response_enhancement(engine, answer):
  """Placeholder for turning an engine's raw answer into the user-facing reply.

  Not implemented yet: waiting for the engines' output formats to be decided.

  Args:
    engine: name of the engine that produced the answer (currently unused).
    answer: the engine's output (currently unused).

  Returns:
    None, always, until the enhancement logic is written.
  """
  return None

# Core function: routing the user's input to an engine

In [None]:
def digital_assistant_to_engine(users_input):
  """Route one user input to the engine that should handle it.

  Args:
    users_input: dict with the 'text' and 'image' of the user's request.

  Returns:
    Either a str - the clarification question to send back to the user when
    the request is ambiguous - or the module-level `output_to_engine` dict,
    updated in place with the chosen 'engine' and the 'request'.
    For inputs that are image-only or empty, no engine is selected and
    `output_to_engine` is returned as it currently is.
  """
  # Engine Selection step 1: classify the input once and branch on the result.
  input_format = user_input_format(users_input)

  # path 'text with image'
  if input_format == 'text + image':
    # Engine Selection 2a
    output_to_engine['engine'] = engine_selection_2a(users_input['text'])
    output_to_engine['request'] = users_input

  # path 'textual'
  elif input_format == 'text':
    text = users_input['text']

    # decide between chat and textual engine
    if not request_to_textual_engine(text):
      output_to_engine['engine'] = 'conversational'
      output_to_engine['request'] = users_input
    else:
      # asking for clarifications: the helper has to be called; its result is
      # the dict carrying 'need' and 'message'.
      disambiguation = request_disambiguation(text)
      if disambiguation['need']:
        return disambiguation['message']

      # Engine Selection 2b, applied directly to this request's text so the
      # decision depends on the argument rather than on module-level state.
      if existence_of_geographical_object(text):
        output_to_engine['engine'] = 'EarthQA'
      else:
        output_to_engine['engine'] = 'SbT'
      output_to_engine['request'] = users_input

  return output_to_engine

# main ()

In [1]:
def main(max_turns=None):
    """Run the assistant's conversation loop.

    Each turn records the user's message, routes it with
    digital_assistant_to_engine and records the assistant's answer.

    Args:
        max_turns: number of turns to process before returning. None (the
            default) keeps the loop running indefinitely.

    Returns:
        The list of message dicts ('role', 'content', 'image') accumulated so
        far; only reachable when max_turns is given.
    """
    messages = [{"role": "assistant",
                 "content": "Welcome to DA4DTE! Please enter your request.",
                 'image': ''}]
    turn = 0
    while max_turns is None or turn < max_turns:
        turn += 1

        # read users_input file
        # NOTE(review): reading the input is still a placeholder, so every turn
        # currently re-processes the same module-level users_input.
        messages.append({"role": "user", "content": users_input['text'],
                         'image': users_input['image']})

        # Route the request once per turn and reuse the outcome.
        outcome = digital_assistant_to_engine(users_input)

        if isinstance(outcome, str):  # meaning that disambiguation is needed
            answer = outcome
        else:
            engine_input = outcome
            # the engine_input json file is available
            # [... waiting for the engine to respond ...]
            # TI reads engine_output json file
            # response_enhancement takes the engine name and the engine's answer.
            answer = response_enhancement(engine_input['engine'], engine_output)
            # the output_to_user file is available

        messages.append({"role": "assistant", "content": answer, "image": ''})

    return messages