In [None]:
# Install the third-party packages imported by the next cell.
# NOTE(review): versions are unpinned, so a fresh run may pick up newer releases.
%pip install transformers
%pip install peft
%pip install sentence-transformers

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from peft import PeftModel, PeftConfig, get_peft_model
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification
from transformers import pipeline
import torch
import numpy as np

# Sentence-embedding model that sbi() uses to compare a request with the
# reference phrase below.
s_model = SentenceTransformer('bert-base-nli-mean-tokens')

# Reference wording of a request; `e` is the same text with every digit
# character removed (presumably so the requested count does not affect the
# similarity score -- TODO confirm).
example = "Give me 100 images similar to this one"
e = ''.join(ch for ch in example if not ch.isdigit())

def sbi(request, threshold = 0.7):
  """Tell whether a request is close enough to the reference phrase `e`.

  Digits are removed from the request, both texts are embedded with
  `s_model`, and their cosine similarity is compared with `threshold`.

  Args:
    request: Request text to test.
    threshold: Similarity that must be strictly exceeded for a match.

  Returns:
    True if the similarity is greater than `threshold`, otherwise False.
  """
  digitless = ''.join(ch for ch in request if not ch.isdigit())
  # cosine_similarity expects 2-D inputs, hence the (1, -1) reshapes.
  reference_vec = s_model.encode(e).reshape(1, -1)
  request_vec = s_model.encode(digitless).reshape(1, -1)
  similarity = cosine_similarity(reference_vec, request_vec)
  return bool(similarity > threshold)

# Hub repository of the PEFT adapter used for intent classification.
model_id = "myrtotsok/distilbert-base-uncased-EO-intent-classifier-6"

# The adapter config names the base checkpoint that the adapter sits on.
config = PeftConfig.from_pretrained(model_id)

# Class index <-> intent name maps; label2id is the inverse of id2label.
id2label = {
    0: 'binary visual question answering',
    1: 'image search by text',
    2: 'count/extract/segment',
}
label2id = {name: idx for idx, name in id2label.items()}

# Tokenizer and base sequence classifier come from the base checkpoint; the
# adapter weights are then loaded on top of the base model for inference.
clf_tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
inference_model = AutoModelForSequenceClassification.from_pretrained(
    config.base_model_name_or_path,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)
clf_model = PeftModel.from_pretrained(inference_model, model_id)

def classifier(request):
    """Predict the intent class of a request with `clf_model`.

    Args:
        request: Request text to classify.

    Returns:
        Integer tensor with one entry per input row (shape (1,) for a single
        request) holding the index of the highest-scoring class.
    """
    # truncation=True cuts over-long requests to the tokenizer's maximum
    # length instead of passing them to the model at full length.
    inputs = clf_tokenizer.encode(request, return_tensors="pt", truncation=True)
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = clf_model(inputs).logits
    predictions = torch.max(logits, 1).indices
    return predictions

# Token-classification checkpoint shared by the NER tokenizer and model.
ner_checkpoint = "dslim/bert-base-NER"
ner_tokenizer = AutoTokenizer.from_pretrained(ner_checkpoint)
ner_model = AutoModelForTokenClassification.from_pretrained(ner_checkpoint)

# NER pipeline that geospatial() queries for entity tags.
nlp = pipeline(task="ner", model=ner_model, tokenizer=ner_tokenizer)

def geospatial(request):
  """Tell whether the NER pipeline tags any token of the request as 'B-LOC'.

  Args:
    request: Request text to scan.

  Returns:
    True if at least one result from `nlp` has entity 'B-LOC', else False.
  """
  # any() stops at the first match, like the original early break.
  return any(hit['entity'] == 'B-LOC' for hit in nlp(request))

def intent_classification(request):
  """Return the comma-separated intent labels detected in a request.

  Exactly one primary intent is set: 'search by image' when sbi() matches,
  otherwise the class predicted by classifier(). 'geospatial' is added on
  top when geospatial() returns True.

  Args:
    request: Request text to analyse.

  Returns:
    The selected entries of `intent_labels`, in list order, joined by ', '.
  """
  intent_labels = ['binary visual question answering', 'image search by text',
            'count/extract/segment', 'search by image', 'geospatial']
  # One 0/1 flag per entry of intent_labels.
  intent_vector = np.zeros(len(intent_labels), dtype=int)

  # NOTE: intent_vector can be used for integration in an API

  if sbi(request):
    intent_vector[3] = 1
  else:
    # int() turns the classifier's one-element result into a plain index, so
    # the assignment does not rely on NumPy converting the tensor implicitly.
    intent_vector[int(classifier(request))] = 1

  if geospatial(request):
    intent_vector[4] = 1

  return ', '.join(
      name for name, flag in zip(intent_labels, intent_vector) if flag == 1
  )

In [3]:
# Example request: a text search that also names a place.
intent_classification('Show me all images with ships, within 100 km from the port of Genoa')

'image search by text, geospatial'