In [1]:
%load_ext autoreload
%autoreload 2

## sample model db

In [2]:
import ast

import pandas as pd

from src.openai.requests import input_description, output_description

In [None]:
# Read data/huggingface_models.jsonl as line-delimited JSON (one record per line).
data = pd.read_json(path_or_buf="data/huggingface_models.jsonl", lines=True)
# Keep only the id / task / description columns and draw a small sample.
# random_state pins the draw so Restart & Run All always selects the same rows;
# min() keeps the cell from raising when the file holds fewer than 10 records.
model_db = data[["id", "task", "description"]].sample(
    n=min(10, len(data)),
    random_state=42,
)

In [None]:
# Run input_description (imported from src.openai.requests) on each row's
# description and store the results in a new "input_desc" column.
model_db["input_desc"] = model_db["description"].apply(input_description)

In [None]:
# Run output_description (imported from src.openai.requests) on each row's
# description and store the results in a new "output_desc" column.
model_db["output_desc"] = model_db["description"].apply(output_description)

In [None]:
# Write the sampled model table to data/model_db.json, one JSON object per row.
model_db.to_json(path_or_buf="data/model_db.json", orient="records")

## task planning

In [3]:
from src.task_planning.task_planning import zero_shot_task_planning

In [4]:
# Request a task plan for a free-text user request.
# The prompt is split across two adjacent literals purely for line length;
# Python concatenates them into the same single string.
zero_task_plan = zero_shot_task_planning(
    "a model that takes as input text from user change it to a poem "
    "and classify it as positive emotion or negative emotion"
)

In [5]:
# Parse the planner's reply into Python objects.
# The reply is generated text, so eval() would execute whatever code it happens
# to contain; ast.literal_eval only accepts plain literals (dicts, lists,
# strings, numbers, ...) and raises on anything else.
# If the planner already returned a parsed object, use it unchanged.
final = (
    ast.literal_eval(zero_task_plan)
    if isinstance(zero_task_plan, str)
    else zero_task_plan
)

In [6]:
# Show the parsed task plan as the cell output.
final

[{'task_id': 1,
  'task': 'text-classification',
  'task_description': 'Classify given text as positive or negative emotion',
  'dep': [],
  'inputs': [{'id': 1,
    'input_type': 'text',
    'input_description': 'Text input to classify'}],
  'outputs': [{'id': 2,
    'output_type': 'text',
    'output_description': "Classified emotion ('positive' or 'negative')"}]},
 {'task_id': 2,
  'task': 'text2text-generation',
  'task_description': 'Generate a poem from given text input',
  'dep': [1],
  'inputs': [{'id': 1,
    'input_type': 'text',
    'input_description': 'Text input to generate poem'}],
  'outputs': [{'id': 3,
    'output_type': 'text',
    'output_description': 'Generated poem'}]},
 {'task_id': 3,
  'task': 'text-classification',
  'task_description': 'Classify generated poem as positive or negative emotion',
  'dep': [2],
  'inputs': [{'id': 3,
    'input_type': 'text',
    'input_description': 'Generated poem to classify'}],
  'outputs': [{'id': 4,
    'output_type': 'text

## input modeling

In [13]:
# Free-text problem statement used as the planner input in the next cell.
# The string is passed verbatim, so its wording and line breaks are left as-is.
test = """
we need to make it easy for our employees to add and edit regulations. 
Adding a new regulation requires multiple checks across thousands of pages of already existing regulations. 
During these checks (which takes months) employees look for conflicting regulations made earlier, 
loop holes and blind spots.
find the relevant regulations, paragraphs and articles to the request
check if a conflict occurs
explain the conflict occurring
"""

In [14]:
# Request a task plan for the regulation-checking problem statement held in `test`.
test_task_plan = zero_shot_task_planning(test)

In [None]:
# Parse the planner's reply into Python objects and display it.
# The reply is generated text, so eval() would execute whatever code it happens
# to contain; ast.literal_eval only accepts plain literals and raises otherwise.
# A later cell in this notebook displays test_task_plan as a dict, so an
# already-parsed plan is passed through unchanged instead of failing.
final = (
    ast.literal_eval(test_task_plan)
    if isinstance(test_task_plan, str)
    else test_task_plan
)
final

In [None]:
def problem(**kwargs):
    """Render a problem specification as a string.

    Any keyword arguments are accepted. The fields this notebook intends to
    pass are: subject: str, task: str, description: str, inputs: dict,
    outputs: dict.

    Returns:
        str: the textual form of the dict built from the keyword arguments.
    """
    # repr() of a dict is the same text str() produces for it.
    return repr(kwargs)

In [None]:
# Example problem specification, rendered to a string by problem().
problem_1 = problem(
    subject="document embedding",
    task="text splitting",
    description="divide text into chunks",
    inputs="[document text : str]",
    outputs="[list of sentences : list[str]]",
)

# NOTE(review): problem_2 receives exactly the same fields as problem_1 —
# confirm whether a different task was meant to be described here.
problem_2 = problem(
    subject="document embedding",
    task="text splitting",
    description="divide text into chunks",
    inputs="[document text : str]",
    outputs="[list of sentences : list[str]]",
)

In [None]:
# Three hand-written problem specifications kept as free-form text blocks.
# They are not valid JSON or Python literals (most keys and values are unquoted),
# so they can only be consumed as raw strings.
# NOTE(review): only the first entry is wrapped in braces, and the last entry's
# output type has an unbalanced closing bracket — confirm the intended format.
problems = [
"""
{"subject" : document embedding
task : text splitting
description : divide text into chunks
inputs : [document text : str]
output : list of sentences : list[str]}
""",
"""
"subject" : document embedding
task : pdf to text
description : turn the pdf into text while ignoring images
inputs : [input documentation pdf : pdf]
output : document text : str
""",
"""
"subject" : document embedding
task : sentence embedding
description : embed each sentence
inputs : [list of sentences : list[str]]
output : list of list of embeddings and list of sentences : list[list[str], list[float]]] 
"""]


## model selection

In [7]:
from src.model_selection.model_selection import SemanticSearchEngine
from src.task_planning.test_task_plans import test_task_plan_1, test_task_plan_2, test_task_plan_3
from src.utils.utils import read_json

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Load data/model_db.json (the path the sampling section writes to) with the
# project's read_json helper.
# NOTE(review): this rebinds model_db, which an earlier cell bound to a pandas
# DataFrame; the type read_json returns is not visible here — confirm.
model_db = read_json("data/model_db.json")

In [19]:
# Show the current value of test_task_plan as the cell output.
test_task_plan

{'tasks': [{'task_id': '1',
   'task': 'find_relevant_regulations',
   'task_description': 'Find relevant regulations, paragraphs and articles to the request',
   'dep': [],
   'inputs': [{'id': '1a',
     'input_type': 'text',
     'input_description': 'Regulation search query'}],
   'outputs': [{'id': '2a',
     'output_type': 'text',
     'output_description': 'List of relevant regulations'}]},
  {'task_id': '2',
   'task': 'check_for_conflict',
   'task_description': 'Check if a conflict occurs within the relevant regulations found in the previous task',
   'dep': ['1'],
   'inputs': [{'id': '2a',
     'input_type': 'text',
     'input_description': 'List of relevant regulations'}],
   'outputs': [{'id': '3a',
     'output_type': 'text',
     'output_description': 'List of conflicting regulations'}]},
  {'task_id': '3',
   'task': 'explain_conflict',
   'task_description': 'Explain the conflict occurring',
   'dep': ['2'],
   'inputs': [{'id': '3a',
     'input_type': 'text',
     

In [21]:
# Build the project's semantic search engine with its default arguments.
se = SemanticSearchEngine()
# Match the parsed task plan (`final`) against the model table.
# The recorded output below is a list of three lists of five model ids each;
# presumably one ranked candidate list per task — TODO confirm.
se.forward(model_db=model_db, task_db=final)

[['facebook/textless_sm_it_fr',
  'cafeai/cafe_aesthetic',
  'tuner007/pegasus_summarizer',
  'keras-io/deeplabv3p-resnet50',
  'nateraw/food'],
 ['tuner007/pegasus_summarizer',
  'facebook/textless_sm_it_fr',
  'm3hrdadfi/wav2vec2-large-xlsr-persian-v3',
  'facebook/wav2vec2-large-960h-lv60-self',
  'bigscience/bloom'],
 ['cafeai/cafe_aesthetic',
  'tuner007/pegasus_summarizer',
  'facebook/textless_sm_it_fr',
  'm3hrdadfi/wav2vec2-large-xlsr-persian-v3',
  'nateraw/food']]