In [1]:
%load_ext autoreload
%autoreload 2

## sample model db

In [2]:
import ast

import pandas as pd

from src.openai.requests import input_description, output_description

In [3]:
# Load the model catalogue; lines=True reads one JSON record per line.
data = pd.read_json(path_or_buf="data/huggingface_models.jsonl", lines=True)
# Keep only the columns used downstream and draw 10 rows.
# random_state pins the draw so Restart & Run All selects the same models every time.
model_db = data[["id", "task", "description"]].sample(n=10, random_state=42)

In [None]:
# Pass every model's description text through input_description and store the
# result in a new "input_desc" column.
# NOTE(review): input_description comes from src.openai.requests — presumably one API request per row; confirm cost.
model_db["input_desc"] = model_db["description"].apply(input_description)

In [None]:
# Pass every model's description text through output_description and store the
# result in a new "output_desc" column.
# NOTE(review): output_description comes from src.openai.requests — presumably one API request per row; confirm cost.
model_db["output_desc"] = model_db["description"].apply(output_description)

In [None]:
# Persist the sampled models; orient="records" writes a JSON array with one object per row.
model_db.to_json(path_or_buf="data/model_db.json", orient="records")

## task planning

In [5]:
from src.openai.requests import zero_shot_task_planning

In [6]:
# Request a zero-shot task plan for a text -> poem -> sentiment pipeline.
# The reply is kept as raw text and parsed in the next cell.
zero_task_plan = zero_shot_task_planning(
    "a model that takes as input text from user change it to a poem and classify it as positive emotion or negative emotion"
)

In [7]:
# Parse the planner's reply with ast.literal_eval instead of eval: the text is
# produced by a model, and eval would execute any expression it happens to contain.
# literal_eval accepts only Python literals (lists, dicts, strings, numbers, ...)
# and raises ValueError/SyntaxError on anything else.
final = ast.literal_eval(zero_task_plan)

In [8]:
# Show the parsed plan.
final

[{'task_id': 'task_1',
  'task': 'text-classification',
  'task_description': 'Classify user input as positive or negative emotion.',
  'dep': [],
  'inputs': [{'id': 'input_1',
    'input_type': 'text',
    'input_description': 'User provided text.'}],
  'outputs': [{'id': 'output_1',
    'output_type': 'text',
    'output_description': 'Class of sentiment'}]},
 {'task_id': 'task_2',
  'task': 'text2text-generation',
  'task_description': 'Convert user input to a poem.',
  'dep': ['task_1'],
  'inputs': [{'id': 'input_2',
    'input_type': 'text',
    'input_description': 'User provided text.'}],
  'outputs': [{'id': 'output_2',
    'output_type': 'text',
    'output_description': 'Poem generated.'}]},
 {'task_id': 'task_3',
  'task': 'text-classification',
  'task_description': 'Classify poem as positive or negative emotion.',
  'dep': ['task_2'],
  'inputs': [{'id': 'input_3',
    'input_type': 'text',
    'input_description': 'Poem generated.'}],
  'outputs': [{'id': 'output_3',
  

## input modeling

In [15]:
# Free-text problem statement that the next cell passes, unmodified, to
# zero_shot_task_planning.
# NOTE(review): the text (including its spelling) is runtime input to the planner,
# so it is deliberately left verbatim here.
test = """
we need to make it easy for our employees to add and edit regulations. 
Adding a new regulation requires mutiple checks across thousands of pages of already existing regulations. 
During these checks (which takes months) emplyees look for conflicting regulations made earlier, 
loop holes and blind spots.
find the relevent regulations, paragraphs and articles to the request
check if a conflict occurs
explain the conflict occuring
"""

In [16]:
# Ask the planner for a task plan covering the regulations use case in `test`.
test_task_plan = zero_shot_task_planning(test)

In [17]:
# Parse the planner's reply with ast.literal_eval instead of eval: the text is
# produced by a model, and eval would execute any expression it happens to contain.
# Note this rebinds `final`, replacing the plan parsed in the earlier section.
final = ast.literal_eval(test_task_plan)
final

{'tasks': [{'task_id': 1,
   'task': 'convert-pdf-to-text',
   'task_description': 'Convert existing regulations in pdf format into a text format.',
   'dep': [],
   'inputs': [{'id': 1,
     'input_type': 'pdf',
     'input_description': 'List of regulations in pdf format.'}],
   'outputs': [{'id': 2,
     'output_type': 'txt',
     'output_description': 'List of regulations in text format.'}]},
  {'task_id': 2,
   'task': 'text2text-generation',
   'task_description': 'Find the relevant paragraphs and articles based on the user request.',
   'dep': [1],
   'inputs': [{'id': 2,
     'input_type': 'txt',
     'input_description': 'List of regulations in text format.'}],
   'outputs': [{'id': 3,
     'output_type': 'txt',
     'output_description': 'List of relevant paragraphs and articles.'}]},
  {'task_id': 3,
   'task': 'text2text-generation',
   'task_description': 'Check if any conflicts occur among the relevant paragraphs and articles.',
   'dep': [2],
   'inputs': [{'id': 3,
    

In [None]:
def problem(**kwargs):
    """Render a problem specification as text.

    Intended keywords (none are enforced):
        subject:str, task:str, description:str, inputs:dict, outputs:dict

    Returns:
        The string form of the keyword dict, with keys in the order they
        were passed.
    """
    return repr(kwargs)

In [None]:
# Hand-written problem specification, serialised to a string by `problem`.
problem_1 = problem(
    subject="document embedding",
    task="text splitting",
    description="divide text into chunks",
    inputs="[document text : str]",
    outputs="[list of sentences : list[str]]",
)

# NOTE(review): every argument below is identical to problem_1 — presumably a
# placeholder for a different specification; confirm.
problem_2 = problem(
    subject="document embedding",
    task="text splitting",
    description="divide text into chunks",
    inputs="[document text : str]",
    outputs="[list of sentences : list[str]]",
)

In [None]:
# Three hand-written problem specifications kept as free-form multi-line strings.
# NOTE(review): only the first entry is wrapped in braces, and these strings use
# the key `output` whereas the `problem(...)` calls use `outputs` — confirm the
# intended format before anything parses these.
problems = [
"""
{"subject" : document embedding
task : text splitting
description : divide text into chunks
inputs : [document text : str]
output : list of sentences : list[str]}
""",
"""
"subject" : document embedding
task : pdf to text
description : turn the pdf into text while ignoring images
inputs : [input documentation pdf : pdf]
output : document text : str
""",
"""
"subject" : document embedding
task : sentence embedding
description : embed each sentence
inputs : [list of sentences : list[str]]
output : list of list of embeddings and list of sentences : list[list[str], list[float]]] 
"""]


## model selection

In [31]:
from src.model_selection.model_selection import SemanticSearchEngine
from src.examples.test_task_plans import test_task_plan_1, test_task_plan_2, test_task_plan_3
from src.utils.utils import read_json

In [10]:
# Reload the model records from the path the to_json cell writes to.
# NOTE(review): this rebinds `model_db`, which held a DataFrame in the sampling
# section; the type returned by read_json is not visible here — confirm.
model_db = read_json("data/model_db.json")

In [13]:
# Show the second example plan imported from src.examples.test_task_plans.
test_task_plan_2

[{'id': 't1',
  'task': 'Convert regulations into searchable format',
  'task_description': 'Transform the existing regulations into a searchable format (e.g. text, pdf, etc.) that can be easily queried by other tasks',
  'dep': [],
  'inputs': [{'id': 'i1',
    'input_type': 'pdf',
    'input_description': 'Existing regulations in PDF format'}],
  'outputs': [{'id': 'o1',
    'output_type': 'text',
    'output_description': 'Regulations in searchable text format'}]},
 {'id': 't2',
  'task': 'Retrieve relevant regulations',
  'task_description': "Find the relevant regulations, paragraphs, and articles based on the user's request",
  'dep': ['t1'],
  'inputs': [{'id': 'i2',
    'input_type': 'text',
    'input_description': "User's request text"}],
  'outputs': [{'id': 'o2',
    'output_type': 'text',
    'output_description': 'List of relevant regulations, paragraphs, and articles in text format'}]},
 {'id': 't3',
  'task': 'Check for conflicts',
  'task_description': "Identify potenti

In [20]:
# Unwrap the {'tasks': [...]} envelope so `final` is the bare task list.
# Guarded so that re-running this cell, when `final` is already a list, is a
# no-op instead of raising TypeError on list['tasks'].
if isinstance(final, dict):
    final = final["tasks"]

In [33]:
# Show the task list that is passed to the search engine in the next cell.
final

[{'task_id': 1,
  'task': 'convert-pdf-to-text',
  'task_description': 'Convert existing regulations in pdf format into a text format.',
  'dep': [],
  'inputs': [{'id': 1,
    'input_type': 'pdf',
    'input_description': 'List of regulations in pdf format.'}],
  'outputs': [{'id': 2,
    'output_type': 'txt',
    'output_description': 'List of regulations in text format.'}]},
 {'task_id': 2,
  'task': 'text2text-generation',
  'task_description': 'Find the relevant paragraphs and articles based on the user request.',
  'dep': [1],
  'inputs': [{'id': 2,
    'input_type': 'txt',
    'input_description': 'List of regulations in text format.'}],
  'outputs': [{'id': 3,
    'output_type': 'txt',
    'output_description': 'List of relevant paragraphs and articles.'}]},
 {'task_id': 3,
  'task': 'text2text-generation',
  'task_description': 'Check if any conflicts occur among the relevant paragraphs and articles.',
  'dep': [2],
  'inputs': [{'id': 3,
    'input_type': 'txt',
    'input_de

In [32]:
# Match the loaded model records against the task list.
# NOTE(review): the displayed result looks like one ranked list of model ids per task — confirm against SemanticSearchEngine.forward.
se = SemanticSearchEngine()
se.forward(
    model_db=model_db,
    task_db=final,
)

[['tuner007/pegasus_summarizer',
  'bigscience/bloom',
  'facebook/textless_sm_it_fr',
  'cafeai/cafe_aesthetic',
  'tufa15nik/vilt-finetuned-vqasi'],
 ['tuner007/pegasus_summarizer',
  'facebook/textless_sm_it_fr',
  'bigscience/bloom',
  'cafeai/cafe_aesthetic',
  'tufa15nik/vilt-finetuned-vqasi'],
 ['bigscience/bloom',
  'facebook/textless_sm_it_fr',
  'tufa15nik/vilt-finetuned-vqasi',
  'tuner007/pegasus_summarizer',
  'cafeai/cafe_aesthetic']]