In [1]:
# Enable the autoreload extension; mode 2 reloads all modules before each cell's code runs,
# so edits to imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## sample model db

In [5]:
import ast

import pandas as pd

from src.openai.requests import input_description, output_description

In [7]:
# Try the helper on a single free-text model description; the returned text is the cell output.
output_description("a model that takes in an image and outputs a caption")

'The output of this AI model would be a text caption that describes the content of the input image. The caption may describe objects, people, locations, or activities in the image. The model may also predict emotions, colors, and other attributes that are relevant to the image content. The caption may be a sentence or a short paragraph and should accurately reflect the most salient features of the image. The ultimate goal of this model is to provide a descriptive and informative context for the input image such that it can be easily understood and interpreted by humans.'

In [61]:
# Fixed sample size and seed so the sampled model DB is identical on every run
# (an unseeded .sample() would pick different rows after each kernel restart).
MODEL_DB_SAMPLE_SIZE = 10
MODEL_DB_SAMPLE_SEED = 42

# lines=True: the file is read as one JSON object per line.
data = pd.read_json(path_or_buf="data/huggingface_models.jsonl", lines=True)
# Keep the id, task and description columns and draw a reproducible random subset.
model_db = data[["id", "task", "description"]].sample(
    n=MODEL_DB_SAMPLE_SIZE,
    random_state=MODEL_DB_SAMPLE_SEED,
)

In [62]:
# Apply input_description to every sampled description and store the results in a new column.
model_db["input_desc"] = model_db["description"].apply(input_description)

In [63]:
# Apply output_description to every sampled description and store the results in a new column.
model_db["output_desc"] = model_db["description"].apply(output_description)

In [65]:
# Write the model DB to disk as JSON, one record (object) per row.
model_db.to_json(path_or_buf="data/model_db.json", orient="records")

## task planning

In [57]:
from src.openai.requests import zero_shot_task_planning

In [55]:
# Ask the planner for a task plan from a free-text request; the result is kept as returned.
zero_task_plan = zero_shot_task_planning("a model that takes in an image and outputs a audio pronouncing caption")

In [61]:
# Parse the planner's text as a Python literal rather than evaluating it as code.
# ast.literal_eval only accepts literals (lists, dicts, strings, numbers, ...) and raises
# on anything else, so model-generated text cannot execute the way it could with eval().
final = ast.literal_eval(zero_task_plan)

In [62]:
# Display the parsed plan for inspection.
final

[{'task_id': 'task1',
  'task': 'image-captioning',
  'task_description': 'Generate a textual caption from an image.',
  'dep': [],
  'inputs': [{'id': 'input1',
    'input_type': 'image',
    'input_description': 'Input image to the model.'}],
  'outputs': [{'id': 'output1',
    'output_type': 'text',
    'output_description': 'Textual caption generated from the provided image.'}]},
 {'task_id': 'task2',
  'task': 'text-to-speech',
  'task_description': 'Convert textual caption to an audio file',
  'dep': ['task1'],
  'inputs': [{'id': 'input2',
    'input_type': 'text',
    'input_description': 'Textual caption generated from the previous step.'}],
  'outputs': [{'id': 'output2',
    'output_type': 'audio',
    'output_description': 'The audio file that pronounces the textual caption.'}]}]

## input modeling

In [67]:
# Free-text request passed to the planner: background on the regulation-editing workflow,
# followed by the steps to carry out. The text is sent as written, so it is left unedited here.
test = """
we need to make it easy for our employees to add and edit regulations. 
Adding a new regulation requires mutiple checks across thousands of pages of already existing regulations. 
During these checks (which takes months) emplyees look for conflicting regulations made earlier, 
loop holes and blind spots.
find the relevent regulations, paragraphs and articles to the request
check if a conflict occurs
explain the conflict occuring
"""

In [68]:
# Ask the planner for a task plan covering the regulation request held in `test`.
test_task_plan = zero_shot_task_planning(test)

In [69]:
# Parse the planner's text as a Python literal rather than evaluating it as code.
# ast.literal_eval only accepts literals (lists, dicts, strings, numbers, ...) and raises
# on anything else, so model-generated text cannot execute the way it could with eval().
final = ast.literal_eval(test_task_plan)
final

[{'task_id': 't1',
  'task': 'Convert regulations into searchable format',
  'task_description': 'Transform the existing regulations into a searchable format (e.g. text, pdf, etc.) that can be easily queried by other tasks',
  'dep': [],
  'inputs': [{'id': 'i1',
    'input_type': 'pdf',
    'input_description': 'Existing regulations in PDF format'}],
  'outputs': [{'id': 'o1',
    'output_type': 'text',
    'output_description': 'Regulations in searchable text format'}]},
 {'task_id': 't2',
  'task': 'Retrieve relevant regulations',
  'task_description': "Find the relevant regulations, paragraphs, and articles based on the user's request",
  'dep': ['t1'],
  'inputs': [{'id': 'i2',
    'input_type': 'text',
    'input_description': "User's request text"}],
  'outputs': [{'id': 'o2',
    'output_type': 'text',
    'output_description': 'List of relevant regulations, paragraphs, and articles in text format'}]},
 {'task_id': 't3',
  'task': 'Check for conflicts',
  'task_description': "I

In [3]:
# Intended keywords: subject:str, task:str, description:str, inputs:dict, outputs:dict
def problem(**kwargs):
    """Render a problem specification as text.

    All keyword arguments are gathered into a dict and that dict's string
    form is returned. No keyword is required or validated.

    Returns:
        str: the string form of the keyword-argument dict.
    """
    return f"{kwargs}"

In [4]:
# Two problem specifications rendered to text via problem().
# NOTE(review): problem_2 is built from exactly the same values as problem_1, so the two
# strings are identical — confirm whether a different task was intended for problem_2.
problem_1 = problem(
    subject="document embedding",
    task="text splitting",
    description="divide text into chunks",
    inputs="[document text : str]",
    outputs="[list of sentences : list[str]]",
)

problem_2 = problem(
    subject="document embedding",
    task="text splitting",
    description="divide text into chunks",
    inputs="[document text : str]",
    outputs="[list of sentences : list[str]]",
)

In [6]:
# Hand-written problem specifications as free-form text, one triple-quoted string per task
# (text splitting, pdf to text, sentence embedding).
# NOTE(review): the entries are formatted inconsistently — only the first is wrapped in
# braces and only the "subject" key is quoted — confirm whether that is intended.
problems = [
"""
{"subject" : document embedding
task : text splitting
description : divide text into chunks
inputs : [document text : str]
output : list of sentences : list[str]}
""",
"""
"subject" : document embedding
task : pdf to text
description : turn the pdf into text while ignoring images
inputs : [input documentation pdf : pdf]
output : document text : str
""",
"""
"subject" : document embedding
task : sentence embedding
description : embed each sentence
inputs : [list of sentences : list[str]]
output : list of list of embeddings and list of sentences : list[list[str], list[float]]] 
"""]
