In [1]:
import json
import os

import jq
from jaiqu import validate_schema, translate_schema

### Desired data format 

Create a `jsonschema` dictionary describing the data format you want. Data extracted from your input will be mapped into this format.

In [2]:
# Target JSON Schema (draft-07): the shape the extracted record should have.
# "id" and "date" are listed as required; "model" is not.
schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "properties": {
        # "id" accepts either a string or null.
        "id": {"type": ["string", "null"], "description": "A unique identifier for the record."},
        "date": {"type": "string", "description": "A string describing the date."},
        "model": {"type": "string", "description": "A text field representing the model used."},
    },
    "required": ["id", "date"],
}

### Sample input data
Provide an input JSON dictionary containing the data you want to extract.

In [3]:
# Sample record to extract from. Its key names deliberately differ from the
# schema's property names (e.g. "call.id" vs "id", "datetime" vs "date").
input_json = {
    "call.id": "123",
    "datetime": "2022-01-01",
    "timestamp": 1640995200,
    "Address": "123 Main St",
    # Nested object alongside the flat top-level fields.
    "user": {"name": "John Doe", "age": 30, "contact": "john@email.com"},
}

### (Optional) Create hints
The jaiqu agent may not know certain concepts. For example, you might want to have some keys interpreted a certain way (e.g. interpret "contact" as "email"). For tricky interpretations, create hints.

In [4]:
# Free-text hint passed as `key_hints` to the jaiqu calls below.
key_hints = (
    "We are processing outputs of an containing "
    "an id and a date of a user."
)

In [5]:
# Check which schema properties can be found in the sample input.
# The OpenAI key is read from the OPENAI_API_KEY environment variable instead of
# being hardcoded in the notebook; .get() yields None when the variable is unset.
schema_properties, valid = validate_schema(
    input_json,
    schema,
    key_hints=key_hints,
    openai_api_key=os.environ.get("OPENAI_API_KEY"),
)

Validating schema:   0%|          | 0/3 [00:00<?, ?it/s]

[REDACTED: API key removed from captured output]
[REDACTED: API key removed from captured output]
[REDACTED: API key removed from captured output]


### Verify schema
Verify that the input JSON contains the keys and values requested in your schema.

In [6]:
# Show the overall validation verdict, a separator line, then the per-property
# report as pretty-printed JSON.
print(f"Schema is valid: {valid}")
print("----------")
report_text = json.dumps(schema_properties, indent=2)
print(report_text)

Schema is valid: True
----------
{
  "id": {
    "identified": true,
    "key": "call.id",
    "message": "\"id\" | \"call.id\" : The field name is slightly different but carries the same concept of a unique identifier. Considering the description given, \"A unique identifier for the record\", this aligns well with the concept of an 'id', therefore it counts. Extracted key: `call.id`",
    "type": [
      "string",
      "null"
    ],
    "description": "A unique identifier for the record.",
    "required": true
  },
  "date": {
    "identified": true,
    "key": "datetime",
    "message": "\"date\" | \"datetime\" : The field name is different but both are associated with the concept of time, more precisely the definition talks about a string describing the date, which matches with the function of the 'datetime' key. Extracted key: `datetime`",
    "type": "string",
    "description": "A string describing the date.",
    "required": true
  },
  "model": {
    "identified": false,
    "

In [7]:
# Ask jaiqu for a jq query that maps the sample input onto the schema,
# allowing up to 20 retries.
# The OpenAI key is read from the OPENAI_API_KEY environment variable instead of
# being hardcoded in the notebook; .get() yields None when the variable is unset.
jq_query = translate_schema(
    input_json,
    schema,
    key_hints=key_hints,
    max_retries=20,
    openai_api_key=os.environ.get("OPENAI_API_KEY"),
)

Validating schema:   0%|          | 0/3 [00:00<?, ?it/s]

[REDACTED: API key removed from captured output]
[REDACTED: API key removed from captured output]
[REDACTED: API key removed from captured output]


Translating schema:   0%|          | 0/2 [00:00<?, ?it/s]

Retry attempts:   0%|          | 0/20 [00:00<?, ?it/s]

Validation attempts:   0%|          | 0/20 [00:00<?, ?it/s]

RuntimeError: Failed to validate the jq filter after 20 retries.

## Finalized jq query

In [None]:
# Bare expression: the notebook displays the value returned by translate_schema above.
jq_query

### Check the jq query results

In [None]:
# Compile the generated query and run it once over the sample input, keeping
# every emitted value. The stored result is printed rather than compiling and
# executing the same query a second time.
result = jq.compile(jq_query).input(input_json).all()
print(result)