# Validator


In [None]:
# Read in and truncate json to get schema

from JSON_shortener2 import *
MAX_JSON_LENGTH = 100000
MAX_STR_LENTH = 10
with open("pokemon_schema.json", "r") as f:
    schema = extract_JSON_structure(f.read(), MAX_JSON_LENGTH, MAX_STR_LENTH)
print(schema)

{'img': 'http://www...', 'egg': '2 km', 'spawn_time': '20:00'}


### Designing the validator prompt

My original prompt was along the lines of 

> Given the following JSON with possibly truncated fields: {schema}
> 
> Give me a Python function that checks if an input JSON follows the same schema

However, this response, and responses to many other prompts I tried, had the following errors:
- The function would import packages that were not available in this Python kernel, so you had to use commands to install them first
- The function was bad at validating based on value structure: it did not take into account anything other than the JSONs having the same keys and same types for the values
    - For example, if the schema had a value like "52 kg", and the input's value was "abcd", this would return True, even though it is not a number, nor does it have kg units.
    - Another example, if the schema had a value like "13:00", and the input's value was "2300", this would return True, even though it is not formatted as a time.
- The function has errors in it, like variables that are referenced without ever being initalized or declared

As such, my user message became longer in order to make a more detailed request, but this negates the effect of truncating our JSON because now we're using more tokens

Note that this user message is still not that good. It may give false positives or negatives, and it may produce code errors.

In [27]:
# Get validator response
from fibers.model.chat import Chat

# This is the best performing user message I could create so far
user_message = (
    "Here is a JSON with some fields, possibly truncated:\n"
    f"{schema}\n\n"

    "Without requiring any imports other than `json`, generate a Python function called `check_schema`. "
    "This function should take as input a JSON as a dictionary and a parameter called `json_module` (which will later be passed "
    "as the `json` module to import). The function should dynamically infer a schema from the provided JSON and check "
    "whether the input JSON follows a similar structure.\n\n"

    "The schema comparison should consider the following:\n"
    "- Both JSONs should have the same keys.\n"
    "- The values for each key should have the same general type (e.g., str, int, float).\n"
    "- Additional checks can be performed for specific keys or value types, such as links, file contents, or time formats.\n\n"

    "The function should return `True` if the input JSON follows a similar structure and `False` otherwise."
)


chat = Chat("You are a helpful AI assistant that writes code")
chat.add_user_message(user_message)
res = chat.complete_chat()
print(res)


Here's an example implementation of the `check_schema` function:

```python
def check_schema(json_dict, json_module):
    # Get the keys from the input JSON
    input_keys = set(json_dict.keys())

    # Load the JSON module dynamically
    json_dict_module = __import__(json_module)

    # Load a sample JSON to infer the schema
    sample_json = json_dict_module.loads(json_dict_module.dumps(json_dict))
    
    # Get the keys from the sample JSON
    sample_keys = set(sample_json.keys())

    # Check if the keys match
    if input_keys != sample_keys:
        return False

    # Iterate over the keys and check the value types
    for key in input_keys:
        input_value = json_dict[key]
        sample_value = sample_json[key]
        
        # Check only the general type of the value
        if type(input_value) != type(sample_value):
            return False

        # Additional checks for specific keys or value types can be added here
        # For example, checking if a value is 

In [28]:
# Retrieve only the function code

validator = res.split("```python\n")[1].split("```")[0]

print(validator)

def check_schema(json_dict, json_module):
    # Get the keys from the input JSON
    input_keys = set(json_dict.keys())

    # Load the JSON module dynamically
    json_dict_module = __import__(json_module)

    # Load a sample JSON to infer the schema
    sample_json = json_dict_module.loads(json_dict_module.dumps(json_dict))
    
    # Get the keys from the sample JSON
    sample_keys = set(sample_json.keys())

    # Check if the keys match
    if input_keys != sample_keys:
        return False

    # Iterate over the keys and check the value types
    for key in input_keys:
        input_value = json_dict[key]
        sample_value = sample_json[key]
        
        # Check only the general type of the value
        if type(input_value) != type(sample_value):
            return False

        # Additional checks for specific keys or value types can be added here
        # For example, checking if a value is a link
        if key == 'img' and not input_value.startswith('http'):
     

In [29]:
# Get the input JSON

import json

json_file_path = 'pokemon_input.json'

# Open the JSON file and load its content into a dictionary
with open(json_file_path, 'r') as json_file:
    json_input = json.load(json_file)
print(json_input)

{'img': 'http://www.serebii.net/pokemongo/pokemon/006.png', 'egg': '5 km', 'spawn_time': '13:34'}


In [30]:
# Validate

function_globals = {}
function_locals = {'json_input': json_input, 'json': json}

exec(validator, function_globals, function_locals)
res = function_locals['check_schema'](json_input, json)
print(res)

TypeError: module name must be a string