## Entities

In [24]:
import pandas as pd
from itertools import product

# Entity names; each one becomes the single selected field of a generated query.
entities = ["singer", "teacher", "town"]

# Question phrasings; the "{}" placeholder is filled with the entity name.
question_templates = [
    "Give me {}",
    "What is {}",
    "Tell me about {}",
    "What do you know about {}",
]


def create_simple_queries_and_questions(entities, question_templates):
    """Pair every entity with every question template.

    Combinations are produced entity-major (all templates for the first
    entity, then all templates for the second, ...). Each combination yields
    one row: the template filled with the entity name, and the query string
    ``query { <entity> }``.

    Args:
        entities: Entity names to generate rows for.
        question_templates: Format strings; ``str.format`` is called on each
            with the entity as the only positional argument.

    Returns:
        pandas.DataFrame with the columns ``Question`` and ``Query``.
    """
    combinations = list(product(entities, question_templates))

    return pd.DataFrame({
        'Question': [phrasing.format(name) for name, phrasing in combinations],
        'Query': [f"query {{ {name} }}" for name, _ in combinations],
    })

# Build the entity-only question/query table and print it.
df = create_simple_queries_and_questions(
    entities=entities,
    question_templates=question_templates,
)
print(df)



                          Question              Query
0                   Give me singer   query { singer }
1                   What is singer   query { singer }
2             Tell me about singer   query { singer }
3    What do you know about singer   query { singer }
4                  Give me teacher  query { teacher }
5                  What is teacher  query { teacher }
6            Tell me about teacher  query { teacher }
7   What do you know about teacher  query { teacher }
8                     Give me town     query { town }
9                     What is town     query { town }
10              Tell me about town     query { town }
11     What do you know about town     query { town }


## Entities and properties

In [26]:
import pandas as pd
from itertools import product

# Entity names used as the root of each generated query.
entities = ["singer", "teacher", "town"]

# Property names that can be selected on an entity.
properties = ["children", "friend", "rival"]

# Question phrasings; "{}" is filled with a "the <entity>'s <property>" phrase.
question_templates = [
    "Give me {}",
    "What is {}",
    "Tell me about {}",
    "What do you know about {}",
]

def create_simple_queries_and_questions(entities, properties, question_templates):
    """Generate question/query pairs for one property of one entity.

    The candidate properties are ``properties`` followed by every entity that
    is not already listed in ``properties``, so entities can be selected as
    properties of other entities. Combinations where the entity and the
    property are the same name are left out.

    Args:
        entities: Entity names.
        properties: List of property names (concatenated with a list, so it
            must support ``+`` with a list).
        question_templates: Format strings with one ``{}`` placeholder, filled
            with ``the <entity>'s <property>``.

    Returns:
        pandas.DataFrame with the columns ``Question`` and ``Query``; the
        query has the form ``query <entity> { <property> }``.
    """
    entity_properties = [name for name in entities if name not in properties]
    candidate_properties = properties + entity_properties

    rows = [
        (
            template.format(f"the {entity}'s {property_}"),
            f"query {entity} {{ {property_} }}",
        )
        for entity, property_, template in product(entities, candidate_properties, question_templates)
        if entity != property_  # an entity is never offered as its own property
    ]

    return pd.DataFrame({
        'Question': [question for question, _ in rows],
        'Query': [query for _, query in rows],
    })

# Build the entity/property question/query table and print it.
df = create_simple_queries_and_questions(
    entities=entities,
    properties=properties,
    question_templates=question_templates,
)
print(df)


                                         Question                       Query
0                   Give me the singer's children   query singer { children }
1                   What is the singer's children   query singer { children }
2             Tell me about the singer's children   query singer { children }
3    What do you know about the singer's children   query singer { children }
4                     Give me the singer's friend     query singer { friend }
5                     What is the singer's friend     query singer { friend }
6               Tell me about the singer's friend     query singer { friend }
7      What do you know about the singer's friend     query singer { friend }
8                      Give me the singer's rival      query singer { rival }
9                      What is the singer's rival      query singer { rival }
10               Tell me about the singer's rival      query singer { rival }
11      What do you know about the singer's rival      query sin

## Entities, nested properties

In [27]:
import pandas as pd
from itertools import product

# Entity names used as the root of each generated query.
entities = ["singer", "teacher", "town"]

# First-level property names selectable on an entity.
properties = ["children", "friend", "rival"]

# Question phrasings; "{}" is filled with the possessive phrase built per row.
question_templates = [
    "Give me {}",
    "What is {}",
    "Tell me about {}",
    "What do you know about {}",
]

# Second-level property names selected inside the first-level property.
nested_properties = [
    "age",
    "profession",
    "hobby",
    "salary",
    "height",
    "weight",
    "hair_color",
    "eye_color",
]

def create_simple_queries_and_questions(entities, properties, question_templates, nested_properties):
    """Generate question/query pairs for a nested property of an entity's property.

    The candidate first-level properties are ``properties`` followed by every
    entity not already listed in ``properties``; combinations where the entity
    and the property share a name are left out. Every remaining
    (entity, property, template) combination is expanded once per nested
    property.

    Args:
        entities: Entity names.
        properties: List of first-level property names.
        question_templates: Format strings with one ``{}`` placeholder, filled
            with ``the <entity>'s <property>'s <nested_property>``.
        nested_properties: Names selected inside the first-level property.

    Returns:
        pandas.DataFrame with the columns ``Question`` and ``Query``; the
        query has the form ``query <entity> { <property> { <nested> } }``.
    """
    entity_properties = [name for name in entities if name not in properties]
    candidate_properties = properties + entity_properties

    rows = [
        (
            template.format(f"the {entity}'s {property_}'s {nested_property}"),
            f"query {entity} {{ {property_} {{ {nested_property} }} }}",
        )
        for entity, property_, template in product(entities, candidate_properties, question_templates)
        if entity != property_
        for nested_property in nested_properties
    ]

    return pd.DataFrame({
        'Question': [question for question, _ in rows],
        'Query': [query for _, query in rows],
    })

# Build the nested-property question/query table and print it.
df = create_simple_queries_and_questions(
    entities=entities,
    properties=properties,
    question_templates=question_templates,
    nested_properties=nested_properties,
)
print(df)


                                              Question  \
0                  Give me the singer's children's age   
1           Give me the singer's children's profession   
2                Give me the singer's children's hobby   
3               Give me the singer's children's salary   
4               Give me the singer's children's height   
..                                                 ...   
475  What do you know about the town's teacher's sa...   
476  What do you know about the town's teacher's he...   
477  What do you know about the town's teacher's we...   
478  What do you know about the town's teacher's ha...   
479  What do you know about the town's teacher's ey...   

                                        Query  
0           query singer { children { age } }  
1    query singer { children { profession } }  
2         query singer { children { hobby } }  
3        query singer { children { salary } }  
4        query singer { children { height } }  
..             

## Entities, filtering on properties

In [35]:
import pandas as pd
from itertools import product

# Remove pandas' display limits (each option set to None) so frames are shown untruncated.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)

# Entity names used as the root field of each generated query.
entities = ["singer", "teacher", "town"]

# Property names that can appear in a filter condition.
properties = ["children", "friend", "rival"]

# Comparison operator keys placed inside the filter condition.
simple_logical_operators = ["_gt", "_lt", "_eq", "_neq"]

# Question phrasings with two placeholders: the entity, then the filter condition.
question_templates = [
    "What are the {} where {}",
    "What is the {} that {}",
    "Tell me about {} for the {}",
    "What do you know about {} that has {}",
    "Give me {} in which {}",
]

def create_simple_queries_and_questions(entities, properties, simple_logical_operators, question_templates):
    """Generate question/query pairs that filter an entity on one property.

    One row is produced for every combination of entity, property, operator
    and template. The filter condition is the text
    ``<property> : { <operator>: <value> }`` with a literal ``<value>``
    placeholder; the same text is inserted into both the question and the
    query.

    Args:
        entities: Entity names.
        properties: Property names to filter on.
        simple_logical_operators: Operator keys placed inside the condition.
        question_templates: Format strings with two ``{}`` placeholders,
            filled with the entity and the condition text, in that order.

    Returns:
        pandas.DataFrame with the columns ``Question`` and ``Query``; the
        query has the form ``query { <entity> (where : { <condition> }) }``.
    """
    def build_row(entity, property_, operator, template):
        # The condition text is shared verbatim by the question and the query.
        condition = f"{property_} : {{ {operator}: <value> }}"
        return (
            template.format(entity, condition),
            f"query {{ {entity} (where : {{ {condition} }}) }}",
        )

    rows = [
        build_row(*combination)
        for combination in product(entities, properties, simple_logical_operators, question_templates)
    ]

    return pd.DataFrame({
        'Question': [question for question, _ in rows],
        'Query': [query for _, query in rows],
    })

# Build the filter question/query table; the bare name below renders it as the cell output.
df = create_simple_queries_and_questions(
    entities=entities,
    properties=properties,
    simple_logical_operators=simple_logical_operators,
    question_templates=question_templates,
)

df


Unnamed: 0,Question,Query
0,What are the singer where children : { _gt: <v...,query { singer (where : { children : { _gt: <v...
1,What is the singer that children : { _gt: <val...,query { singer (where : { children : { _gt: <v...
2,Tell me about singer for the children : { _gt:...,query { singer (where : { children : { _gt: <v...
3,What do you know about singer that has childre...,query { singer (where : { children : { _gt: <v...
4,Give me singer in which children : { _gt: <val...,query { singer (where : { children : { _gt: <v...
5,What are the singer where children : { _lt: <v...,query { singer (where : { children : { _lt: <v...
6,What is the singer that children : { _lt: <val...,query { singer (where : { children : { _lt: <v...
7,Tell me about singer for the children : { _lt:...,query { singer (where : { children : { _lt: <v...
8,What do you know about singer that has childre...,query { singer (where : { children : { _lt: <v...
9,Give me singer in which children : { _lt: <val...,query { singer (where : { children : { _lt: <v...


In [315]:
#GraphQL Query Generator
import random

def generate_graphql_query(input_elements, min_length=80, max_length=140):
    """Build one random GraphQL-style query string from the given vocabulary.

    Candidates are generated repeatedly until one has a length within
    ``[min_length, max_length]``; that candidate is returned. A candidate has
    the shape::

        query { <entity> [(<argument> [where : {...}])] { <selection> } }

    Whether an argument, a ``where`` filter, an ``aggregate`` selection or
    nested selections are present is decided by coin flips on the
    module-level ``random`` generator.

    Args:
        input_elements: Mapping with the keys ``entities``, ``properties``,
            ``arguments``, ``filters``, ``logical_operators``,
            ``comparison_operators`` and ``aggregators``. ``orderings`` is
            read when ``order_by`` is drawn and ``like_arguments`` when
            ``_like`` is drawn. ``filters`` is only checked for being
            non-empty.
        min_length: Smallest accepted query length, in characters.
        max_length: Largest accepted query length, in characters.

    Returns:
        The first generated query string whose length is within the bounds.

    Raises:
        ValueError: If ``min_length`` is greater than ``max_length``.

    Note:
        There is no attempt limit: if the vocabulary cannot produce a query
        within the bounds, the function does not return.
    """
    if min_length > max_length:
        # Inverted bounds can never be satisfied and would loop forever.
        raise ValueError(
            f"min_length ({min_length}) must not exceed max_length ({max_length})"
        )

    properties = input_elements["properties"]

    while True:
        query = "query { "

        entity = random.choice(input_elements["entities"])
        query += f"{entity}"

        arguments_applied = False

        # Apply one random argument
        if input_elements["arguments"] and random.choice([True, False]):
            arguments_applied = True
            query += " ("
            arg = random.choice(input_elements["arguments"])
            query += f"{arg} : "

            if arg == "order_by":
                prop = random.choice(properties)
                order = random.choice(input_elements["orderings"])
                query += f"{{ {prop} : {order} }} "
            elif arg == "limit":
                limit = random.randint(1, 100)
                query += f"{limit} "
            elif arg == "distinct_on":
                prop = random.choice(properties)
                query += f"{prop} "
            else:
                # Unknown argument names get an empty object as their value.
                query += "{} "

        # Apply a random filter condition
        if input_elements["filters"] and random.choice([True, False]):
            if not arguments_applied:
                query += " ("
                arguments_applied = True

            logical_operator = random.choice(input_elements["logical_operators"])
            prop = random.choice(properties)
            comparison_operator = random.choice(input_elements["comparison_operators"])
            if comparison_operator == "_like":
                value = f"\"%{random.choice(input_elements['like_arguments'])}%\""
            else:
                value = random.uniform(0, 10000)  # Random value for the sake of example

            # The logical operator's operand must itself be an object:
            # `_and : prop : {...}` is not valid GraphQL, `_and : { prop : {...} }` is.
            condition = f"{prop} : {{ {comparison_operator} : {value} }}"
            query += f"where : {{ {logical_operator} : {{ {condition} }} }} "

        if arguments_applied:
            query += ")"

        query += " { "

        # Select random aggregators or random properties to be returned.
        # An empty aggregator list falls through to the property selection.
        if input_elements["aggregators"] and random.choice([True, False]):
            query += "aggregate { "
            num_aggregators = random.randint(1, len(input_elements["aggregators"]))
            for _ in range(num_aggregators):
                aggregator = random.choice(input_elements["aggregators"])
                prop = random.choice(properties)
                query += f"{aggregator} {{ {prop} }} "
            query += "} "
        else:
            num_properties = random.randint(1, 5)
            for _ in range(num_properties):
                prop = random.choice(properties)
                query += f"{prop} "
                # Nested selections never repeat the parent property; with a
                # single-property vocabulary there is nothing left to nest.
                nested_candidates = [p for p in properties if p != prop]
                if nested_candidates and random.choice([True, False]):
                    query += "{ "
                    for _ in range(random.randint(1, 5)):
                        nested_prop = random.choice(nested_candidates)
                        query += f"{nested_prop} "
                    query += "} "

        query += "} }"

        if min_length <= len(query) <= max_length:
            return query

# Example vocabulary the generator draws from
input_elements = {
    "entities": [
        "students", "cars_data", "matches", "car_names", "professionals", "departments",
    ],
    "properties": [
        "date_first_registered", "first_name", "middle_name", "last_name",
        "accelerate", "car_name", "model", "mpg", "cylinders",
        "winner_name", "loser_name", "minutes", "role_code",
        "street", "city", "state",
        "department_name", "department_description",
    ],
    "arguments": ["order_by", "limit", "distinct_on"],
    "logical_operators": ["_and", "_or", "_not"],
    "aggregators": ["min", "max", "sum", "avg"],
    "comparison_operators": ["_eq", "_gt", "_lt", "_neq", "_like"],
    "filters": ["where"],
    "orderings": ["asc", "desc"],
    "like_arguments": ["car", "name", "street", "dep", "man", "department"],
}

# Draw a single random query from that vocabulary and show it
generated_query = generate_graphql_query(input_elements)
print(generated_query)


query { matches (distinct_on : date_first_registered ) { aggregate { max { middle_name } } } }


In [276]:
import random

def generate_graphql_query(input_elements, min_length=80, max_length=120):
    """Generate one random GraphQL-style query whose length is within bounds.

    Each candidate picks one entity and one property, then independently
    (probability 0.5 each) adds a ``where`` filter, an aggregator argument and
    an ``order_by`` argument, all referring to the chosen property. Candidates
    are drawn until one has a length within ``[min_length, max_length]``.

    Args:
        input_elements: Mapping with the keys ``entities``, ``properties``,
            ``filters``, ``logical_operators``, ``comparison_operators``,
            ``aggregators`` and ``orderings``.
        min_length: Smallest accepted query length, in characters.
        max_length: Largest accepted query length, in characters.

    Returns:
        The first generated query string whose length is within the bounds.

    Raises:
        ValueError: If ``min_length`` is greater than ``max_length``.

    Note:
        There is no attempt limit: if the vocabulary cannot produce a query
        within the bounds, the function does not return.
    """
    if min_length > max_length:
        # Inverted bounds can never be satisfied and would loop forever.
        raise ValueError(
            f"min_length ({min_length}) must not exceed max_length ({max_length})"
        )

    def generate_random_query(input_elements):
        """Build one candidate query without any length check."""
        entity = random.choice(input_elements["entities"])
        property_ = random.choice(input_elements["properties"])

        query_parts = []

        if random.random() < 0.5:
            filter_ = random.choice(input_elements["filters"])
            logical_operator = random.choice(input_elements["logical_operators"])
            comparison_operator = random.choice(input_elements["comparison_operators"])
            query_parts.append(f'{filter_}: {{ {logical_operator}: {{ {property_}: {{ {comparison_operator}: "value" }} }} }}')

        if random.random() < 0.5:
            aggregator = random.choice(input_elements["aggregators"])
            # NOTE(review): `<aggregator>: { <property> }` has no key/value pair
            # inside the braces, so it does not look like a valid GraphQL input
            # object; confirm the intended shape before relying on it.
            query_parts.append(f'{aggregator}: {{ {property_} }}')

        if random.random() < 0.5:
            ordering = random.choice(input_elements["orderings"])
            query_parts.append(f'order_by: {{ {property_}: {ordering} }}')

        if not query_parts:
            # No arguments drawn: leave the parentheses out, because an empty
            # argument list `entity ()` is not valid GraphQL.
            return f'query {{ {entity} {{ {property_} }} }}'

        query_body = ", ".join(query_parts)
        return f'query {{ {entity} ({query_body}) {{ {property_} }} }}'

    while True:
        query = generate_random_query(input_elements)
        if min_length <= len(query) <= max_length:
            return query

# Example vocabulary the generator draws from
input_elements = {
    "entities": [
        "countrylanguage_aggregate", "country_aggregate", "cartoon", "country", "people",
    ],
    "properties": [
        "isofficial", "country", "indepyear", "region", "tv_channel",
        "series_name", "name", "surfacearea", "continent", "population",
        "lifeexpectancy", "poker_players_aggregate", "final_table_made", "title",
    ],
    "nested_properties": ["aggregate", "count", "sum", "avg", "min", "max"],
    "arguments": ["order_by", "limit", "distinct_on"],
    "logical_operators": ["_and", "_or", "_not"],
    "comparison_operators": ["_eq", "_neq", "_gt", "_lt", "_like"],
    "aggregators": ["aggregate"],
    "orderings": ["asc", "desc"],
    "filters": ["where"],
}

# Print 30 sample queries, each followed by blank lines as a visual separator
for _ in range(30):
    print(generate_graphql_query(input_elements))
    print("\n")

query { cartoon (where: { _not: { name: { _eq: "value" } } }, aggregate: { name }, order_by: { name: desc }) { name } }


query { countrylanguage_aggregate (order_by: { population: asc }) { population } }


query { countrylanguage_aggregate (where: { _or: { title: { _eq: "value" } } }) { title } }


query { country_aggregate (aggregate: { population }, order_by: { population: desc }) { population } }


query { cartoon (where: { _and: { title: { _like: "value" } } }, order_by: { title: asc }) { title } }


query { people (where: { _or: { country: { _eq: "value" } } }, aggregate: { country }) { country } }


query { country_aggregate (where: { _and: { name: { _gt: "value" } } }) { name } }


query { countrylanguage_aggregate (order_by: { lifeexpectancy: asc }) { lifeexpectancy } }


query { cartoon (aggregate: { continent }, order_by: { continent: asc }) { continent } }


query { countrylanguage_aggregate (aggregate: { surfacearea }) { surfacearea } }


query { people (where: { _or: { su