In [1]:
test_sql = 'CREATE TABLE companies(id BIGINT, company_name VARCHAR, description VARCHAR);;'

In [14]:
def get_tablename(sql_query):
    return sql_query.split('CREATE TABLE ')[1].split('(')[0]

def get_table_fields(sql_query):
    fields = sql_query.split('(')[1].split(')')[0]
    return [x for x in fields.split(', ')]

def convert_sqltype_to_generator(default_generators, dtype):
    try:
        return default_generators[dtype]
    except:
        return default_generators['UNKNOWN']

def convert_fields_to_generator(default_generators, fields):
    column_statement = ''
    len_fields = len(fields) -1
    for i in enumerate(fields):
        name, dtype = i[1].split(' ')
        dtype = convert_sqltype_to_generator(default_generators, dtype)
        column_statement += f'{dtype} as {name},'
        if i[0] < len_fields:
            column_statement += '\n'
    return column_statement

def construct_sql_generator(default_generators, sql_query, nrows):
    table_name = get_tablename(sql_query)
    fields = get_table_fields(sql_query)
    col_statement = convert_fields_to_generator(default_generators, fields)
    return f"""CREATE OR REPLACE TABLE {table_name} AS 
(SELECT\n{col_statement} 
FROM range({nrows}));
"""

def parse_full_sql_schema(input_filename, output_filename, nrows, default_generators= default_generators,):
    with open(input_filename, 'r') as f:
        with open(output_filename, 'w+') as filehandle:
            for line in f.readlines():
                filehandle.write(construct_sql_generator(default_generators, line, nrows))


In [15]:
fields = get_table_fields(test_sql)
convert_fields_to_generator(default_generators, fields)

"faker_en('random_int') as id,\nxeger('[a-zA-Z0-9]{6,}') as company_name,\nxeger('[a-zA-Z0-9]{6,}') as description,"

In [5]:
default_generators = {
    'BIGINT': "faker_en('random_int')",
    'VARCHAR': "xeger('[a-zA-Z0-9]{6,}')",
    'UNKNOWN': "xeger('[a-zA-Z0-9]{6,}')",

}

default_generators['BIGINT']

"faker_en('random_int')"

In [16]:
parse_full_sql_schema('./examples/test_schema.sql', 'test_generator.sql', 45)

In [71]:
import pprint

pp = pprint.PrettyPrinter(indent=4)
pp.pprint(construct_sql_generator(test_sql, 10))

('CREATE OR REPLACE TABLE companies AS \n'
 '(SELECT\n'
 'BIGINT as id,\n'
 'VARCHAR as company_name,\n'
 'VARCHAR as description, \n'
 'FROM range(10));')


In [72]:
filename = "test_generator.sql"
with open(filename, 'w+') as filehandle:
    filehandle.write(construct_sql_generator(test_sql, 100))