Skip to content

Commit

Permalink
adding distributed_by support from annotation in ddlog
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasPalomares committed Dec 12, 2015
1 parent 959282f commit 90ccb78
Showing 1 changed file with 52 additions and 1 deletion.
1 change: 0 additions & 1 deletion database/db-driver/greenplum/schema_json_to_sql

This file was deleted.

52 changes: 52 additions & 0 deletions database/db-driver/greenplum/schema_json_to_sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#! /usr/bin/env python
# Generate create table statement given a ddlog exported schema and a table name.
# Usage: ddlog_initdb SCHEMA.JSON TABLE_NAME

#This is a variation of the postgresql driver's version.
#It generates the extra sql component "DISTRIBUTED BY (column_name(s))" when the annotation @distributed_by is added below one or several columns in a table declaration in ddlog

import json, sys

def generate_create_table_sql(schema, table):
columns_json = schema["relations"][table]["columns"]
# variable relation
if "variable_type" in schema["relations"][table]:
columns = range(len(columns_json) + 2)
columns[-2] = "id bigint"
label_type = "boolean" if schema["relations"][table]["variable_type"] == "boolean" else "int"
columns[-1] = "label " + label_type
else:
columns = range(len(columns_json))
distributed_by_array=[]
for k, v in columns_json.iteritems():
columns[v["index"]] = "%s %s" %(k, v["type"])
if "annotations" in v.keys():
for sub_dict_annotations in v["annotations"]:
if sub_dict_annotations["name"] == "distributed_by":
distributed_by_array.append(k)
if len(distributed_by_array) == 0:
return "DROP TABLE IF EXISTS %s CASCADE; CREATE TABLE %s(%s);" %(table, table, ", ".join(columns))
else:
return "DROP TABLE IF EXISTS %s CASCADE; CREATE TABLE %s(%s) DISTRIBUTED BY (%s);" %(table, table, ", ".join(columns), ", ".join(distributed_by_array))


def main():
# load schema.json
with open(sys.argv[1]) as schema_file:
schema = json.load(schema_file)
if "relations" not in schema:
# nothing to do
sys.exit(0)
# initialize all tables
if len(sys.argv) <= 2:
print ' '.join([generate_create_table_sql(schema, table) for table in (schema["relations"].keys() if "relations" in schema else [])])
else:
table = sys.argv[2]
# the given table is not in the schema, exit with error
if table in schema["relations"]:
print generate_create_table_sql(schema, table)
else:
sys.exit(1)

if __name__ == "__main__":
main()

0 comments on commit 90ccb78

Please sign in to comment.