From 1426b7de3cec65d07d6dda2d9b271cf89b5dd1be Mon Sep 17 00:00:00 2001 From: ruether Date: Thu, 7 Nov 2013 15:31:35 +0100 Subject: [PATCH] SqlFactExtractor and FragmentLocator are running and can be tested, but need refactoring. --- technologies/SqlFactExtractor/Makefile | 7 + technologies/SqlFactExtractor/Readme.md | 20 + .../SqlFactExtractor/SQLClassifier.py | 4 + .../SqlFactExtractor/SQLFactExtractor.py | 229 +++++ .../SqlFactExtractor/example/createTable.sql | 35 + .../example/estimatedResult.json | 1 + .../estimatedResultWithLineNumbers.json | 1 + technologies/SqlFactExtractor/extractor.py | 5 + technologies/SqlFactExtractor/sqlKeyWords.txt | 824 ++++++++++++++++++ technologies/SqlFactExtractor/test.py | 11 + technologies/SqlFragmentLocator/Makefile | 19 + technologies/SqlFragmentLocator/README.md | 31 + .../SqlFragmentLocator/SQLClassifier.py | 4 + .../SqlFragmentLocator/SQLFactExtractor.py | 229 +++++ .../example/createTable.sql | 35 + .../example/estimatedResultColumn.json | 1 + .../example/estimatedResultCreate.json | 1 + .../example/estimatedResultFile.json | 1 + .../example/estimatedResultTable.json | 1 + technologies/SqlFragmentLocator/locator.py | 39 + .../SqlFragmentLocator/sqlKeyWords.txt | 824 ++++++++++++++++++ 21 files changed, 2322 insertions(+) create mode 100644 technologies/SqlFactExtractor/Makefile create mode 100644 technologies/SqlFactExtractor/Readme.md create mode 100644 technologies/SqlFactExtractor/SQLClassifier.py create mode 100644 technologies/SqlFactExtractor/SQLFactExtractor.py create mode 100644 technologies/SqlFactExtractor/example/createTable.sql create mode 100644 technologies/SqlFactExtractor/example/estimatedResult.json create mode 100644 technologies/SqlFactExtractor/example/estimatedResultWithLineNumbers.json create mode 100644 technologies/SqlFactExtractor/extractor.py create mode 100644 technologies/SqlFactExtractor/sqlKeyWords.txt create mode 100644 technologies/SqlFactExtractor/test.py create mode 100644 
technologies/SqlFragmentLocator/Makefile create mode 100644 technologies/SqlFragmentLocator/README.md create mode 100644 technologies/SqlFragmentLocator/SQLClassifier.py create mode 100644 technologies/SqlFragmentLocator/SQLFactExtractor.py create mode 100644 technologies/SqlFragmentLocator/example/createTable.sql create mode 100644 technologies/SqlFragmentLocator/example/estimatedResultColumn.json create mode 100644 technologies/SqlFragmentLocator/example/estimatedResultCreate.json create mode 100644 technologies/SqlFragmentLocator/example/estimatedResultFile.json create mode 100644 technologies/SqlFragmentLocator/example/estimatedResultTable.json create mode 100755 technologies/SqlFragmentLocator/locator.py create mode 100644 technologies/SqlFragmentLocator/sqlKeyWords.txt diff --git a/technologies/SqlFactExtractor/Makefile b/technologies/SqlFactExtractor/Makefile new file mode 100644 index 000000000..74e9de524 --- /dev/null +++ b/technologies/SqlFactExtractor/Makefile @@ -0,0 +1,7 @@ +test: + python test.py example/createTable.sql + diff --ignore-all-space example/testResult.json example/estimatedResult.json + diff --ignore-all-space example/testResultWithLineNumbers.json example/estimatedResultWithLineNumbers.json + + rm example/testResult.json + rm example/testResultWithLineNumbers.json diff --git a/technologies/SqlFactExtractor/Readme.md b/technologies/SqlFactExtractor/Readme.md new file mode 100644 index 000000000..32460a9b0 --- /dev/null +++ b/technologies/SqlFactExtractor/Readme.md @@ -0,0 +1,20 @@ +# Headline + +A fact extractor for Sql + +# Usage + +The executable is "extractor.py". + +The source code for fact extraction is read from stdin. + +The extracted JSON facts are written to stdout. + +# Testing + +Test the tool with "make test". + +See the Makefile for details. + +See example/createTable.sql is used as input. 
+ diff --git a/technologies/SqlFactExtractor/SQLClassifier.py b/technologies/SqlFactExtractor/SQLClassifier.py new file mode 100644 index 000000000..e8d639eb9 --- /dev/null +++ b/technologies/SqlFactExtractor/SQLClassifier.py @@ -0,0 +1,4 @@ +CLASSIFIER_FILE = "sql_file" +CLASSIFIER_CREATE = "create_statement" +CLASSIFIER_TABLE = "table" +CLASSIFIER_COLUMN = "column" \ No newline at end of file diff --git a/technologies/SqlFactExtractor/SQLFactExtractor.py b/technologies/SqlFactExtractor/SQLFactExtractor.py new file mode 100644 index 000000000..24c9c7c12 --- /dev/null +++ b/technologies/SqlFactExtractor/SQLFactExtractor.py @@ -0,0 +1,229 @@ +import sqlparse +import sys +import json +import re +from SQLClassifier import * + +class SQLFactExtractor(object): + """docstring for SQLFactExtractor""" + + def enumerate_auto(*sequential, **named): + enums = dict(zip(sequential, range(len(sequential))), **named) + return type('Enum', (), enums) + + StatementType = enumerate_auto( + 'CREATE', + 'ALTER', + 'UNKNOWN' + ) + + ExpectedToken = enumerate_auto( + 'NONE', + 'BEGIN', + 'VARIABLE_NAME', + 'VARIABLE_TYPE' + ) + + def __init__(self, file, log_code): + self.sqlDatei = file + self.load_reserved_sql_keywords() + self.log_code = log_code + + def load_reserved_sql_keywords(self): + self.reserved_keywords = [] + for line in open("sqlKeyWords.txt", "r"): + self.reserved_keywords.append(str.lower(line.rstrip())) + + def get_statement_type(self,statement): + for my_token in statement.tokens: + if str.upper(str(my_token)) == "CREATE": + return self.StatementType.CREATE + elif str.upper(str(my_token)) == "ALTER": + return self.StatementType.ALTER + return self.StatementType.UNKNOWN + + def miss_whitespace(self,tokens_generator_elem): + while tokens_generator_elem.is_whitespace(): + tokens_generator_elem = tokens_generator_elem.next(); + return tokens_generator_elem + + def extract_file(self): + fragment_result = {"classifier": CLASSIFIER_FILE, "fragments": []} + constraints_list = 
[] + + for each in sqlparse.parse(open(self.sqlDatei).read()): + if each.get_type() != "UNKNOWN": + statement_type = self.get_statement_type(each) + if self.StatementType.CREATE == statement_type:#create + self.extract_create_statement(each, fragment_result) + elif self.StatementType.ALTER == statement_type:#alter + self.extract_alter_statement(each, constraints_list) + + self.add_contraints(fragment_result, constraints_list) + if self.log_code: + self.add_code_linenumbers(fragment_result) + + return fragment_result + + def extract_create_statement(self, each, fragment_result): + subject_token = None + item_list = [] + expected_token = self.ExpectedToken.BEGIN + + #get Relevant Data + for token in each.tokens: + #print(dir(token)) + #print(token.value) + if str(token) not in ['create', 'table'] and not token.is_whitespace() and str(token) != ";": + subject_token = token.flatten().next() + for sub_token in token.flatten(): + if self.ExpectedToken.BEGIN == expected_token and '(' in str(sub_token): + expected_token = self.ExpectedToken.VARIABLE_NAME + elif self.ExpectedToken.VARIABLE_NAME == expected_token and not sub_token.is_whitespace(): + if str.lower(str(sub_token)) in self.reserved_keywords: + expected_token = self.ExpectedToken.NONE + else: + item_list.append(str(sub_token)) + expected_token = self.ExpectedToken.VARIABLE_TYPE + elif self.ExpectedToken.VARIABLE_TYPE == expected_token and not sub_token.is_whitespace(): + item_list.append(str(sub_token)) + expected_token = self.ExpectedToken.NONE + elif self.ExpectedToken.NONE == expected_token and ',' in str(sub_token): + expected_token = self.ExpectedToken.VARIABLE_NAME + # create fragments as JSON + statement_fragment = {"classifier": CLASSIFIER_CREATE, "fragments": []} + table_fragment = {"classifier": CLASSIFIER_TABLE, "name": str(subject_token), "fragments": []} + i = 0 + while i < len(item_list): + table_fragment["fragments"].append( + {"classifier": CLASSIFIER_COLUMN, "name": item_list[i], "type": 
item_list[i + 1], "fragments": []}) + i += 2 + statement_fragment["fragments"].append(table_fragment) + fragment_result["fragments"].append(statement_fragment) + + self.add_code_to_fragments(each, statement_fragment) + + + def extract_alter_statement(self, each, constraints_list): + subject_table = None + constraint_name = "" + foreign_key_var = "" + references = "" + #get Relevant Data + i = 0 + while str.lower(str(each.tokens[i])) in ['alter', 'table'] or each.tokens[i].is_whitespace(): + i += 1 + subject_table = str(each.tokens[i]) + while str.lower(str(each.tokens[i])) != 'constraint' or each.tokens[i].is_whitespace(): + i += 1 + constraint_name = str(each.tokens[i + 2]) + while (str.lower(str(each.tokens[i])) != 'foreign' and str.lower(str(each.tokens[i + 2])) != 'key') or \ + each.tokens[i].is_whitespace(): + i += 1 + foreign_key_var = str(each.tokens[i + 4]) + foreign_key_var = foreign_key_var[1:len(foreign_key_var) - 1] + while str.lower(str(each.tokens[i])) != 'references' or each.tokens[i].is_whitespace(): + i += 1 + references = str(each.tokens[i + 2]) + + constraints_list.append({ + "subject_table": subject_table, + "constraint_name": constraint_name, + "foreign_key_var": foreign_key_var, + "references": references + }) + + def add_code_to_fragments(self, each, statement_fragment): + if self.log_code: + statement_fragment["code"] = str(each) + statement_fragment["fragments"][0]["code"] = self.delete_beginning_control_characters(self.remouve_beginning_create(str(each))) + + #filter column code + long_string = str(each) + long_string = long_string[long_string.find("(")+1:] + column_code_list = self.format_end_of_column_generation( long_string.split(",")) + + for column_id in range(0, len(statement_fragment["fragments"][0]["fragments"])): + statement_fragment["fragments"][0]["fragments"][column_id]["code"] = self.delete_beginning_control_characters(column_code_list[column_id]) + + def remouve_beginning_create(self, code_string): + return 
code_string[code_string.find("table"):] + + def format_end_of_column_generation(self, column_codes): + column_codes[-1] = self.delete_last_char_if_equals(column_codes[-1], ";") + column_codes[-1] = self.delete_last_char_if_equals(column_codes[-1], ")") + + return column_codes + + def delete_last_char_if_equals(self, string, char): + if char == string[-1]: + string = string[:-1] + return string + + def delete_beginning_control_characters(self, string): + return string[re.search("\w",string).start() :] + + def add_contraints(self, fragment_result, constraints_list): + for constraint in constraints_list: + #find table + for create in fragment_result["fragments"]: + if create["fragments"][0]["name"] == constraint["subject_table"]: + #find column + for column in create["fragments"][0]["fragments"]: + if column["name"] == constraint["foreign_key_var"]: + column["constraints"] = [{ + "type": "foreign_key", + "references": constraint["references"], + "name": constraint["constraint_name"] + }] + + def add_code_linenumbers(self, fragment_result): + self.add_file_linenumbers(fragment_result) + + self.add_fragment_linenumbers(fragment_result["fragments"], 1, self.get_file_length(self.sqlDatei)) + + def add_fragment_linenumbers(self, fragment_pointer, start, end): + if type(fragment_pointer) == list: + for frag_elem in fragment_pointer: + self.add_fragment_linenumbers(frag_elem, start, end) + elif type(fragment_pointer) == dict: + self.derive_linenumber(fragment_pointer, start, end) + self.add_fragment_linenumbers(fragment_pointer["fragments"], fragment_pointer["line_start"], fragment_pointer["line_end"]) + else: + print("ERROR: add_code_linenumbers UNKNOWN type:"+str(type(fragment_pointer))) + + def derive_linenumber(self, fragment_pointer, start, end): + open_file = open(self.sqlDatei, "r") + + self.go_to_line(open_file, start) + + char_pointer_fragment = 0 + line_counter = start + start_line = start + while char_pointer_fragment < len(fragment_pointer["code"]): + char = 
open_file.read(1) + if("\n" == char): + line_counter += 1 + if char == fragment_pointer["code"][char_pointer_fragment]: + if char_pointer_fragment == 0: + start_line = line_counter + char_pointer_fragment += 1 + else: + char_pointer_fragment = 0 + + fragment_pointer["line_start"] = start_line + fragment_pointer["line_end"] = line_counter + + def go_to_line(self, open_file, start): + for times in range(1,start): + open_file.readline() + + def add_file_linenumbers(self, fragment_result): + fragment_result["line_start"] = 1 + fragment_result["line_end"] = self.get_file_length(self.sqlDatei) + + def get_file_length(self, file_string): + linenumber = 0 + for line in open(self.sqlDatei): + linenumber += 1 + return linenumber diff --git a/technologies/SqlFactExtractor/example/createTable.sql b/technologies/SqlFactExtractor/example/createTable.sql new file mode 100644 index 000000000..7fb25e35c --- /dev/null +++ b/technologies/SqlFactExtractor/example/createTable.sql @@ -0,0 +1,35 @@ +create + table COMPANY + ( + ID bigint generated by default as identity (start with 1), + name varchar(255), + primary key (ID) + ); + +create + table DEPARTMENT + ( + ID bigint generated by default as identity (start with 1), + name varchar(255), + COMP_ID bigint, + DEPT_ID bigint, + primary key (ID) + ); + +create + table EMPLOYEE + ( + ID bigint generated by default as identity (start with 1), + name varchar(255), + address varchar(255), + salary double, + manager bit, + MENTOR bigint, + DEPT_ID bigint, + primary key (ID) + ); + +alter table DEPARTMENT add constraint FK4F782F5255C77F64 foreign key (DEPT_ID) references DEPARTMENT; +alter table DEPARTMENT add constraint FK4F782F52C7CB872B foreign key (COMP_ID) references COMPANY; +alter table EMPLOYEE add constraint FK75C8D6AE55C77F64 foreign key (DEPT_ID) references DEPARTMENT; +alter table EMPLOYEE add constraint FK75C8D6AE800BE06C foreign key (MENTOR) references EMPLOYEE; \ No newline at end of file diff --git 
a/technologies/SqlFactExtractor/example/estimatedResult.json b/technologies/SqlFactExtractor/example/estimatedResult.json new file mode 100644 index 000000000..35a4d224a --- /dev/null +++ b/technologies/SqlFactExtractor/example/estimatedResult.json @@ -0,0 +1 @@ +{"fragments": [{"fragments": [{"fragments": [{"fragments": [], "type": "bigint", "classifier": "column", "name": "ID"}, {"fragments": [], "type": "varchar", "classifier": "column", "name": "name"}], "classifier": "table", "name": "COMPANY"}], "classifier": "create_statement"}, {"fragments": [{"fragments": [{"fragments": [], "type": "bigint", "classifier": "column", "name": "ID"}, {"fragments": [], "type": "varchar", "classifier": "column", "name": "name"}, {"fragments": [], "constraints": [{"references": "COMPANY", "type": "foreign_key", "name": "FK4F782F52C7CB872B"}], "type": "bigint", "classifier": "column", "name": "COMP_ID"}, {"fragments": [], "constraints": [{"references": "DEPARTMENT", "type": "foreign_key", "name": "FK4F782F5255C77F64"}], "type": "bigint", "classifier": "column", "name": "DEPT_ID"}], "classifier": "table", "name": "DEPARTMENT"}], "classifier": "create_statement"}, {"fragments": [{"fragments": [{"fragments": [], "type": "bigint", "classifier": "column", "name": "ID"}, {"fragments": [], "type": "varchar", "classifier": "column", "name": "name"}, {"fragments": [], "type": "varchar", "classifier": "column", "name": "address"}, {"fragments": [], "type": "double", "classifier": "column", "name": "salary"}, {"fragments": [], "type": "bit", "classifier": "column", "name": "manager"}, {"fragments": [], "constraints": [{"references": "EMPLOYEE", "type": "foreign_key", "name": "FK75C8D6AE800BE06C"}], "type": "bigint", "classifier": "column", "name": "MENTOR"}, {"fragments": [], "constraints": [{"references": "DEPARTMENT", "type": "foreign_key", "name": "FK75C8D6AE55C77F64"}], "type": "bigint", "classifier": "column", "name": "DEPT_ID"}], "classifier": "table", "name": "EMPLOYEE"}], 
"classifier": "create_statement"}], "classifier": "sql_file"} diff --git a/technologies/SqlFactExtractor/example/estimatedResultWithLineNumbers.json b/technologies/SqlFactExtractor/example/estimatedResultWithLineNumbers.json new file mode 100644 index 000000000..7a048f94c --- /dev/null +++ b/technologies/SqlFactExtractor/example/estimatedResultWithLineNumbers.json @@ -0,0 +1 @@ +{"fragments": [{"fragments": [{"code": "table COMPANY \n\t(\n\t\tID bigint generated by default as identity (start with 1), \n\t\tname varchar(255), \n\t\tprimary key (ID)\n\t);", "name": "COMPANY", "line_start": 2, "fragments": [{"code": "ID bigint generated by default as identity (start with 1)", "name": "ID", "line_end": 4, "line_start": 4, "fragments": [], "type": "bigint", "classifier": "column"}, {"code": "name varchar(255)", "name": "name", "line_end": 5, "line_start": 5, "fragments": [], "type": "varchar", "classifier": "column"}], "line_end": 7, "classifier": "table"}], "code": "create \n\ttable COMPANY \n\t(\n\t\tID bigint generated by default as identity (start with 1), \n\t\tname varchar(255), \n\t\tprimary key (ID)\n\t);", "line_end": 7, "classifier": "create_statement", "line_start": 1}, {"fragments": [{"code": "table DEPARTMENT \n\t(\n\t\tID bigint generated by default as identity (start with 1), \n\t\tname varchar(255), \n\t\tCOMP_ID bigint, \n\t\tDEPT_ID bigint, \n\t\tprimary key (ID)\n\t);", "name": "DEPARTMENT", "line_start": 10, "fragments": [{"code": "ID bigint generated by default as identity (start with 1)", "name": "ID", "line_end": 12, "line_start": 12, "fragments": [], "type": "bigint", "classifier": "column"}, {"code": "name varchar(255)", "name": "name", "line_end": 13, "line_start": 13, "fragments": [], "type": "varchar", "classifier": "column"}, {"code": "COMP_ID bigint", "name": "COMP_ID", "line_end": 14, "line_start": 14, "fragments": [], "type": "bigint", "classifier": "column", "constraints": [{"references": "COMPANY", "type": "foreign_key", "name": 
"FK4F782F52C7CB872B"}]}, {"code": "DEPT_ID bigint", "name": "DEPT_ID", "line_end": 15, "line_start": 15, "fragments": [], "type": "bigint", "classifier": "column", "constraints": [{"references": "DEPARTMENT", "type": "foreign_key", "name": "FK4F782F5255C77F64"}]}], "line_end": 17, "classifier": "table"}], "code": "\n\ncreate \n\ttable DEPARTMENT \n\t(\n\t\tID bigint generated by default as identity (start with 1), \n\t\tname varchar(255), \n\t\tCOMP_ID bigint, \n\t\tDEPT_ID bigint, \n\t\tprimary key (ID)\n\t);", "line_end": 17, "classifier": "create_statement", "line_start": 8}, {"fragments": [{"code": "table EMPLOYEE \n\t(\n\t\tID bigint generated by default as identity (start with 1),\n\t\tname varchar(255), \n\t\taddress varchar(255), \n\t\tsalary double, \n\t\tmanager bit, \n\t\tMENTOR bigint, \n\t\tDEPT_ID bigint, \n\t\tprimary key (ID)\n\t);", "name": "EMPLOYEE", "line_start": 20, "fragments": [{"code": "ID bigint generated by default as identity (start with 1)", "name": "ID", "line_end": 22, "line_start": 22, "fragments": [], "type": "bigint", "classifier": "column"}, {"code": "name varchar(255)", "name": "name", "line_end": 23, "line_start": 23, "fragments": [], "type": "varchar", "classifier": "column"}, {"code": "address varchar(255)", "name": "address", "line_end": 24, "line_start": 24, "fragments": [], "type": "varchar", "classifier": "column"}, {"code": "salary double", "name": "salary", "line_end": 25, "line_start": 25, "fragments": [], "type": "double", "classifier": "column"}, {"code": "manager bit", "name": "manager", "line_end": 26, "line_start": 26, "fragments": [], "type": "bit", "classifier": "column"}, {"code": "MENTOR bigint", "name": "MENTOR", "line_end": 27, "line_start": 27, "fragments": [], "type": "bigint", "classifier": "column", "constraints": [{"references": "EMPLOYEE", "type": "foreign_key", "name": "FK75C8D6AE800BE06C"}]}, {"code": "DEPT_ID bigint", "name": "DEPT_ID", "line_end": 28, "line_start": 28, "fragments": [], "type": 
"bigint", "classifier": "column", "constraints": [{"references": "DEPARTMENT", "type": "foreign_key", "name": "FK75C8D6AE55C77F64"}]}], "line_end": 30, "classifier": "table"}], "code": "\n\ncreate \n\ttable EMPLOYEE \n\t(\n\t\tID bigint generated by default as identity (start with 1),\n\t\tname varchar(255), \n\t\taddress varchar(255), \n\t\tsalary double, \n\t\tmanager bit, \n\t\tMENTOR bigint, \n\t\tDEPT_ID bigint, \n\t\tprimary key (ID)\n\t);", "line_end": 30, "classifier": "create_statement", "line_start": 18}], "line_end": 35, "classifier": "sql_file", "line_start": 1} diff --git a/technologies/SqlFactExtractor/extractor.py b/technologies/SqlFactExtractor/extractor.py new file mode 100644 index 000000000..8a28c1f28 --- /dev/null +++ b/technologies/SqlFactExtractor/extractor.py @@ -0,0 +1,5 @@ +#! /usr/bin/env python +from SQLFactExtractor import * + +extractor = SQLFactExtractor(sys.argv[1], False) +print(json.dumps(extractor.extract_file())) diff --git a/technologies/SqlFactExtractor/sqlKeyWords.txt b/technologies/SqlFactExtractor/sqlKeyWords.txt new file mode 100644 index 000000000..94b833c1c --- /dev/null +++ b/technologies/SqlFactExtractor/sqlKeyWords.txt @@ -0,0 +1,824 @@ +A +ABORT +ABS +ABSOLUTE +ACCESS +ACTION +ADA +ADD +ADMIN +AFTER +AGGREGATE +ALIAS +ALL +ALLOCATE +ALSO +ALTER +ALWAYS +ANALYSE +ANALYZE +AND +ANY +ARE +ARRAY +AS +ASC +ASENSITIVE +ASSERTION +ASSIGNMENT +ASYMMETRIC +AT +ATOMIC +ATTRIBUTE +ATTRIBUTES +AUDIT +AUTHORIZATION +AUTO_INCREMENT +AVG +AVG_ROW_LENGTH +BACKUP +BACKWARD +BEFORE +BEGIN +BERNOULLI +BETWEEN +BIGINT +BINARY +BIT +BIT_LENGTH +BITVAR +BLOB +BOOL +BOOLEAN +BOTH +BREADTH +BREAK +BROWSE +BULK +BY +C +CACHE +CALL +CALLED +CARDINALITY +CASCADE +CASCADED +CASE +CAST +CATALOG +CATALOG_NAME +CEIL +CEILING +CHAIN +CHANGE +CHAR +CHAR_LENGTH +CHARACTER +CHARACTER_LENGTH +CHARACTER_SET_CATALOG +CHARACTER_SET_NAME +CHARACTER_SET_SCHEMA +CHARACTERISTICS +CHARACTERS +CHECK +CHECKED +CHECKPOINT +CHECKSUM +CLASS +CLASS_ORIGIN +CLOB +CLOSE 
+CLUSTER +CLUSTERED +COALESCE +COBOL +COLLATE +COLLATION +COLLATION_CATALOG +COLLATION_NAME +COLLATION_SCHEMA +COLLECT +COLUMN +COLUMN_NAME +COLUMNS +COMMAND_FUNCTION +COMMAND_FUNCTION_CODE +COMMENT +COMMIT +COMMITTED +COMPLETION +COMPRESS +COMPUTE +CONDITION +CONDITION_NUMBER +CONNECT +CONNECTION +CONNECTION_NAME +CONSTRAINT +CONSTRAINT_CATALOG +CONSTRAINT_NAME +CONSTRAINT_SCHEMA +CONSTRAINTS +CONSTRUCTOR +CONTAINS +CONTAINSTABLE +CONTINUE +CONVERSION +CONVERT +COPY +CORR +CORRESPONDING +COUNT +COVAR_POP +COVAR_SAMP +CREATE +CREATEDB +CREATEROLE +CREATEUSER +CROSS +CSV +CUBE +CUME_DIST +CURRENT +CURRENT_DATE +CURRENT_DEFAULT_TRANSFORM_GROUP +CURRENT_PATH +CURRENT_ROLE +CURRENT_TIME +CURRENT_TIMESTAMP +CURRENT_TRANSFORM_GROUP_FOR_TYPE +CURRENT_USER +CURSOR +CURSOR_NAME +CYCLE +DATA +DATABASE +DATABASES +DATE +DATETIME +DATETIME_INTERVAL_CODE +DATETIME_INTERVAL_PRECISION +DAY +DAY_HOUR +DAY_MICROSECOND +DAY_MINUTE +DAY_SECOND +DAYOFMONTH +DAYOFWEEK +DAYOFYEAR +DBCC +DEALLOCATE +DEC +DECIMAL +DECLARE +DEFAULT +DEFAULTS +DEFERRABLE +DEFERRED +DEFINED +DEFINER +DEGREE +DELAY_KEY_WRITE +DELAYED +DELETE +DELIMITER +DELIMITERS +DENSE_RANK +DENY +DEPTH +DEREF +DERIVED +DESC +DESCRIBE +DESCRIPTOR +DESTROY +DESTRUCTOR +DETERMINISTIC +DIAGNOSTICS +DICTIONARY +DISABLE +DISCONNECT +DISK +DISPATCH +DISTINCT +DISTINCTROW +DISTRIBUTED +DIV +DO +DOMAIN +DOUBLE +DROP +DUAL +DUMMY +DUMP +DYNAMIC +DYNAMIC_FUNCTION +DYNAMIC_FUNCTION_CODE +EACH +ELEMENT +ELSE +ELSEIF +ENABLE +ENCLOSED +ENCODING +ENCRYPTED +END +END-EXEC +ENUM +EQUALS +ERRLVL +ESCAPE +ESCAPED +EVERY +EXCEPT +EXCEPTION +EXCLUDE +EXCLUDING +EXCLUSIVE +EXEC +EXECUTE +EXISTING +EXISTS +EXIT +EXP +EXPLAIN +EXTERNAL +EXTRACT +FALSE +FETCH +FIELDS +FILE +FILLFACTOR +FILTER +FINAL +FIRST +FLOAT +FLOAT4 +FLOAT8 +FLOOR +FLUSH +FOLLOWING +FOR +FORCE +FOREIGN +FORTRAN +FORWARD +FOUND +FREE +FREETEXT +FREETEXTTABLE +FREEZE +FROM +FULL +FULLTEXT +FUNCTION +FUSION +G +GENERAL +GENERATED +GET +GLOBAL +GO +GOTO +GRANT +GRANTED +GRANTS 
+GREATEST +GROUP +GROUPING +HANDLER +HAVING +HEADER +HEAP +HIERARCHY +HIGH_PRIORITY +HOLD +HOLDLOCK +HOST +HOSTS +HOUR +HOUR_MICROSECOND +HOUR_MINUTE +HOUR_SECOND +IDENTIFIED +IDENTITY +IDENTITY_INSERT +IDENTITYCOL +IF +IGNORE +ILIKE +IMMEDIATE +IMMUTABLE +IMPLEMENTATION +IMPLICIT +IN +INCLUDE +INCLUDING +INCREMENT +INDEX +INDICATOR +INFILE +INFIX +INHERIT +INHERITS +INITIAL +INITIALIZE +INITIALLY +INNER +INOUT +INPUT +INSENSITIVE +INSERT +INSERT_ID +INSTANCE +INSTANTIABLE +INSTEAD +INT +INT1 +INT2 +INT3 +INT4 +INT8 +INTEGER +INTERSECT +INTERSECTION +INTERVAL +INTO +INVOKER +IS +ISAM +ISNULL +ISOLATION +ITERATE +JOIN +K +KEY +KEY_MEMBER +KEY_TYPE +KEYS +KILL +LANCOMPILER +LANGUAGE +LARGE +LAST +LAST_INSERT_ID +LATERAL +LEADING +LEAST +LEAVE +LEFT +LENGTH +LESS +LEVEL +LIKE +LIMIT +LINENO +LINES +LISTEN +LN +LOAD +LOCAL +LOCALTIME +LOCALTIMESTAMP +LOCATION +LOCATOR +LOCK +LOGIN +LOGS +LONG +LONGBLOB +LONGTEXT +LOOP +LOW_PRIORITY +LOWER +M +MAP +MATCH +MATCHED +MAX +MAX_ROWS +MAXEXTENTS +MAXVALUE +MEDIUMBLOB +MEDIUMINT +MEDIUMTEXT +MEMBER +MERGE +MESSAGE_LENGTH +MESSAGE_OCTET_LENGTH +MESSAGE_TEXT +METHOD +MIDDLEINT +MIN +MIN_ROWS +MINUS +MINUTE +MINUTE_MICROSECOND +MINUTE_SECOND +MINVALUE +MLSLABEL +MOD +MODE +MODIFIES +MODIFY +MODULE +MONTH +MONTHNAME +MORE +MOVE +MULTISET +MUMPS +MYISAM +NAMES +NATIONAL +NATURAL +NCHAR +NCLOB +NESTING +NEW +NEXT +NO +NO_WRITE_TO_BINLOG +NOAUDIT +NOCHECK +NOCOMPRESS +NOCREATEDB +NOCREATEROLE +NOCREATEUSER +NOINHERIT +NOLOGIN +NONCLUSTERED +NONE +NORMALIZE +NORMALIZED +NOSUPERUSER +NOT +NOTHING +NOTIFY +NOTNULL +NOWAIT +NULL +NULLABLE +NULLIF +NULLS +NUMBER +NUMERIC +OBJECT +OCTET_LENGTH +OCTETS +OF +OFF +OFFLINE +OFFSET +OFFSETS +OIDS +OLD +ON +ONLINE +ONLY +OPEN +OPENDATASOURCE +OPENQUERY +OPENROWSET +OPENXML +OPERATION +OPERATOR +OPTIMIZE +OPTION +OPTIONALLY +OPTIONS +OR +ORDER +ORDERING +ORDINALITY +OTHERS +OUT +OUTER +OUTFILE +OUTPUT +OVER +OVERLAPS +OVERLAY +OVERRIDING +OWNER +PACK_KEYS +PAD +PARAMETER +PARAMETER_MODE 
+PARAMETER_NAME +PARAMETER_ORDINAL_POSITION +PARAMETER_SPECIFIC_CATALOG +PARAMETER_SPECIFIC_NAME +PARAMETER_SPECIFIC_SCHEMA +PARAMETERS +PARTIAL +PARTITION +PASCAL +PASSWORD +PATH +PCTFREE +PERCENT +PERCENT_RANK +PERCENTILE_CONT +PERCENTILE_DISC +PLACING +PLAN +PLI +POSITION +POSTFIX +POWER +PRECEDING +PRECISION +PREFIX +PREORDER +PREPARE +PREPARED +PRESERVE +PRIMARY +PRINT +PRIOR +PRIVILEGES +PROC +PROCEDURAL +PROCEDURE +PROCESS +PROCESSLIST +PUBLIC +PURGE +QUOTE +RAID0 +RAISERROR +RANGE +RANK +RAW +READ +READS +READTEXT +REAL +RECHECK +RECONFIGURE +RECURSIVE +REF +REFERENCES +REFERENCING +REGEXP +REGR_AVGX +REGR_AVGY +REGR_COUNT +REGR_INTERCEPT +REGR_R2 +REGR_SLOPE +REGR_SXX +REGR_SXY +REGR_SYY +REINDEX +RELATIVE +RELEASE +RELOAD +RENAME +REPEAT +REPEATABLE +REPLACE +REPLICATION +REQUIRE +RESET +RESIGNAL +RESOURCE +RESTART +RESTORE +RESTRICT +RESULT +RETURN +RETURNED_CARDINALITY +RETURNED_LENGTH +RETURNED_OCTET_LENGTH +RETURNED_SQLSTATE +RETURNS +REVOKE +RIGHT +RLIKE +ROLE +ROLLBACK +ROLLUP +ROUTINE +ROUTINE_CATALOG +ROUTINE_NAME +ROUTINE_SCHEMA +ROW +ROW_COUNT +ROW_NUMBER +ROWCOUNT +ROWGUIDCOL +ROWID +ROWNUM +ROWS +RULE +SAVE +SAVEPOINT +SCALE +SCHEMA +SCHEMA_NAME +SCHEMAS +SCOPE +SCOPE_CATALOG +SCOPE_NAME +SCOPE_SCHEMA +SCROLL +SEARCH +SECOND +SECOND_MICROSECOND +SECTION +SECURITY +SELECT +SELF +SENSITIVE +SEPARATOR +SEQUENCE +SERIALIZABLE +SERVER_NAME +SESSION +SESSION_USER +SET +SETOF +SETS +SETUSER +SHARE +SHOW +SHUTDOWN +SIGNAL +SIMILAR +SIMPLE +SIZE +SMALLINT +SOME +SONAME +SOURCE +SPACE +SPATIAL +SPECIFIC +SPECIFIC_NAME +SPECIFICTYPE +SQL +SQL_BIG_RESULT +SQL_BIG_SELECTS +SQL_BIG_TABLES +SQL_CALC_FOUND_ROWS +SQL_LOG_OFF +SQL_LOG_UPDATE +SQL_LOW_PRIORITY_UPDATES +SQL_SELECT_LIMIT +SQL_SMALL_RESULT +SQL_WARNINGS +SQLCA +SQLCODE +SQLERROR +SQLEXCEPTION +SQLSTATE +SQLWARNING +SQRT +SSL +STABLE +START +STARTING +STATE +STATEMENT +STATIC +STATISTICS +STATUS +STDDEV_POP +STDDEV_SAMP +STDIN +STDOUT +STORAGE +STRAIGHT_JOIN +STRICT +STRING +STRUCTURE +STYLE 
+SUBCLASS_ORIGIN +SUBLIST +SUBMULTISET +SUBSTRING +SUCCESSFUL +SUM +SUPERUSER +SYMMETRIC +SYNONYM +SYSDATE +SYSID +SYSTEM +SYSTEM_USER +TABLE +TABLE_NAME +TABLES +TABLESAMPLE +TABLESPACE +TEMP +TEMPLATE +TEMPORARY +TERMINATE +TERMINATED +TEXT +TEXTSIZE +THAN +THEN +TIES +TIME +TIMESTAMP +TIMEZONE_HOUR +TIMEZONE_MINUTE +TINYBLOB +TINYINT +TINYTEXT +TO +TOAST +TOP +TOP_LEVEL_COUNT +TRAILING +TRAN +TRANSACTION +TRANSACTION_ACTIVE +TRANSACTIONS_COMMITTED +TRANSACTIONS_ROLLED_BACK +TRANSFORM +TRANSFORMS +TRANSLATE +TRANSLATION +TREAT +TRIGGER +TRIGGER_CATALOG +TRIGGER_NAME +TRIGGER_SCHEMA +TRIM +TRUE +TRUNCATE +TRUSTED +TSEQUAL +TYPE +UESCAPE +UID +UNBOUNDED +UNCOMMITTED +UNDER +UNDO +UNENCRYPTED +UNION +UNIQUE +UNKNOWN +UNLISTEN +UNLOCK +UNNAMED +UNNEST +UNSIGNED +UNTIL +UPDATE +UPDATETEXT +UPPER +USAGE +USE +USER +USER_DEFINED_TYPE_CATALOG +USER_DEFINED_TYPE_CODE +USER_DEFINED_TYPE_NAME +USER_DEFINED_TYPE_SCHEMA +USING +UTC_DATE +UTC_TIME +UTC_TIMESTAMP +VACUUM +VALID +VALIDATE +VALIDATOR +VALUE +VALUES +VAR_POP +VAR_SAMP +VARBINARY +VARCHAR +VARCHAR2 +VARCHARACTER +VARIABLE +VARIABLES +VARYING +VERBOSE +VIEW +VOLATILE +WAITFOR +WHEN +WHENEVER +WHERE +WHILE +WIDTH_BUCKET +WINDOW +WITH +WITHIN +WITHOUT +WORK +WRITE +WRITETEXT +X509 +XOR +YEAR +YEAR_MONTH +ZEROFILL +ZONE \ No newline at end of file diff --git a/technologies/SqlFactExtractor/test.py b/technologies/SqlFactExtractor/test.py new file mode 100644 index 000000000..5ca2daf6a --- /dev/null +++ b/technologies/SqlFactExtractor/test.py @@ -0,0 +1,11 @@ +from SQLFactExtractor import * + +extractor = SQLFactExtractor(sys.argv[1], False) +file_open = open("example/testResult.json","w") +file_open.write(json.dumps(extractor.extract_file())) +file_open.close() + +extractor = SQLFactExtractor(sys.argv[1], True) +file_open = open("example/testResultWithLineNumbers.json","w") +file_open.write(json.dumps(extractor.extract_file())) +file_open.close() diff --git a/technologies/SqlFragmentLocator/Makefile 
b/technologies/SqlFragmentLocator/Makefile new file mode 100644 index 000000000..28433c513 --- /dev/null +++ b/technologies/SqlFragmentLocator/Makefile @@ -0,0 +1,19 @@ +test: + + ./locator.py sql_file example/createTable.sql > example/testResultFile.json + diff --ignore-all-space example/estimatedResultFile.json example/testResultFile.json + + ./locator.py create_statement/1 example/createTable.sql > example/testResultCreate.json + diff --ignore-all-space example/estimatedResultCreate.json example/testResultCreate.json + + ./locator.py table/COMPANY example/createTable.sql > example/testResultTable.json + diff --ignore-all-space example/estimatedResultTable.json example/testResultTable.json + + ./locator.py column/COMPANY/name example/createTable.sql > example/testResultColumn.json + diff --ignore-all-space example/estimatedResultColumn.json example/testResultColumn.json + + rm example/testResultFile.json + rm example/testResultCreate.json + rm example/testResultTable.json + rm example/testResultColumn.json + diff --git a/technologies/SqlFragmentLocator/README.md b/technologies/SqlFragmentLocator/README.md new file mode 100644 index 000000000..58365eabf --- /dev/null +++ b/technologies/SqlFragmentLocator/README.md @@ -0,0 +1,31 @@ +# Headline + +A fragment locator for Sql + +# Usage + +The executable is "locator.py". + +The "/"-based fragment selector is provided as a command-line argument. + +The source code on which to apply fragment location is read from stdin. + +The determined line range of the fragment is written in JSON format to stdout. + +The following fragment selectors/formats are supported: + +* sql_file +* create_statement/Index +* table/table_name +* column/table_name/column_name + + + +# Testing + +Test the tool with "make test". + +See the Makefile for details. + +Fragment location is performed on example/createTable.sql. 
+ diff --git a/technologies/SqlFragmentLocator/SQLClassifier.py b/technologies/SqlFragmentLocator/SQLClassifier.py new file mode 100644 index 000000000..e8d639eb9 --- /dev/null +++ b/technologies/SqlFragmentLocator/SQLClassifier.py @@ -0,0 +1,4 @@ +CLASSIFIER_FILE = "sql_file" +CLASSIFIER_CREATE = "create_statement" +CLASSIFIER_TABLE = "table" +CLASSIFIER_COLUMN = "column" \ No newline at end of file diff --git a/technologies/SqlFragmentLocator/SQLFactExtractor.py b/technologies/SqlFragmentLocator/SQLFactExtractor.py new file mode 100644 index 000000000..24c9c7c12 --- /dev/null +++ b/technologies/SqlFragmentLocator/SQLFactExtractor.py @@ -0,0 +1,229 @@ +import sqlparse +import sys +import json +import re +from SQLClassifier import * + +class SQLFactExtractor(object): + """docstring for SQLFactExtractor""" + + def enumerate_auto(*sequential, **named): + enums = dict(zip(sequential, range(len(sequential))), **named) + return type('Enum', (), enums) + + StatementType = enumerate_auto( + 'CREATE', + 'ALTER', + 'UNKNOWN' + ) + + ExpectedToken = enumerate_auto( + 'NONE', + 'BEGIN', + 'VARIABLE_NAME', + 'VARIABLE_TYPE' + ) + + def __init__(self, file, log_code): + self.sqlDatei = file + self.load_reserved_sql_keywords() + self.log_code = log_code + + def load_reserved_sql_keywords(self): + self.reserved_keywords = [] + for line in open("sqlKeyWords.txt", "r"): + self.reserved_keywords.append(str.lower(line.rstrip())) + + def get_statement_type(self,statement): + for my_token in statement.tokens: + if str.upper(str(my_token)) == "CREATE": + return self.StatementType.CREATE + elif str.upper(str(my_token)) == "ALTER": + return self.StatementType.ALTER + return self.StatementType.UNKNOWN + + def miss_whitespace(self,tokens_generator_elem): + while tokens_generator_elem.is_whitespace(): + tokens_generator_elem = tokens_generator_elem.next(); + return tokens_generator_elem + + def extract_file(self): + fragment_result = {"classifier": CLASSIFIER_FILE, "fragments": []} + 
constraints_list = [] + + for each in sqlparse.parse(open(self.sqlDatei).read()): + if each.get_type() != "UNKNOWN": + statement_type = self.get_statement_type(each) + if self.StatementType.CREATE == statement_type:#create + self.extract_create_statement(each, fragment_result) + elif self.StatementType.ALTER == statement_type:#alter + self.extract_alter_statement(each, constraints_list) + + self.add_contraints(fragment_result, constraints_list) + if self.log_code: + self.add_code_linenumbers(fragment_result) + + return fragment_result + + def extract_create_statement(self, each, fragment_result): + subject_token = None + item_list = [] + expected_token = self.ExpectedToken.BEGIN + + #get Relevant Data + for token in each.tokens: + #print(dir(token)) + #print(token.value) + if str(token) not in ['create', 'table'] and not token.is_whitespace() and str(token) != ";": + subject_token = token.flatten().next() + for sub_token in token.flatten(): + if self.ExpectedToken.BEGIN == expected_token and '(' in str(sub_token): + expected_token = self.ExpectedToken.VARIABLE_NAME + elif self.ExpectedToken.VARIABLE_NAME == expected_token and not sub_token.is_whitespace(): + if str.lower(str(sub_token)) in self.reserved_keywords: + expected_token = self.ExpectedToken.NONE + else: + item_list.append(str(sub_token)) + expected_token = self.ExpectedToken.VARIABLE_TYPE + elif self.ExpectedToken.VARIABLE_TYPE == expected_token and not sub_token.is_whitespace(): + item_list.append(str(sub_token)) + expected_token = self.ExpectedToken.NONE + elif self.ExpectedToken.NONE == expected_token and ',' in str(sub_token): + expected_token = self.ExpectedToken.VARIABLE_NAME + # create fragments as JSON + statement_fragment = {"classifier": CLASSIFIER_CREATE, "fragments": []} + table_fragment = {"classifier": CLASSIFIER_TABLE, "name": str(subject_token), "fragments": []} + i = 0 + while i < len(item_list): + table_fragment["fragments"].append( + {"classifier": CLASSIFIER_COLUMN, "name": 
item_list[i], "type": item_list[i + 1], "fragments": []}) + i += 2 + statement_fragment["fragments"].append(table_fragment) + fragment_result["fragments"].append(statement_fragment) + + self.add_code_to_fragments(each, statement_fragment) + + + def extract_alter_statement(self, each, constraints_list): + subject_table = None + constraint_name = "" + foreign_key_var = "" + references = "" + #get Relevant Data + i = 0 + while str.lower(str(each.tokens[i])) in ['alter', 'table'] or each.tokens[i].is_whitespace(): + i += 1 + subject_table = str(each.tokens[i]) + while str.lower(str(each.tokens[i])) != 'constraint' or each.tokens[i].is_whitespace(): + i += 1 + constraint_name = str(each.tokens[i + 2]) + while (str.lower(str(each.tokens[i])) != 'foreign' and str.lower(str(each.tokens[i + 2])) != 'key') or \ + each.tokens[i].is_whitespace(): + i += 1 + foreign_key_var = str(each.tokens[i + 4]) + foreign_key_var = foreign_key_var[1:len(foreign_key_var) - 1] + while str.lower(str(each.tokens[i])) != 'references' or each.tokens[i].is_whitespace(): + i += 1 + references = str(each.tokens[i + 2]) + + constraints_list.append({ + "subject_table": subject_table, + "constraint_name": constraint_name, + "foreign_key_var": foreign_key_var, + "references": references + }) + + def add_code_to_fragments(self, each, statement_fragment): + if self.log_code: + statement_fragment["code"] = str(each) + statement_fragment["fragments"][0]["code"] = self.delete_beginning_control_characters(self.remouve_beginning_create(str(each))) + + #filter column code + long_string = str(each) + long_string = long_string[long_string.find("(")+1:] + column_code_list = self.format_end_of_column_generation( long_string.split(",")) + + for column_id in range(0, len(statement_fragment["fragments"][0]["fragments"])): + statement_fragment["fragments"][0]["fragments"][column_id]["code"] = self.delete_beginning_control_characters(column_code_list[column_id]) + + def remouve_beginning_create(self, code_string): + 
return code_string[code_string.find("table"):] + + def format_end_of_column_generation(self, column_codes): + column_codes[-1] = self.delete_last_char_if_equals(column_codes[-1], ";") + column_codes[-1] = self.delete_last_char_if_equals(column_codes[-1], ")") + + return column_codes + + def delete_last_char_if_equals(self, string, char): + if char == string[-1]: + string = string[:-1] + return string + + def delete_beginning_control_characters(self, string): + return string[re.search("\w",string).start() :] + + def add_contraints(self, fragment_result, constraints_list): + for constraint in constraints_list: + #find table + for create in fragment_result["fragments"]: + if create["fragments"][0]["name"] == constraint["subject_table"]: + #find column + for column in create["fragments"][0]["fragments"]: + if column["name"] == constraint["foreign_key_var"]: + column["constraints"] = [{ + "type": "foreign_key", + "references": constraint["references"], + "name": constraint["constraint_name"] + }] + + def add_code_linenumbers(self, fragment_result): + self.add_file_linenumbers(fragment_result) + + self.add_fragment_linenumbers(fragment_result["fragments"], 1, self.get_file_length(self.sqlDatei)) + + def add_fragment_linenumbers(self, fragment_pointer, start, end): + if type(fragment_pointer) == list: + for frag_elem in fragment_pointer: + self.add_fragment_linenumbers(frag_elem, start, end) + elif type(fragment_pointer) == dict: + self.derive_linenumber(fragment_pointer, start, end) + self.add_fragment_linenumbers(fragment_pointer["fragments"], fragment_pointer["line_start"], fragment_pointer["line_end"]) + else: + print("ERROR: add_code_linenumbers UNKNOWN type:"+str(type(fragment_pointer))) + + def derive_linenumber(self, fragment_pointer, start, end): + open_file = open(self.sqlDatei, "r") + + self.go_to_line(open_file, start) + + char_pointer_fragment = 0 + line_counter = start + start_line = start + while char_pointer_fragment < len(fragment_pointer["code"]): + char 
= open_file.read(1) + if("\n" == char): + line_counter += 1 + if char == fragment_pointer["code"][char_pointer_fragment]: + if char_pointer_fragment == 0: + start_line = line_counter + char_pointer_fragment += 1 + else: + char_pointer_fragment = 0 + + fragment_pointer["line_start"] = start_line + fragment_pointer["line_end"] = line_counter + + def go_to_line(self, open_file, start): + for times in range(1,start): + open_file.readline() + + def add_file_linenumbers(self, fragment_result): + fragment_result["line_start"] = 1 + fragment_result["line_end"] = self.get_file_length(self.sqlDatei) + + def get_file_length(self, file_string): + linenumber = 0 + for line in open(self.sqlDatei): + linenumber += 1 + return linenumber diff --git a/technologies/SqlFragmentLocator/example/createTable.sql b/technologies/SqlFragmentLocator/example/createTable.sql new file mode 100644 index 000000000..7fb25e35c --- /dev/null +++ b/technologies/SqlFragmentLocator/example/createTable.sql @@ -0,0 +1,35 @@ +create + table COMPANY + ( + ID bigint generated by default as identity (start with 1), + name varchar(255), + primary key (ID) + ); + +create + table DEPARTMENT + ( + ID bigint generated by default as identity (start with 1), + name varchar(255), + COMP_ID bigint, + DEPT_ID bigint, + primary key (ID) + ); + +create + table EMPLOYEE + ( + ID bigint generated by default as identity (start with 1), + name varchar(255), + address varchar(255), + salary double, + manager bit, + MENTOR bigint, + DEPT_ID bigint, + primary key (ID) + ); + +alter table DEPARTMENT add constraint FK4F782F5255C77F64 foreign key (DEPT_ID) references DEPARTMENT; +alter table DEPARTMENT add constraint FK4F782F52C7CB872B foreign key (COMP_ID) references COMPANY; +alter table EMPLOYEE add constraint FK75C8D6AE55C77F64 foreign key (DEPT_ID) references DEPARTMENT; +alter table EMPLOYEE add constraint FK75C8D6AE800BE06C foreign key (MENTOR) references EMPLOYEE; \ No newline at end of file diff --git 
a/technologies/SqlFragmentLocator/example/estimatedResultColumn.json b/technologies/SqlFragmentLocator/example/estimatedResultColumn.json new file mode 100644 index 000000000..933541167 --- /dev/null +++ b/technologies/SqlFragmentLocator/example/estimatedResultColumn.json @@ -0,0 +1 @@ +{"to": 5, "from": 5} diff --git a/technologies/SqlFragmentLocator/example/estimatedResultCreate.json b/technologies/SqlFragmentLocator/example/estimatedResultCreate.json new file mode 100644 index 000000000..2efd85e35 --- /dev/null +++ b/technologies/SqlFragmentLocator/example/estimatedResultCreate.json @@ -0,0 +1 @@ +{"to": 17, "from": 8} diff --git a/technologies/SqlFragmentLocator/example/estimatedResultFile.json b/technologies/SqlFragmentLocator/example/estimatedResultFile.json new file mode 100644 index 000000000..b38805df7 --- /dev/null +++ b/technologies/SqlFragmentLocator/example/estimatedResultFile.json @@ -0,0 +1 @@ +{"to": 35, "from": 1} \ No newline at end of file diff --git a/technologies/SqlFragmentLocator/example/estimatedResultTable.json b/technologies/SqlFragmentLocator/example/estimatedResultTable.json new file mode 100644 index 000000000..5baafc3b2 --- /dev/null +++ b/technologies/SqlFragmentLocator/example/estimatedResultTable.json @@ -0,0 +1 @@ +{"to": 7, "from": 2} \ No newline at end of file diff --git a/technologies/SqlFragmentLocator/locator.py b/technologies/SqlFragmentLocator/locator.py new file mode 100755 index 000000000..5918d8130 --- /dev/null +++ b/technologies/SqlFragmentLocator/locator.py @@ -0,0 +1,39 @@ +#! 
/usr/bin/env python +from SQLFactExtractor import * + +def get_table_by_name(sql_fragments, name): + for statement in sql_fragments["fragments"]: + if name == statement["fragments"][0]["name"]: + return statement["fragments"][0] + +def get_column_by_name(table, name): + for column in table["fragments"]: + if name == column["name"]: + return column + + +sql_fragments = SQLFactExtractor(sys.argv[2], True).extract_file() + +# sql_file or +# create_statement/Index or +# table/table_name or +# column/table_name/column_name +fragment_locator = sys.argv[1].split("/") + +if CLASSIFIER_FILE == fragment_locator[0]: + print(json.dumps({"from" : sql_fragments["line_start"], "to":sql_fragments["line_end"]})) +elif CLASSIFIER_CREATE == fragment_locator[0]: + if fragment_locator[1].isdigit(): + element = sql_fragments["fragments"][int(fragment_locator[1])] + print(json.dumps({"from" : element["line_start"], "to":element["line_end"]})) + else: + print("Error: Right Format is create_statement/Index") +elif CLASSIFIER_TABLE == fragment_locator[0]: + element = get_table_by_name(sql_fragments, fragment_locator[1]) + print(json.dumps({"from" : element["line_start"], "to":element["line_end"]})) +elif CLASSIFIER_COLUMN == fragment_locator[0]: + table = get_table_by_name(sql_fragments, fragment_locator[1]) + element = get_column_by_name(table, fragment_locator[2]) + print(json.dumps({"from" : element["line_start"], "to":element["line_end"]})) +else: + print("ERROR unknown Fragment-Classifier") diff --git a/technologies/SqlFragmentLocator/sqlKeyWords.txt b/technologies/SqlFragmentLocator/sqlKeyWords.txt new file mode 100644 index 000000000..94b833c1c --- /dev/null +++ b/technologies/SqlFragmentLocator/sqlKeyWords.txt @@ -0,0 +1,824 @@ +A +ABORT +ABS +ABSOLUTE +ACCESS +ACTION +ADA +ADD +ADMIN +AFTER +AGGREGATE +ALIAS +ALL +ALLOCATE +ALSO +ALTER +ALWAYS +ANALYSE +ANALYZE +AND +ANY +ARE +ARRAY +AS +ASC +ASENSITIVE +ASSERTION +ASSIGNMENT +ASYMMETRIC +AT +ATOMIC +ATTRIBUTE +ATTRIBUTES +AUDIT 
+AUTHORIZATION +AUTO_INCREMENT +AVG +AVG_ROW_LENGTH +BACKUP +BACKWARD +BEFORE +BEGIN +BERNOULLI +BETWEEN +BIGINT +BINARY +BIT +BIT_LENGTH +BITVAR +BLOB +BOOL +BOOLEAN +BOTH +BREADTH +BREAK +BROWSE +BULK +BY +C +CACHE +CALL +CALLED +CARDINALITY +CASCADE +CASCADED +CASE +CAST +CATALOG +CATALOG_NAME +CEIL +CEILING +CHAIN +CHANGE +CHAR +CHAR_LENGTH +CHARACTER +CHARACTER_LENGTH +CHARACTER_SET_CATALOG +CHARACTER_SET_NAME +CHARACTER_SET_SCHEMA +CHARACTERISTICS +CHARACTERS +CHECK +CHECKED +CHECKPOINT +CHECKSUM +CLASS +CLASS_ORIGIN +CLOB +CLOSE +CLUSTER +CLUSTERED +COALESCE +COBOL +COLLATE +COLLATION +COLLATION_CATALOG +COLLATION_NAME +COLLATION_SCHEMA +COLLECT +COLUMN +COLUMN_NAME +COLUMNS +COMMAND_FUNCTION +COMMAND_FUNCTION_CODE +COMMENT +COMMIT +COMMITTED +COMPLETION +COMPRESS +COMPUTE +CONDITION +CONDITION_NUMBER +CONNECT +CONNECTION +CONNECTION_NAME +CONSTRAINT +CONSTRAINT_CATALOG +CONSTRAINT_NAME +CONSTRAINT_SCHEMA +CONSTRAINTS +CONSTRUCTOR +CONTAINS +CONTAINSTABLE +CONTINUE +CONVERSION +CONVERT +COPY +CORR +CORRESPONDING +COUNT +COVAR_POP +COVAR_SAMP +CREATE +CREATEDB +CREATEROLE +CREATEUSER +CROSS +CSV +CUBE +CUME_DIST +CURRENT +CURRENT_DATE +CURRENT_DEFAULT_TRANSFORM_GROUP +CURRENT_PATH +CURRENT_ROLE +CURRENT_TIME +CURRENT_TIMESTAMP +CURRENT_TRANSFORM_GROUP_FOR_TYPE +CURRENT_USER +CURSOR +CURSOR_NAME +CYCLE +DATA +DATABASE +DATABASES +DATE +DATETIME +DATETIME_INTERVAL_CODE +DATETIME_INTERVAL_PRECISION +DAY +DAY_HOUR +DAY_MICROSECOND +DAY_MINUTE +DAY_SECOND +DAYOFMONTH +DAYOFWEEK +DAYOFYEAR +DBCC +DEALLOCATE +DEC +DECIMAL +DECLARE +DEFAULT +DEFAULTS +DEFERRABLE +DEFERRED +DEFINED +DEFINER +DEGREE +DELAY_KEY_WRITE +DELAYED +DELETE +DELIMITER +DELIMITERS +DENSE_RANK +DENY +DEPTH +DEREF +DERIVED +DESC +DESCRIBE +DESCRIPTOR +DESTROY +DESTRUCTOR +DETERMINISTIC +DIAGNOSTICS +DICTIONARY +DISABLE +DISCONNECT +DISK +DISPATCH +DISTINCT +DISTINCTROW +DISTRIBUTED +DIV +DO +DOMAIN +DOUBLE +DROP +DUAL +DUMMY +DUMP +DYNAMIC +DYNAMIC_FUNCTION +DYNAMIC_FUNCTION_CODE +EACH +ELEMENT 
+ELSE +ELSEIF +ENABLE +ENCLOSED +ENCODING +ENCRYPTED +END +END-EXEC +ENUM +EQUALS +ERRLVL +ESCAPE +ESCAPED +EVERY +EXCEPT +EXCEPTION +EXCLUDE +EXCLUDING +EXCLUSIVE +EXEC +EXECUTE +EXISTING +EXISTS +EXIT +EXP +EXPLAIN +EXTERNAL +EXTRACT +FALSE +FETCH +FIELDS +FILE +FILLFACTOR +FILTER +FINAL +FIRST +FLOAT +FLOAT4 +FLOAT8 +FLOOR +FLUSH +FOLLOWING +FOR +FORCE +FOREIGN +FORTRAN +FORWARD +FOUND +FREE +FREETEXT +FREETEXTTABLE +FREEZE +FROM +FULL +FULLTEXT +FUNCTION +FUSION +G +GENERAL +GENERATED +GET +GLOBAL +GO +GOTO +GRANT +GRANTED +GRANTS +GREATEST +GROUP +GROUPING +HANDLER +HAVING +HEADER +HEAP +HIERARCHY +HIGH_PRIORITY +HOLD +HOLDLOCK +HOST +HOSTS +HOUR +HOUR_MICROSECOND +HOUR_MINUTE +HOUR_SECOND +IDENTIFIED +IDENTITY +IDENTITY_INSERT +IDENTITYCOL +IF +IGNORE +ILIKE +IMMEDIATE +IMMUTABLE +IMPLEMENTATION +IMPLICIT +IN +INCLUDE +INCLUDING +INCREMENT +INDEX +INDICATOR +INFILE +INFIX +INHERIT +INHERITS +INITIAL +INITIALIZE +INITIALLY +INNER +INOUT +INPUT +INSENSITIVE +INSERT +INSERT_ID +INSTANCE +INSTANTIABLE +INSTEAD +INT +INT1 +INT2 +INT3 +INT4 +INT8 +INTEGER +INTERSECT +INTERSECTION +INTERVAL +INTO +INVOKER +IS +ISAM +ISNULL +ISOLATION +ITERATE +JOIN +K +KEY +KEY_MEMBER +KEY_TYPE +KEYS +KILL +LANCOMPILER +LANGUAGE +LARGE +LAST +LAST_INSERT_ID +LATERAL +LEADING +LEAST +LEAVE +LEFT +LENGTH +LESS +LEVEL +LIKE +LIMIT +LINENO +LINES +LISTEN +LN +LOAD +LOCAL +LOCALTIME +LOCALTIMESTAMP +LOCATION +LOCATOR +LOCK +LOGIN +LOGS +LONG +LONGBLOB +LONGTEXT +LOOP +LOW_PRIORITY +LOWER +M +MAP +MATCH +MATCHED +MAX +MAX_ROWS +MAXEXTENTS +MAXVALUE +MEDIUMBLOB +MEDIUMINT +MEDIUMTEXT +MEMBER +MERGE +MESSAGE_LENGTH +MESSAGE_OCTET_LENGTH +MESSAGE_TEXT +METHOD +MIDDLEINT +MIN +MIN_ROWS +MINUS +MINUTE +MINUTE_MICROSECOND +MINUTE_SECOND +MINVALUE +MLSLABEL +MOD +MODE +MODIFIES +MODIFY +MODULE +MONTH +MONTHNAME +MORE +MOVE +MULTISET +MUMPS +MYISAM +NAMES +NATIONAL +NATURAL +NCHAR +NCLOB +NESTING +NEW +NEXT +NO +NO_WRITE_TO_BINLOG +NOAUDIT +NOCHECK +NOCOMPRESS +NOCREATEDB +NOCREATEROLE 
+NOCREATEUSER +NOINHERIT +NOLOGIN +NONCLUSTERED +NONE +NORMALIZE +NORMALIZED +NOSUPERUSER +NOT +NOTHING +NOTIFY +NOTNULL +NOWAIT +NULL +NULLABLE +NULLIF +NULLS +NUMBER +NUMERIC +OBJECT +OCTET_LENGTH +OCTETS +OF +OFF +OFFLINE +OFFSET +OFFSETS +OIDS +OLD +ON +ONLINE +ONLY +OPEN +OPENDATASOURCE +OPENQUERY +OPENROWSET +OPENXML +OPERATION +OPERATOR +OPTIMIZE +OPTION +OPTIONALLY +OPTIONS +OR +ORDER +ORDERING +ORDINALITY +OTHERS +OUT +OUTER +OUTFILE +OUTPUT +OVER +OVERLAPS +OVERLAY +OVERRIDING +OWNER +PACK_KEYS +PAD +PARAMETER +PARAMETER_MODE +PARAMETER_NAME +PARAMETER_ORDINAL_POSITION +PARAMETER_SPECIFIC_CATALOG +PARAMETER_SPECIFIC_NAME +PARAMETER_SPECIFIC_SCHEMA +PARAMETERS +PARTIAL +PARTITION +PASCAL +PASSWORD +PATH +PCTFREE +PERCENT +PERCENT_RANK +PERCENTILE_CONT +PERCENTILE_DISC +PLACING +PLAN +PLI +POSITION +POSTFIX +POWER +PRECEDING +PRECISION +PREFIX +PREORDER +PREPARE +PREPARED +PRESERVE +PRIMARY +PRINT +PRIOR +PRIVILEGES +PROC +PROCEDURAL +PROCEDURE +PROCESS +PROCESSLIST +PUBLIC +PURGE +QUOTE +RAID0 +RAISERROR +RANGE +RANK +RAW +READ +READS +READTEXT +REAL +RECHECK +RECONFIGURE +RECURSIVE +REF +REFERENCES +REFERENCING +REGEXP +REGR_AVGX +REGR_AVGY +REGR_COUNT +REGR_INTERCEPT +REGR_R2 +REGR_SLOPE +REGR_SXX +REGR_SXY +REGR_SYY +REINDEX +RELATIVE +RELEASE +RELOAD +RENAME +REPEAT +REPEATABLE +REPLACE +REPLICATION +REQUIRE +RESET +RESIGNAL +RESOURCE +RESTART +RESTORE +RESTRICT +RESULT +RETURN +RETURNED_CARDINALITY +RETURNED_LENGTH +RETURNED_OCTET_LENGTH +RETURNED_SQLSTATE +RETURNS +REVOKE +RIGHT +RLIKE +ROLE +ROLLBACK +ROLLUP +ROUTINE +ROUTINE_CATALOG +ROUTINE_NAME +ROUTINE_SCHEMA +ROW +ROW_COUNT +ROW_NUMBER +ROWCOUNT +ROWGUIDCOL +ROWID +ROWNUM +ROWS +RULE +SAVE +SAVEPOINT +SCALE +SCHEMA +SCHEMA_NAME +SCHEMAS +SCOPE +SCOPE_CATALOG +SCOPE_NAME +SCOPE_SCHEMA +SCROLL +SEARCH +SECOND +SECOND_MICROSECOND +SECTION +SECURITY +SELECT +SELF +SENSITIVE +SEPARATOR +SEQUENCE +SERIALIZABLE +SERVER_NAME +SESSION +SESSION_USER +SET +SETOF +SETS +SETUSER +SHARE +SHOW +SHUTDOWN 
+SIGNAL +SIMILAR +SIMPLE +SIZE +SMALLINT +SOME +SONAME +SOURCE +SPACE +SPATIAL +SPECIFIC +SPECIFIC_NAME +SPECIFICTYPE +SQL +SQL_BIG_RESULT +SQL_BIG_SELECTS +SQL_BIG_TABLES +SQL_CALC_FOUND_ROWS +SQL_LOG_OFF +SQL_LOG_UPDATE +SQL_LOW_PRIORITY_UPDATES +SQL_SELECT_LIMIT +SQL_SMALL_RESULT +SQL_WARNINGS +SQLCA +SQLCODE +SQLERROR +SQLEXCEPTION +SQLSTATE +SQLWARNING +SQRT +SSL +STABLE +START +STARTING +STATE +STATEMENT +STATIC +STATISTICS +STATUS +STDDEV_POP +STDDEV_SAMP +STDIN +STDOUT +STORAGE +STRAIGHT_JOIN +STRICT +STRING +STRUCTURE +STYLE +SUBCLASS_ORIGIN +SUBLIST +SUBMULTISET +SUBSTRING +SUCCESSFUL +SUM +SUPERUSER +SYMMETRIC +SYNONYM +SYSDATE +SYSID +SYSTEM +SYSTEM_USER +TABLE +TABLE_NAME +TABLES +TABLESAMPLE +TABLESPACE +TEMP +TEMPLATE +TEMPORARY +TERMINATE +TERMINATED +TEXT +TEXTSIZE +THAN +THEN +TIES +TIME +TIMESTAMP +TIMEZONE_HOUR +TIMEZONE_MINUTE +TINYBLOB +TINYINT +TINYTEXT +TO +TOAST +TOP +TOP_LEVEL_COUNT +TRAILING +TRAN +TRANSACTION +TRANSACTION_ACTIVE +TRANSACTIONS_COMMITTED +TRANSACTIONS_ROLLED_BACK +TRANSFORM +TRANSFORMS +TRANSLATE +TRANSLATION +TREAT +TRIGGER +TRIGGER_CATALOG +TRIGGER_NAME +TRIGGER_SCHEMA +TRIM +TRUE +TRUNCATE +TRUSTED +TSEQUAL +TYPE +UESCAPE +UID +UNBOUNDED +UNCOMMITTED +UNDER +UNDO +UNENCRYPTED +UNION +UNIQUE +UNKNOWN +UNLISTEN +UNLOCK +UNNAMED +UNNEST +UNSIGNED +UNTIL +UPDATE +UPDATETEXT +UPPER +USAGE +USE +USER +USER_DEFINED_TYPE_CATALOG +USER_DEFINED_TYPE_CODE +USER_DEFINED_TYPE_NAME +USER_DEFINED_TYPE_SCHEMA +USING +UTC_DATE +UTC_TIME +UTC_TIMESTAMP +VACUUM +VALID +VALIDATE +VALIDATOR +VALUE +VALUES +VAR_POP +VAR_SAMP +VARBINARY +VARCHAR +VARCHAR2 +VARCHARACTER +VARIABLE +VARIABLES +VARYING +VERBOSE +VIEW +VOLATILE +WAITFOR +WHEN +WHENEVER +WHERE +WHILE +WIDTH_BUCKET +WINDOW +WITH +WITHIN +WITHOUT +WORK +WRITE +WRITETEXT +X509 +XOR +YEAR +YEAR_MONTH +ZEROFILL +ZONE \ No newline at end of file