diff --git a/modules/datastore/modules/datastore_mysql_import/src/Service/MysqlImport.php b/modules/datastore/modules/datastore_mysql_import/src/Service/MysqlImport.php index 67a55d539d..3d924ca897 100644 --- a/modules/datastore/modules/datastore_mysql_import/src/Service/MysqlImport.php +++ b/modules/datastore/modules/datastore_mysql_import/src/Service/MysqlImport.php @@ -10,8 +10,6 @@ /** * Expiremental MySQL LOAD DATA importer. - * - * @codeCoverageIgnore */ class MysqlImport extends ImportJob { @@ -26,65 +24,6 @@ class MysqlImport extends ImportJob { '\n' => "\n", ]; - /** - * The maximum length of a MySQL table column name. - * - * @var int - */ - protected const MAX_COLUMN_LENGTH = 64; - - /** - * List of reserved words in MySQL 5.6-8 and MariaDB. - * - * @var string[] - */ - protected const RESERVED_WORDS = ['accessible', 'add', 'all', 'alter', 'analyze', - 'and', 'as', 'asc', 'asensitive', 'before', 'between', 'bigint', 'binary', - 'blob', 'both', 'by', 'call', 'cascade', 'case', 'change', 'char', - 'character', 'check', 'collate', 'column', 'condition', 'constraint', - 'continue', 'convert', 'create', 'cross', 'cube', 'cume_dist', - 'current_date', 'current_role', 'current_time', 'current_timestamp', - 'current_user', 'cursor', 'database', 'databases', 'day_hour', - 'day_microsecond', 'day_minute', 'day_second', 'dec', 'decimal', 'declare', - 'default', 'delayed', 'delete', 'dense_rank', 'desc', 'describe', - 'deterministic', 'distinct', 'distinctrow', 'div', 'do_domain_ids', - 'double', 'drop', 'dual', 'each', 'else', 'elseif', 'empty', 'enclosed', - 'escaped', 'except', 'exists', 'exit', 'explain', 'false', 'fetch', - 'first_value', 'float', 'float4', 'float8', 'for', 'force', 'foreign', - 'from', 'fulltext', 'function', 'general', 'generated', 'get', 'grant', - 'group', 'grouping', 'groups', 'having', 'high_priority', 'hour_microsecond', - 'hour_minute', 'hour_second', 'if', 'ignore', 'ignore_domain_ids', - 'ignore_server_ids', 'in', 'index', 'infile', 'inner', 'inout', - 'insensitive', 'insert', 'int', 'int1', 'int2', 'int3', 'int4', 'int8', - 'integer', 'intersect', 'interval', 'into', 'io_after_gtids', - 'io_before_gtids', 'is', 'iterate', 'join', 'json_table', 'key', 'keys', - 'kill', 'lag', 'last_value', 'lateral', 'lead', 'leading', 'leave', 'left', - 'like', 'limit', 'linear', 'lines', 'load', 'localtime', 'localtimestamp', - 'lock', 'long', 'longblob', 'longtext', 'loop', 'low_priority', - 'master_bind', 'master_heartbeat_period', 'master_ssl_verify_server_cert', - 'match', 'maxvalue', 'mediumblob', 'mediumint', 'mediumtext', 'middleint', - 'minute_microsecond', 'minute_second', 'mod', 'modifies', 'natural', 'not', - 'no_write_to_binlog', 'nth_value', 'ntile', 'null', 'numeric', 'of', - 'offset', 'on', 'optimize', 'optimizer_costs', 'option', 'optionally', - 'or', 'order', 'out', 'outer', 'outfile', 'over', 'page_checksum', - 'parse_vcol_expr', 'partition', 'percent_rank', 'position', 'precision', - 'primary', 'procedure', 'purge', 'range', 'rank', 'read', 'reads', - 'read_write', 'real', 'recursive', 'references', 'ref_system_id', 'regexp', - 'release', 'rename', 'repeat', 'replace', 'require', 'resignal', - 'restrict', 'return', 'returning', 'revoke', 'right', 'rlike', 'row', - 'row_number', 'rows', 'schema', 'schemas', 'second_microsecond', 'select', - 'sensitive', 'separator', 'set', 'show', 'signal', 'slow', 'smallint', - 'spatial', 'specific', 'sql', 'sql_big_result', 'sql_calc_found_rows', - 'sqlexception', 'sql_small_result', 'sqlstate', 'sqlwarning', 'ssl', - 'starting', 'stats_auto_recalc', 'stats_persistent', 'stats_sample_pages', - 'stored', 'straight_join', 'system', 'table', 'terminated', 'then', - 'tinyblob', 'tinyint', 'tinytext', 'to', 'trailing', 'trigger', 'true', - 'undo', 'union', 'unique', 'unlock', 'unsigned', 'update', 'usage', 'use', - 'using', 'utc_date', 'utc_time', 'utc_timestamp', 'values', 'varbinary', - 'varchar', 'varcharacter', 'varying', 'virtual', 'when', 'where', 'while', - 'window', 'with', 'write', 'xor', 'year_month', 'zerofill', - ]; - /** * Override. * @@ -229,94 +168,28 @@ public function generateTableSpec(array $columns): array { foreach ($columns as $column) { // Sanitize the supplied table header to generate a unique column name; // null-coalesce potentially NULL column names to empty strings. - $name = $this->sanitizeHeader($column ?? ''); + $name = ImportJob::sanitizeHeader($column ?? ''); // Truncate the generated table column name, if necessary, to fit the max // column length. - $name = $this->truncateHeader($name); + $name = ImportJob::truncateHeader($name); // Generate unique numeric suffix for the header if a header already // exists with the same name. for ($i = 2; isset($spec[$name]); $i++) { $suffix = '_' . $i; - $name = substr($name, 0, self::MAX_COLUMN_LENGTH - strlen($suffix)) . $suffix; + $name = substr($name, 0, ImportJob::MAX_COLUMN_LENGTH - strlen($suffix)) . $suffix; } $spec[$name] = [ 'type' => "text", - 'description' => $this->sanitizeDescription($column ?? ''), + 'description' => ImportJob::sanitizeDescription($column ?? ''), ]; } return $spec; } - /** - * Transform possible multiline string to single line for description. - * - * @param string $column - * Column name. - * - * @return string - * Column name on single line. - */ - public function sanitizeDescription(string $column) { - $trimmed = array_filter(array_map('trim', explode("\n", $column))); - return implode(" ", $trimmed); - } - - /** - * Sanitize table column name according to the MySQL supported characters. - * - * @param string $column - * The column name being sanitized. - * - * @returns string - * Sanitized column name. - */ - protected function sanitizeHeader(string $column): string { - // Replace all spaces and newline characters with underscores since they are - // not supported. - $column = preg_replace('/(?: |\r\n|\r|\n)/', '_', $column); - // Strip unsupported characters from the header. - $column = preg_replace('/[^A-Za-z0-9_]/', '', $column); - // Trim underscores from the beginning and end of the column name. - $column = trim($column, '_'); - // Convert the column name to lowercase. - $column = strtolower($column); - - if (is_numeric($column) || in_array($column, self::RESERVED_WORDS)) { - // Prepend "_" to column name that are not allowed in MySQL - // This can be dropped after move to Drupal 9. - // @see https://github.com/GetDKAN/dkan/issues/3606 - $column = '_' . $column; - } - - return $column; - } - - /** - * Truncate column name if longer than the max column length for the database. - * - * @param string $column - * The column name being truncated. - * - * @returns string - * Truncated column name. - */ - protected function truncateHeader(string $column): string { - // If the supplied table column name is longer than the max column length, - // truncate the column name to 5 characters under the max length and - // substitute the truncated characters with a unique hash. - if (strlen($column) > self::MAX_COLUMN_LENGTH) { - $field = substr($column, 0, self::MAX_COLUMN_LENGTH - 5); - $hash = substr(md5($column), 0, 4); - $column = $field . '_' . $hash; - } - - return $column; - } - /** * Construct a SQL file import statement using the given file information. * diff --git a/modules/datastore/src/DataDictionary/AlterTableQuery/MySQLQuery.php b/modules/datastore/src/DataDictionary/AlterTableQuery/MySQLQuery.php index 621a516a43..7a8de94f7b 100644 --- a/modules/datastore/src/DataDictionary/AlterTableQuery/MySQLQuery.php +++ b/modules/datastore/src/DataDictionary/AlterTableQuery/MySQLQuery.php @@ -4,6 +4,7 @@ use Drupal\Core\Database\StatementInterface; +use Drupal\datastore\Plugin\QueueWorker\ImportJob; use Drupal\datastore\DataDictionary\AlterTableQueryBase; use Drupal\datastore\DataDictionary\AlterTableQueryInterface; use Drupal\datastore\DataDictionary\IncompatibleTypeException; @@ -13,6 +14,17 @@ */ class MySQLQuery extends AlterTableQueryBase implements AlterTableQueryInterface { + /** + * Date time specific types. + * + * @var string[] + */ + protected const DATE_TIME_TYPES = [ + 'DATE', + 'TIME', + 'DATETIME', + ]; + /** * Default type to use for fields if no data-dictionary type is specified. * @@ -56,11 +68,42 @@ class MySQLQuery extends AlterTableQueryBase implements AlterTableQueryInterface 'BLOB', ]; + /** + * Mapping between frictionless types and SQL types. + * + * Frictionless type is key, SQL is value. + * + * @var string[] + */ + protected static $frictionlessTypes = [ + 'string' => 'TEXT', + 'number' => 'DECIMAL', + 'integer' => 'INT', + 'date' => 'DATE', + 'time' => 'TIME', + 'datetime' => 'DATETIME', + 'year' => 'YEAR', + 'yearmonth' => 'TINYTEXT', + 'boolean' => 'BOOL', + 'object' => 'TEXT', + 'geopoint' => 'TEXT', + 'geojson' => 'TEXT', + 'array' => 'TEXT', + 'duration' => 'TINYTEXT', + ]; + /** * {@inheritdoc} */ public function doExecute(): void { + // Sanitize field names to match database field names. + $this->fields = $this->sanitizeFields($this->fields); + // Filter out fields which are not present in the database table. $this->fields = $this->mergeFields($this->fields, $this->table); + + // Sanitize index field names to match database field names. + $this->indexes = $this->sanitizeIndexes($this->indexes); + // Filter out indexes with fields which are not present in the table. $this->indexes = $this->mergeIndexes($this->indexes, $this->table); // Build and execute SQL commands to prepare for table alter. @@ -72,6 +115,54 @@ public function doExecute(): void { $command->execute(); } + /** + * Sanitize field names. + * + * @param array $fields + * Query fields. + * + * @return array + * Query fields list with sanitized field names. + */ + protected function sanitizeFields(array $fields): array { + // Iterate through field list... + foreach (array_keys($fields) as $key) { + // Create reference to index field name. + $field_name = &$fields[$key]['name']; + // Sanitize field name. + $field_name = ImportJob::sanitizeHeader($field_name); + } + + return $fields; + } + + /** + * Sanitize index field names. + * + * @param array $indexes + * Query indexes. + * + * @return array + * Query indexes list with sanitized index field names. + */ + protected function sanitizeIndexes(array $indexes): array { + // Iterate through index list... + foreach (array_keys($indexes) as $index_key) { + // Create reference to index field list. + $index_fields = &$indexes[$index_key]['fields']; + + // Iterate through index field list... + foreach (array_keys($index_fields) as $field_key) { + // Create reference to index field name. + $field_name = &$index_fields[$field_key]['name']; + // Sanitize field name. + $field_name = ImportJob::sanitizeHeader($field_name); + } + } + + return $indexes; + } + /** * Remove query fields not found in the given table and copy over names. * @@ -137,7 +228,7 @@ protected function getTableColsAndComments(string $table): array { } /** - * Get MySQL equivalent of the given Frictionless "Table Schema" type. + * Build full MySQL equivalent of the given Frictionless "Table Schema" type. * * @param string $frictionless_type * Frictionless "Table Schema" data type. @@ -147,59 +238,64 @@ protected function getTableColsAndComments(string $table): array { * MySQL table to get type for. * * @return string - * MySQL data type. + * Full MySQL data type. */ protected function getFieldType(string $frictionless_type, string $column, string $table): string { + // Determine MySQL base type. + $base_mysql_type = $this->getBaseType($frictionless_type); // Build the MySQL type argument list. - $args = $this->buildTypeArgs($frictionless_type, $column, $table); + $args = $this->buildTypeArgs($base_mysql_type, $column, $table); + $args_str = !empty($args) ? '(' . implode(', ', $args) . ')' : ''; // Build full MySQL type. - return ([ - 'string' => (fn () => 'TEXT'), - 'number' => (fn ($args) => "DECIMAL({$args['size']}, {$args['decimal']})"), - 'integer' => (fn () => 'INT'), - 'date' => (fn () => 'DATE'), - 'time' => (fn () => 'TIME'), - 'datetime' => (fn () => 'DATETIME'), - 'year' => (fn () => 'YEAR'), - 'yearmonth' => (fn () => 'TINYTEXT'), - 'boolean' => (fn () => 'BOOL'), - 'object' => (fn () => 'TEXT'), - 'geopoint' => (fn () => 'TEXT'), - 'geojson' => (fn () => 'TEXT'), - 'array' => (fn () => 'TEXT'), - 'duration' => (fn () => 'TINYTEXT'), - ])[$frictionless_type]($args); + return $base_mysql_type . $args_str; } /** - * Build MySQL type arg list for the given Frictionless "Table Schema" type. + * Get base MySQL equivalent of the given Frictionless "Table Schema" type. * - * @param string $type + * @param string $frictionless_type * Frictionless "Table Schema" data type. + * + * @return string + * Base MySQL data type. + */ + protected function getBaseType(string $frictionless_type): string { + if ($sql_type = static::$frictionlessTypes[$frictionless_type] ?? FALSE) { + return $sql_type; + } + throw new \InvalidArgumentException($frictionless_type . ' is not a valid frictionless type.'); + } + + /** + * Build MySQL type arg list for MySQL type. + * + * @param string $type + * MySQL data type. * @param string $column * Column name. * @param string $table * Table name. * + * @return array + * MySQL type arguments. + * * @throws Drupal\datastore\DataDictionary\IncompatibleTypeException * When incompatible data is found in the table for the specified type. */ protected function buildTypeArgs(string $type, string $column, string $table): array { - $args = []; - - // If this field is a number field, build decimal and size arguments for - // MySQL type. - if ($type === 'number') { + // If this field is a DECIMAL field, build decimal and size arguments. + if ($type === 'DECIMAL') { $non_decimals = $this->connection->query("SELECT MAX(LENGTH(TRIM(LEADING '-' FROM SUBSTRING_INDEX({$column}, '.', 1)))) FROM {{$table}};")->fetchField(); - $args['decimal'] = $this->connection->query("SELECT MAX(LENGTH(SUBSTRING_INDEX({$column}, '.', -1))) FROM {{$table}};")->fetchField(); - $args['size'] = $non_decimals + $args['decimal']; - if ($args['size'] > self::DECIMAL_MAX_SIZE || $args['decimal'] > self::DECIMAL_MAX_DECIMAL) { + $decimal = $this->connection->query("SELECT MAX(LENGTH(SUBSTRING_INDEX({$column}, '.', -1))) FROM {{$table}};")->fetchField(); + $size = $non_decimals + $decimal; + if ($size > self::DECIMAL_MAX_SIZE || $decimal > self::DECIMAL_MAX_DECIMAL) { throw new IncompatibleTypeException("Decimal values found in column too large for DECIMAL type; please use type 'string' for column '{$column}'"); } + return [$size, $decimal]; } - return $args; + return []; } /** @@ -218,22 +314,80 @@ protected function buildPreAlterCommands(array $query_fields, string $table): ar // Build pre-alter commands for each query field. foreach ($query_fields as ['name' => $col, 'type' => $type, 'format' => $format]) { - // If this field is a date field, and a valid format is provided; update - // the format of the date fields to ISO-8601 before importing into MySQL. - if ($type === 'date' && !empty($format) && $format !== 'default') { - $mysql_date_format = $this->dateFormatConverter->convert($format); - // Replace empty date strings with NULL to prevent STR_TO_DATE errors. + // Determine base MySQL type for Frictionless column type. + $base_type = $this->getBaseType($type); + + // Replace empty strings with NULL for non-text columns to prevent + // misc. errors (i.e. STR_TO_DATE function related and "Incorrect + // `type` value" errors). + if (!in_array($base_type, self::STRING_FIELD_TYPES, TRUE)) { $pre_alter_cmds[] = $this->connection->update($table)->condition($col, '')->expression($col, 'NULL'); - // Convert date formats for date column. - $pre_alter_cmds[] = $this->connection->update($table)->expression($col, "STR_TO_DATE({$col}, :date_format)", [ - ':date_format' => $mysql_date_format, - ]); } + + // Build pre-alter commands for date fields. + if (in_array($base_type, self::DATE_TIME_TYPES, TRUE)) { + $pre_alter_cmds = array_merge($pre_alter_cmds, $this->buildDatePreAlterCommands($table, $col, $format)); + } + + // Build pre-alter commands for boolean fields. + if ($base_type === 'BOOL') { + $pre_alter_cmds = array_merge($pre_alter_cmds, $this->buildBoolPreAlterCommands($table, $col)); + } + } + + return $pre_alter_cmds; + } + + /** + * Build pre-alter commands for date fields. + * + * Update format of the date fields to ISO-8601 before importing into MySQL. + * + * @param string $table + * Table name. + * @param string $column + * Table column. + * @param string $format + * Field frictionless date format. + * + * @return \Drupal\Core\Database\Query\Update[] + * Pre-alter update DB queries. + */ + protected function buildDatePreAlterCommands(string $table, string $column, string $format): array { + $pre_alter_cmds = []; + + // If a valid format is provided... + if (!empty($format) && $format !== 'default') { + $mysql_date_format = $this->dateFormatConverter->convert($format); + // Convert date formats for date column. + $pre_alter_cmds[] = $this->connection->update($table)->expression($column, "STR_TO_DATE({$column}, :date_format)", [ + ':date_format' => $mysql_date_format, + ]); } return $pre_alter_cmds; } + /** + * Build pre-alter commands for boolean fields. + * + * Convert strings 'true' and 'false' to '1' and '0' for boolean fields. + * + * @param string $table + * Table name. + * @param string $column + * Table column. + * + * @return \Drupal\Core\Database\Query\Update[] + * Pre-alter update DB queries. + */ + protected function buildBoolPreAlterCommands(string $table, string $column): array { + return [ + $this->connection->update($table)->where("UPPER({$column}) = :value", [':value' => 'FALSE'])->expression($column, '0'), + $this->connection->update($table)->where("UPPER({$column}) = :value", [':value' => 'TRUE'])->expression($column, '1'), + ]; + } + /** * Build alter command to modify table column data types. * @@ -379,7 +533,7 @@ protected function getIndexType(string $frictionless_type): string { * Formatted index field option string. */ protected function buildIndexFieldOption(string $name, ?int $length, string $table, string $type): string { - // Extract base type from full MySQL type. + // Extract base type from full MySQL type ("DECIMAL(12, 3)" -> "DECIMAL"). $base_type = strtok($type, '('); // If this field is a string type, determine what it's length should be... if (in_array($base_type, self::STRING_FIELD_TYPES)) { diff --git a/modules/datastore/src/Plugin/QueueWorker/ImportJob.php b/modules/datastore/src/Plugin/QueueWorker/ImportJob.php index 2c38058d2b..2625b2a23f 100644 --- a/modules/datastore/src/Plugin/QueueWorker/ImportJob.php +++ b/modules/datastore/src/Plugin/QueueWorker/ImportJob.php @@ -13,6 +13,65 @@ */ class ImportJob extends AbstractPersistentJob { + /** + * The maximum length of a MySQL table column name. + * + * @var int + */ + protected const MAX_COLUMN_LENGTH = 64; + + /** + * List of reserved words in MySQL 5.6-8 and MariaDB. + * + * @var string[] + */ + protected const RESERVED_WORDS = [ + 'accessible', 'add', 'all', 'alter', 'analyze', 'and', 'as', 'asc', + 'asensitive', 'before', 'between', 'bigint', 'binary', 'blob', 'both', 'by', + 'call', 'cascade', 'case', 'change', 'char', 'character', 'check', + 'collate', 'column', 'condition', 'constraint', 'continue', 'convert', + 'create', 'cross', 'cube', 'cume_dist', 'current_date', 'current_role', + 'current_time', 'current_timestamp', 'current_user', 'cursor', 'database', + 'databases', 'day_hour', 'day_microsecond', 'day_minute', 'day_second', + 'dec', 'decimal', 'declare', 'default', 'delayed', 'delete', 'dense_rank', + 'desc', 'describe', 'deterministic', 'distinct', 'distinctrow', 'div', + 'do_domain_ids', 'double', 'drop', 'dual', 'each', 'else', 'elseif', + 'empty', 'enclosed', 'escaped', 'except', 'exists', 'exit', 'explain', + 'false', 'fetch', 'first_value', 'float', 'float4', 'float8', 'for', + 'force', 'foreign', 'from', 'fulltext', 'function', 'general', 'generated', + 'get', 'grant', 'group', 'grouping', 'groups', 'having', 'high_priority', + 'hour_microsecond', 'hour_minute', 'hour_second', 'if', 'ignore', + 'ignore_domain_ids', 'ignore_server_ids', 'in', 'index', 'infile', 'inner', + 'inout', 'insensitive', 'insert', 'int', 'int1', 'int2', 'int3', 'int4', + 'int8', 'integer', 'intersect', 'interval', 'into', 'io_after_gtids', + 'io_before_gtids', 'is', 'iterate', 'join', 'json_table', 'key', 'keys', + 'kill', 'lag', 'last_value', 'lateral', 'lead', 'leading', 'leave', 'left', + 'like', 'limit', 'linear', 'lines', 'load', 'localtime', 'localtimestamp', + 'lock', 'long', 'longblob', 'longtext', 'loop', 'low_priority', + 'master_bind', 'master_heartbeat_period', 'master_ssl_verify_server_cert', + 'match', 'maxvalue', 'mediumblob', 'mediumint', 'mediumtext', 'middleint', + 'minute_microsecond', 'minute_second', 'mod', 'modifies', 'natural', 'not', + 'no_write_to_binlog', 'nth_value', 'ntile', 'null', 'numeric', 'of', + 'offset', 'on', 'optimize', 'optimizer_costs', 'option', 'optionally', 'or', + 'order', 'out', 'outer', 'outfile', 'over', 'page_checksum', + 'parse_vcol_expr', 'partition', 'percent_rank', 'position', 'precision', + 'primary', 'procedure', 'purge', 'range', 'rank', 'read', 'reads', + 'read_write', 'real', 'recursive', 'references', 'ref_system_id', 'regexp', + 'release', 'rename', 'repeat', 'replace', 'require', 'resignal', 'restrict', + 'return', 'returning', 'revoke', 'right', 'rlike', 'row', 'row_number', + 'rows', 'schema', 'schemas', 'second_microsecond', 'select', 'sensitive', + 'separator', 'set', 'show', 'signal', 'slow', 'smallint', 'spatial', + 'specific', 'sql', 'sql_big_result', 'sql_calc_found_rows', 'sqlexception', + 'sql_small_result', 'sqlstate', 'sqlwarning', 'ssl', 'starting', + 'stats_auto_recalc', 'stats_persistent', 'stats_sample_pages', 'stored', + 'straight_join', 'system', 'table', 'terminated', 'then', 'tinyblob', + 'tinyint', 'tinytext', 'to', 'trailing', 'trigger', 'true', 'undo', 'union', + 'unique', 'unlock', 'unsigned', 'update', 'usage', 'use', 'using', + 'utc_date', 'utc_time', 'utc_timestamp', 'values', 'varbinary', 'varchar', + 'varcharacter', 'varying', 'virtual', 'when', 'where', 'while', 'window', + 'with', 'write', 'xor', 'year_month', 'zerofill', + ]; + /** * Storage class. * @@ -60,6 +119,72 @@ protected function __construct(string $identifier, $storage, array $config = NUL $this->resource = $config['resource']; } + /** + * Transform possible multiline string to single line for description. + * + * @param string $column + * Column name. + * + * @return string + * Column name on single line. + */ + public static function sanitizeDescription(string $column) { + $trimmed = array_filter(array_map('trim', explode("\n", $column))); + return implode(" ", $trimmed); + } + + /** + * Sanitize table column name according to the MySQL supported characters. + * + * @param string $column + * The column name being sanitized. + * + * @returns string + * Sanitized column name. + */ + public static function sanitizeHeader(string $column): string { + // Replace all spaces and newline characters with underscores since they are + // not supported. + $column = preg_replace('/(?: |\r\n|\r|\n)/', '_', $column); + // Strip unsupported characters from the header. + $column = preg_replace('/[^A-Za-z0-9_]/', '', $column); + // Trim underscores from the beginning and end of the column name. + $column = trim($column, '_'); + // Convert the column name to lowercase. + $column = strtolower($column); + + if (is_numeric($column) || in_array($column, ImportJob::RESERVED_WORDS)) { + // Prepend "_" to column name that are not allowed in MySQL + // This can be dropped after move to Drupal 9. + // @see https://github.com/GetDKAN/dkan/issues/3606 + $column = '_' . $column; + } + + return $column; + } + + /** + * Truncate column name if longer than the max column length for the database. + * + * @param string $column + * The column name being truncated. + * + * @returns string + * Truncated column name. + */ + public static function truncateHeader(string $column): string { + // If the supplied table column name is longer than the max column length, + // truncate the column name to 5 characters under the max length and + // substitute the truncated characters with a unique hash. + if (strlen($column) > ImportJob::MAX_COLUMN_LENGTH) { + $field = substr($column, 0, ImportJob::MAX_COLUMN_LENGTH - 5); + $hash = substr(md5($column), 0, 4); + $column = $field . '_' . $hash; + } + + return $column; + } + /** * Get the storage object. */ diff --git a/modules/datastore/tests/data/data-dict.csv b/modules/datastore/tests/data/data-dict.csv index 440b21730a..aee7816fa4 100644 --- a/modules/datastore/tests/data/data-dict.csv +++ b/modules/datastore/tests/data/data-dict.csv @@ -1,3 +1,4 @@ -a,b,c,d -1,02/23/1978,9.07,efghijk -2,09/12/2009,2.37,abcde +a,b,c,d,e +1,02/23/1978,9.07,efghijk,0 +,,,, +2,09/12/2009,2.37,abcde,1 diff --git a/modules/datastore/tests/src/Functional/DictionaryEnforcerTest.php b/modules/datastore/tests/src/Functional/DictionaryEnforcerTest.php index 271d9cb69e..e5bcac8e79 100644 --- a/modules/datastore/tests/src/Functional/DictionaryEnforcerTest.php +++ b/modules/datastore/tests/src/Functional/DictionaryEnforcerTest.php @@ -19,6 +19,7 @@ * @group datastore */ class DictionaryEnforcerTest extends ExistingSiteBase { + use GetDataTrait; /** @@ -96,14 +97,17 @@ public function setUp(): void { $this->uuid = \Drupal::service('uuid'); $this->validMetadataFactory = ServiceTest::getValidMetadataFactory($this); $this->webServiceApi = ImportController::create(\Drupal::getContainer()); - $this->datasetStorage = \Drupal::service('dkan.metastore.storage')->getInstance('dataset'); + $this->datasetStorage = \Drupal::service('dkan.metastore.storage') + ->getInstance('dataset'); // Copy resource file to uploads directory. $file_system = \Drupal::service('file_system'); $upload_path = $file_system->realpath(self::UPLOAD_LOCATION); $file_system->prepareDirectory($upload_path, FileSystemInterface::CREATE_DIRECTORY); $file_system->copy(self::TEST_DATA_PATH . self::RESOURCE_FILE, $upload_path); // Create resource URL. - $this->resourceUrl = \Drupal::service('stream_wrapper_manager')->getViaUri(self::UPLOAD_LOCATION . self::RESOURCE_FILE)->getExternalUrl(); + $this->resourceUrl = \Drupal::service('stream_wrapper_manager') + ->getViaUri(self::UPLOAD_LOCATION . self::RESOURCE_FILE) + ->getExternalUrl(); } /** @@ -134,6 +138,11 @@ public function testDictionaryEnforcement(): void { 'title' => 'D', 'type' => 'string', ], + [ + 'name' => 'e', + 'title' => 'E', + 'type' => 'boolean', + ], ]; $indexes = [ [ @@ -158,7 +167,8 @@ public function testDictionaryEnforcement(): void { $this->metastore->publish('data-dictionary', $dict_id); // Set global data-dictinary in metastore config. - $metastore_config = \Drupal::configFactory()->getEditable('metastore.settings'); + $metastore_config = \Drupal::configFactory() + ->getEditable('metastore.settings'); $metastore_config->set('data_dictionary_mode', DataDictionaryDiscovery::MODE_SITEWIDE); $metastore_config->set('data_dictionary_sitewide', $dict_id); $metastore_config->save(); @@ -166,7 +176,7 @@ public function testDictionaryEnforcement(): void { // Build dataset. $dataset_id = $this->uuid->generate(); $dataset = $this->validMetadataFactory->get($this->getDataset($dataset_id, 'Test ' . $dataset_id, [$this->resourceUrl], TRUE), 'dataset'); - // Create datset. + // Create dataset. $this->metastore->post('dataset', $dataset); $this->metastore->publish('dataset', $dataset_id); @@ -182,24 +192,24 @@ public function testDictionaryEnforcement(): void { $request = Request::create('http://blah/api'); // Retrieve schema for dataset resource. $response = $this->webServiceApi->summary($dist_id, $request); - $result = json_decode($response->getContent(), true); + $result = json_decode($response->getContent(), TRUE); // Validate schema. $this->assertEquals([ - 'numOfColumns' => 5, + 'numOfColumns' => 6, 'columns' => [ 'record_number' => [ 'type' => 'serial', 'length' => 10, - 'unsigned' => true, - 'not null' => true, + 'unsigned' => TRUE, + 'not null' => TRUE, 'mysql_type' => 'int', ], 'a' => [ - 'type' => 'int', - 'length' => 11, - 'mysql_type' => 'int', - ], + 'type' => 'int', + 'length' => 11, + 'mysql_type' => 'int', + ], 'b' => [ 'type' => 'varchar', 'mysql_type' => 'date', @@ -216,6 +226,13 @@ public function testDictionaryEnforcement(): void { 'mysql_type' => 'text', 'description' => 'D', ], + 'e' => [ + 'type' => 'int', + 'mysql_type' => 'tinyint', + 'description' => 'E', + 'length' => 1, + 'size' => 'tiny', + ], ], 'indexes' => [ 'index_a' => [ @@ -228,7 +245,7 @@ public function testDictionaryEnforcement(): void { 'd', ], ], - 'numOfRows' => 2, + 'numOfRows' => 3, ], $result); } diff --git a/modules/datastore/tests/src/Unit/DataDictionary/AlterTableQuery/MySQLQueryTest.php b/modules/datastore/tests/src/Unit/DataDictionary/AlterTableQuery/MySQLQueryTest.php index 8fa05357a8..cea5f3554a 100644 --- a/modules/datastore/tests/src/Unit/DataDictionary/AlterTableQuery/MySQLQueryTest.php +++ b/modules/datastore/tests/src/Unit/DataDictionary/AlterTableQuery/MySQLQueryTest.php @@ -18,6 +18,8 @@ /** * Unit tests for Drupal\datastore\DataDictionary\AlterTableQuery\MySQLQuery. + * + * @coversDefaultClass Drupal\datastore\DataDictionary\AlterTableQuery\MySQLQuery */ class MySQLQueryTest extends TestCase { @@ -134,4 +136,81 @@ public function testExecuteWithTooLargeDecimal(): void { $mysql_query->execute(); } + public function baseTypeProvider() { + return [ + 'string' => ['string', 'TEXT'], + 'getBaseType-does-no-error-checking' => ['not-a-frictionless-type', NULL], + ]; + } + + /** + * @dataProvider baseTypeProvider + * @covers ::getBaseType + */ + public function testGetBaseType($frictionless_type, $expected_type) { + // Mock an object so we don't have to set up dependencies. + $mysql_query = $this->getMockBuilder(MySQLQuery::class) + ->disableOriginalConstructor() + // Don't mock the method we're testing. + ->setMethodsExcept(['getBaseType']) + ->getMock(); + + // getBaseType() is protected. + $get_test_base = new \ReflectionMethod($mysql_query, 'getBaseType'); + $get_test_base->setAccessible(TRUE); + + // Handle the case of exceptions. + if ($expected_type === NULL) { + $this->expectException(\InvalidArgumentException::class); + } + + $sql_type = $get_test_base->invokeArgs($mysql_query, [$frictionless_type]); + if ($expected_type !== NULL) { + $this->assertEquals($expected_type, $sql_type); + } + } + + /** + * Make sure that buildBoolPreAlterCommands calls buildBoolPreAlterCommands. + * + * @covers ::buildBoolPreAlterCommands + * + * @todo This is brittle, feel free to replace. + */ + public function testBuildPreAlterCommandsForBool() { + // Our test data. + $query_fields = [[ + 'name' => 'col', + 'type' => 'boolean', + 'format' => 'default', + 'title' => 'Boolz', + ]]; + + // Mock an object so we can call buildPreAlterCommands() against it. + $mysql_query = $this->getMockBuilder(MySQLQuery::class) + ->disableOriginalConstructor() + // Mock buildBoolPreAlterCommands() so we can set expectations on it. + ->onlyMethods(['buildBoolPreAlterCommands']) + // Don't mock these methods because they're called from + // buildPreAlterCommands() + ->setMethodsExcept(['buildPreAlterCommands', 'getBaseType']) + ->getMock(); + $mysql_query->expects($this->once()) + ->method('buildBoolPreAlterCommands') + ->with( + $this->equalTo('table'), + $this->equalTo('col') + ); + + // Mock a connection so buildPreAlterCommands() can call it. + $query_connection = new \ReflectionProperty($mysql_query, 'connection'); + $query_connection->setAccessible(TRUE); + $query_connection->setValue($mysql_query, $this->buildConnectionChain()->getMock()); + + // Set up the method so we can run it. + $build_pre_alter_command = new \ReflectionMethod($mysql_query, 'buildPreAlterCommands'); + $build_pre_alter_command->setAccessible(TRUE); + $build_pre_alter_command->invokeArgs($mysql_query, [$query_fields, 'table']); + } + } diff --git a/modules/datastore/tests/src/Unit/Plugin/QueueWorker/ImportJobTest.php b/modules/datastore/tests/src/Unit/Plugin/QueueWorker/ImportJobTest.php index e86e37131c..63929fb1c3 100644 --- a/modules/datastore/tests/src/Unit/Plugin/QueueWorker/ImportJobTest.php +++ b/modules/datastore/tests/src/Unit/Plugin/QueueWorker/ImportJobTest.php @@ -238,4 +238,55 @@ public function testNonStorage() { ]); } + public function sanitizeDescriptionProvider() { + return [ + 'multiline' => ["Multi\nLine", 'Multi Line'], + ]; + } + + /** + * @dataProvider sanitizeDescriptionProvider + * @covers \Drupal\datastore\Plugin\QueueWorker\ImportJob::sanitizeDescription + */ + public function testSanitizeDescription($column, $expected) { + $this->assertEquals($expected, ImportJob::sanitizeDescription($column)); + } + + public function sanitizeHeaderProvider() { + return [ + 'reserved_word' => ['accessible', '_accessible'], + 'numeric' => [1, '_1'], + ]; + } + + /** + * @dataProvider sanitizeHeaderProvider + * @covers \Drupal\datastore\Plugin\QueueWorker\ImportJob::sanitizeHeader + */ + public function testSanitizeHeader($column, $expected) { + $this->assertEquals($expected, ImportJob::sanitizeHeader($column)); + } + + public function truncateHeaderProvider() { + $max_length = 64; + return [ + 'max_length' => [ + str_repeat('a', $max_length), + $max_length, + ], + 'longer_length' => [ + str_repeat('b', $max_length + 1), + $max_length, + ], + ]; + } + + /** + * @dataProvider truncateHeaderProvider + * @covers \Drupal\datastore\Plugin\QueueWorker\ImportJob::truncateHeader + */ + public function testTruncateHeader($column, $expected) { + $this->assertEquals($expected, strlen(ImportJob::truncateHeader($column))); + } + } diff --git a/phpunit.xml b/phpunit.xml index ab5bce7831..3ec3332fd3 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -1,47 +1,29 @@ - - modules/common/src - modules/metastore/src - modules/metastore/modules/metastore/src - modules/metastore/modules/metastore_search/src - modules/metastore/modules/metastore_admin/src - modules/datastore/src - modules/datastore/modules/datastore_mysql_import/src - modules/frontend/src - modules/harvest/src - modules/harvest/modules/harvest_dashboard/src - modules/json_form_widget/src + . - modules/common/src/Tests + modules/common/tests + modules/datastore/tests + modules/dkan_js_frontend/tests + modules/metastore/tests + modules/sample_content ./ ./ - modules/common/tests/src - modules/metastore/tests/src - modules/metastore/modules/metastore/tests/src - modules/metastore/modules/metastore_search/tests/src - modules/metastore/modules/metastore_admin/tests/src - modules/datastore/tests/src - modules/datastore/modules/datastore_mysql_import/tests/src - modules/frontend/tests/src - modules/dkan_js_frontend/tests/src - modules/harvest/tests/src - modules/harvest/modules/harvest_dashboard/tests/src - modules/json_form_widget/tests/src - tests/src + . tests/src/Unit