/
vip-go-convert-to-utf8mb4.php
373 lines (303 loc) · 11.1 KB
/
vip-go-convert-to-utf8mb4.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
<?php
class VIP_Go_Convert_To_utf8mb4 extends WPCOM_VIP_CLI_Command {
/**
* Command arguments
*/
private $dry_run = true;
/**
* Class properties
*/
private $tables = array();
private $protect_masquerading_utf8 = false;
const TYPE_MAPPING = array(
'char' => 'binary',
'text' => 'blob',
'tinytext' => 'tinyblob',
'mediumtext' => 'mediumblob',
'longtext' => 'longblob',
'varchar' => 'varbinary', // length handled in maybe_protect_column()
'enum' => 'enum',
);
/**
* Convert site using `utf8` or `latin1` to use `utf8mb4`
*
* Adapted from https://codex.wordpress.org/Converting_Database_Character_Sets, particularly the method for protecting `utf8` masquerading as `latin1`
*
* Use `--protect-latin-one` when table contains `utf8` characters in `latin1`-encoded columns
*
* ## OPTIONS
*
* <protect-latin-one>
* : If passed, script assumes that `latin1` columns contain `utf8` content that
* should not be converted directly to `utf8mb4`. Required when migrating
* from WordPress.com.
*
* <dry-run>
* : Whether or not to modify the database, or simply inspect it.
* ---
* default: false
* options:
* - true
* - false
* ---
*
* @subcommand convert
* @synopsis [--dry-run=<dry-run>] [--protect-latin-one]
*/
public function convert( $args, $assoc_args ) {
global $wpdb;
WP_CLI::line( 'CONVERSION TO `utf8mb4` REQUESTED' );
// Parse arguments
$_dry_run = WP_CLI\Utils\get_flag_value( $assoc_args, 'dry-run', true );
if ( 'false' === $_dry_run ) {
$this->dry_run = false;
}
$_protect_columns = WP_CLI\Utils\get_flag_value( $assoc_args, 'protect-latin-one', false );
if ( false !== $_protect_columns ) {
$this->protect_masquerading_utf8 = true;
}
WP_CLI::line( '' );
WP_CLI::line( 'ARGUMENTS' );
WP_CLI::line( '* dry run: ' . ( $this->dry_run ? 'yes' : 'no' ) );
WP_CLI::line( '* protecting utf8 masquerading as latin1: ' . ( $this->protect_masquerading_utf8 ? 'yes' : 'no' ) );
WP_CLI::line( '' );
// Validate starting charset to avoid catastrophe
WP_CLI::line( 'PREFLIGHT CHECKS' );
if ( in_array( $wpdb->charset, array( 'latin1', 'utf8', ), true ) ) {
WP_CLI::line( "* Expected charset (`{$wpdb->charset}`) found." );
} elseif ( 'utf8mb4' === $wpdb->charset ) {
WP_CLI::error( 'Site is already using `utf8mb4`. Aborting!' );
return;
} else {
WP_CLI::error( "Unacceptable starting encoding: `{$wpdb->charset}`. Aborting!" );
return;
}
// Describe scope
if ( is_multisite() ) {
WP_CLI::line( '* Multisite detected, so this process will convert all network and global tables, along with the blog tables for all sites.' );
} else {
WP_CLI::line( '* Single site detected, so global and blog-specific tables will be converted. Any multisite tables will be skipped.' );
}
// Describe tables to be converted
$this->get_tables();
$tables_count = number_format( count( $this->tables ) );
$tables_string = implode( ', ', $this->tables );
WP_CLI::line( "* Found {$tables_count} tables to check and potentially convert: {$tables_string}." );
WP_CLI::line( '' );
// Provide an opportunity to abort
WP_CLI::confirm( "Proceed with " . ( $this->dry_run ? 'DRY' : 'LIVE' ) . " RUN and " . ( $this->dry_run ? 'test converting' : 'potentially convert' ) . " {$tables_count} tables from `{$wpdb->charset}` to `utf8mb4`?" );
if ( ! $this->dry_run ) {
WP_CLI::confirm( 'ARE YOU REALLY SURE?' );
}
WP_CLI::line( '' );
WP_CLI::line( 'Proceeding...' );
WP_CLI::line( '' );
unset( $tables_count, $tables_string );
// Do the work we came here for
foreach ( $this->tables as $table ) {
WP_CLI::line( "Converting {$table}..." );
$converted = $this->maybe_convert_table_to_utf8mb4( $table );
if ( true === $converted ) {
WP_CLI::success( "Done with {$table}." );
} elseif ( false === $converted ) {
if ( $this->dry_run ) {
WP_CLI::line( "Table {$table} not converted during dry run." );
} else {
WP_CLI::warning( "Table {$table} not converted because it doesn't exist or doesn't contain convertible columns." );
}
} else {
WP_CLI::warning( 'Unknown response: ' . var_export( $converted, true ) );
}
WP_CLI::line( '' );
}
// Update DB's default charset/collation
$convert_db = $wpdb->query( 'ALTER DATABASE ' . DB_NAME . ' CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci' );
if ( $convert_db ) {
WP_CLI::success( "Set database to utf8mb4" );
} else {
WP_CLI::warning( "Could not update database default charset" );
}
// Wrap up
WP_CLI::line( '' );
WP_CLI::line( '' );
WP_CLI::line( 'DONE!' );
WP_CLI::line( 'Time to update the `db_charset` and `db_collate` sitemeta, and reload web configs.' );
WP_CLI::line( 'DB_CHARSET: `utf8mb4`' );
WP_CLI::line( 'DB_COLLATE: `utf8mb4_unicode_ci`' );
}
/**
* UTILITY METHODS
*/
/**
* Populate array of tables to possibly convert
*
* NOTE: We don't use `$wpdb->tables( 'all' )` because it won't include all tables
* for every site in a multisite network. Instead, it will only include the current
* site's tables, along with the global tables. We want ALL tables, so we query for
* sites and merge each site's tables into a single array.
*/
private function get_tables() {
global $wpdb;
// Start with the global tables
// Under multisite, this includes the global multisite tables
$tables = array_values( $wpdb->tables( 'global' ) );
// Add blog-specific tables
if ( is_multisite() ) {
$site_ids = get_sites( array(
'fields' => 'ids',
) );
if ( is_array( $site_ids ) && ! empty( $site_ids ) ) {
foreach ( $site_ids as $site_id ) {
$tables = array_merge( $tables, array_values( $wpdb->tables( 'blog', true, $site_id ) ) );
}
unset( $site_id );
}
unset( $site_ids );
} else {
$tables = array_merge( $tables, array_values( $wpdb->tables( 'blog' ) ) );
}
// Store and return
$this->tables = array_unique( $tables );
unset( $tables );
return $this->tables;
}
/**
* If a table only contains latin1, utf8, or utf8mb4 columns, convert it to utf8mb4.
*
* Copied from wp-admin/includes/upgrade.php, with modifications for CLI usage
*/
private function maybe_convert_table_to_utf8mb4( $table ) {
global $wpdb;
$results = $wpdb->get_results( "SHOW FULL COLUMNS FROM `$table`" );
if ( ! $results ) {
return false;
}
foreach ( $results as $column ) {
if ( $column->Collation ) {
list( $charset ) = explode( '_', $column->Collation );
$charset = strtolower( $charset );
if ( ! in_array( $charset, array( 'latin1', 'utf8', 'utf8mb4', ), true ) ) {
// Don't upgrade tables that have columns we can't convert.
return false;
}
}
}
$table_details = $wpdb->get_row( "SHOW TABLE STATUS LIKE '$table'" );
if ( ! $table_details ) {
return false;
}
list( $table_charset ) = explode( '_', $table_details->Collation );
$table_charset = strtolower( $table_charset );
if ( 'utf8mb4' === $table_charset ) {
return true;
}
if ( $this->dry_run ) {
return false;
} else {
$column_convert_error = null;
if ( $this->protect_masquerading_utf8 ) {
$column_convert_error = false;
$protected_columns = $this->convert_masquerading_columns( $table );
// Exclude columns that didn't need conversion
$protected_columns = array_filter( $protected_columns );
// Alert to any columns that encountered errors
foreach ( $protected_columns as $col => $statuses ) {
if ( in_array( false, $statuses, true ) ) {
$column_convert_error = true;
WP_CLI::warning( "Problem converting {$col}" );
}
}
}
$convert = $wpdb->query( "ALTER TABLE $table CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" );
if ( is_int( $convert ) || $convert ) {
WP_CLI::line( "Converted table {$table}" );
$convert = true;
}
if ( true === $column_convert_error ) {
$convert = false;
}
return $convert;
}
}
/**
* When requested, convert columns that contain utf8 data but are encoded as latin1
*
* @param string $table
* @return array
*/
private function convert_masquerading_columns( $table ) {
global $wpdb;
$columns = $wpdb->get_results( "SHOW COLUMNS FROM $table;" );
$cols_converted = array();
foreach ( $columns as $col ) {
$cols_converted[ $col->Field ] = $this->maybe_convert_column( $col, $table );
}
return $cols_converted;
}
/**
* Convert latin1 columns that actually contain utf8
*
* @return mixed
*/
private function maybe_convert_column( $col, $table ) {
global $wpdb;
$from_type = $to_type = null;
foreach ( self::TYPE_MAPPING as $from => $to ) {
// Most will be exact matches, except for varchar and enum
if (
$col->Type === $from ||
( 'enum' === $from && 0 === stripos( $col->Type, $from ) ) ||
( 'varchar' === $from && 0 === stripos( $col->Type, $from ) )
) {
$from_type = $from;
$to_type = $to;
break;
}
}
unset( $from, $to );
// Carry on, we don't care about this column
if ( is_null( $from_type ) || is_null( $to_type ) ) {
return false;
}
// enums are special
if ( 0 === stripos( $from_type, 'enum' ) ) {
// If we can't find the options, something is very wrong
if ( ! preg_match( '#' . preg_quote( $from_type ) . '\(([^\)]+)\)#i', $col->Type, $options ) ) {
return false;
}
$options = array_pop( $options );
$null_not_null = 'yes' === strtolower( $col->Null ) ? 'NULL' : 'NOT NULL';
WP_CLI::line( "Converting column {$col->Field}" );
$convert = $wpdb->query( $wpdb->prepare( 'ALTER TABLE %1$s CHANGE %2$s %2$s ENUM(' . $options . ') CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci %3$s DEFAULT "%4$s"', $table, $col->Field, $null_not_null, $col->Default ) );
WP_CLI::line( "Finished converting column {$col->Field}" );
return array(
'convert' => $convert,
'restore' => $convert,
);
}
// Maintain varchar length
if ( 0 === stripos( $from_type, 'varchar' ) ) {
// If we can't find the length, something is very wrong
if ( ! preg_match( '#' . preg_quote( $from_type ) . '\(([\d]+)\)#i', $col->Type, $length ) ) {
return false;
}
$length = array_pop( $length );
if ( ! is_numeric( $length ) ) {
return false;
}
$from_type .= "({$length})";
$to_type .= "({$length})";
}
$null_not_null = 'yes' === strtolower( $col->Null ) ? ' NULL' : ' NOT NULL';
// On with it!
WP_CLI::line( "Converting column {$col->Field}" );
// Double conversion corrects column charset without changing its content, as the types converted to do not use charsets
// See https://codex.wordpress.org/Converting_Database_Character_Sets
$pattern = 'ALTER TABLE %1$s CHANGE %2$s %2$s %3$s %4$s %5$s DEFAULT "%6$s"';
$convert = $wpdb->query( $wpdb->prepare( $pattern, $table, $col->Field, $to_type, '', $null_not_null, $col->Default ) );
$restore = $wpdb->query( $wpdb->prepare( $pattern, $table, $col->Field, $from_type, 'CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci', $null_not_null, $col->Default ) );
WP_CLI::line( "Finished converting column {$col->Field}" );
return compact( 'convert', 'restore' );
}
}
WP_CLI::add_command( 'vip utf8mb4', 'VIP_Go_Convert_To_utf8mb4' );