Permalink
Switch branches/tags
Unicode-11.0 add-more-expr-ops adjust_nursery_stringops alias_analysis annotate_refs_reprapi asyncsocket_listen_sockname backtrace_uninline better_takedispatcher_opt callsite_flags_sso circular_outer_fixups collation-arrays compunit_string_mem_share concat_moar_binary configure-telemeh-probe-rdtscp coroutine crashy_getxfrom_templates ctx-lazy ctypes3 debug-dynvar-bug debugspam_inlining declarative_op_checks decont_assign_nativerefs decrease_spesh_log_memory_growth deserialization_debugspam deterministic-ucd2c dont_gc_in_spesh du-chains-and-opts-WIP du-chains-and-opts dynamic_gen2_tuning early_death_percentage eliminate_redundant_guards esc execname expr-jit-invoke ext-stage extra-usage-chains-fixes finite_callgraph_depth fix-illumos-build fix-null-concat fix_for_expmod fork-safety frame-gc-opts fsa_cleanup_stats fsa_tune_page_sizes gc_worklist_add_vector gen2-frames getenvhash_constant_fold gh-pages heapsnapshot_onlymajor_filter helgrind_support hllbool improve_boxing_and_not in-situ-strings informative_deopt_profile inline_ignore_instrumentation_bytesize inline_in_place inlining-exception-fix issue165 jit-comment-on-spesh-log jit-expr-optimizer jit-moar-ops jit-perf-map jit-sp_speshresolve jit-stack-walker jit_and_opt_setcodeobj jit_devirtualize_reprops_3 jit_getcodeobj jit_indexicim_ops jit_stuff_in_speshlog jitcode-refcount lazier_inline_fixups lazy_static_lex_vivify leave libuv-1.6.1-update line_based_coverage_5 make_builds_reproducible_again make_unbox_removal_available many_null_checks master maybe_fix_big_endian_oldmoar maybe_fix_big_endian moar-gdb-prettyprinter moritz/debian multi_cache_no_segfault_on_null multicachefind multidimarray_view mvmarray_in_situ_storage mvmhash_use_fsa named_to_positional nativecall_script nativeref_decont_split nfa_to_statelist nine-try-this-fix no_atomic_if_single_threaded no_fuse_bb_after_guard nqp-mbc null-normalization optimize_callsite_memory optimize_can_op overflow_exception_mvmarray p6opaque_packed p6opaque_use_fsa 
pahole pahole2017 pea pointers postrelease-opts prevent_double_unlock_multi_cache_add profile_dump_less_stack_usage profiler-extra-type-info profiler_new_spesh_semantics profiling-additions refuse_dangerous_inlines restricted return_from_inline_without_log_exit sepsh set-removal sha1bin short_string_cache slower/elim-take-dispatcher smoke-me/spaceybuild speculative-calls spesh-array-access spesh-leaks spesh-value-prop spesh_comments spesh_constant_folding spesh_faster_shutdown spesh_hll_and_boot_types spesh_lex_vivify_checks spesh_remove_set_op spesh_tune_alloc speshplugin_guardstaticcode sync-without-uv telemeh_try telemeh_windows_port template-compiler-refactor udp_receive_hostname_port update_libatomic_ops uthash_padding valgrind_support vectorization vmhealth wip-mvmarray-refactor wip-tile-no-template
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 138 lines (136 sloc) 4.84 KB
#!/usr/bin/env perl6
# Gets the latest Unicode Data files and extracts them.
use v6;
# Remote locations of the Unicode Character Database (UCD) archive and of the
# Unicode Collation Algorithm (UCA) key table and conformance-test archive.
my $UCD-zip-lnk        = 'ftp://ftp.unicode.org/Public/UCD/latest/ucd/UCD.zip';
my $UCA-all-keys       = 'ftp://ftp.unicode.org/Public/UCA/latest/allkeys.txt';
my $UCA-collation-test = 'ftp://ftp.unicode.org/Public/UCA/latest/CollationTest.zip';

# Base URL of the legacy code-page mapping tables, followed by the tables
# (paths relative to that base) that MAIN downloads.
my $CODETABLES_URL = 'ftp://ftp.unicode.org/Public/MAPPINGS/';
my @CODETABLES = (
    'VENDORS/MICSFT/WINDOWS/CP1252.TXT',
    'VENDORS/MICSFT/WINDOWS/CP1251.TXT',
);

# Absolute path of the directory every download ends up in; it is resolved
# once, against the directory the script was started from.
my IO::Path $unidata = 'UNIDATA'.IO.absolute.IO;
# Entry point: creates a fresh UNIDATA directory and fills it with
#   * UCD.zip (downloaded and unpacked),
#   * UCA/CollationTest.zip (downloaded and unpacked) and UCA/allkeys.txt,
#   * CODETABLES/ with the mapping tables listed in @CODETABLES plus the
#     WHATWG index-jis0208.txt,
#   * one emoji-<version> directory per Emoji release (see get-emoji).
# Dies if the UNIDATA directory already exists.
#
# Each step runs exactly once; an earlier revision repeated the UCD/UCA steps
# a second time, which only had an effect when the first download had failed.
sub MAIN {
    # An existing directory is treated as an error rather than reused.
    if $unidata.d {
        die "$unidata directory already exists. Please delete it and run again.";
    }
    say "Creating UNIDATA directory";
    $unidata.mkdir;
    chdir $unidata;

    unless "UCD.zip".IO.f {
        say "Downloading the latest UCD from $UCD-zip-lnk";
        download-file($UCD-zip-lnk, "UCD.zip");
        say "Unzipping UCD.zip";
        unzip-file("UCD.zip");
    }

    unless "UCA".IO.d {
        say "Creating the UCA directory";
        mkdir "UCA";
        download-set-file($UCA-collation-test, 'CollationTest.zip', "UCA");
    }

    unless "UCA/allkeys.txt".IO.f {
        say "Downloading allkeys.txt from $UCA-all-keys";
        chdir "UCA".IO;
        download-file($UCA-all-keys, "allkeys.txt");
        # Go back via the absolute path so the next steps start in UNIDATA.
        chdir $unidata;
    }

    unless "CODETABLES".IO.d {
        say "Downloading codetables from $CODETABLES_URL";
        mkdir "CODETABLES";
        chdir "CODETABLES";
        for @CODETABLES -> $table {
            say "dling $CODETABLES_URL$table";
            download-file("$CODETABLES_URL$table", urlfilename($table));
        }
        download-file("https://encoding.spec.whatwg.org/index-jis0208.txt", urlfilename("index-jis0208.txt"));
        # Leave the working directory where the other steps expect it.
        chdir $unidata;
    }

    get-emoji();
}
# Downloads $url with curl and stores it as $filename, relative to the
# current working directory ($*CWD).
#
# curl is spawned directly with an argument list instead of through a shell
# command line, so characters in $url or $filename that are special to the
# shell (quotes, `$`, backticks, ...) cannot alter the command being run.
#
# A failed download is best-effort, as before: it is reported on STDERR but
# does not abort the script. Returns True when curl exited with status 0 and
# False otherwise.
sub download-file ( Str:D $url, Str:D $filename ) {
    # Keeping the Proc in a variable prevents it from being sunk, which
    # would turn a non-zero exit status into an exception.
    my $curl = run 'curl', $url, '-o', $filename;
    my $succeeded = $curl.exitcode == 0;
    note "curl exited with status {$curl.exitcode} while downloading $url"
        unless $succeeded;
    $succeeded
}
# Makes sure $filename is present inside directory $dir, downloading it from
# $url when it is missing, and unpacks it there when its name ends in ".zip".
# The working directory is restored after each of the two steps.
sub download-set-file ( Str:D $url, Str:D $filename, Str:D $dir) {
    my $target = "$dir/$filename".IO;
    unless $target.f {
        my $return-to = $*CWD;
        say "Downloading $filename from $url";
        # download-file writes relative to the current directory.
        chdir $dir.IO;
        download-file($url, $filename);
        chdir $return-to;
    }
    # Archives are unpacked on every call, whether or not they were just
    # downloaded.
    if $filename.ends-with('.zip') {
        my $return-to = $*CWD;
        chdir $dir.IO;
        unzip-file($filename);
        chdir $return-to;
    }
}
# Returns the part of $str that follows its last "/", i.e. the file name of
# a URL or path. A string without any "/" is returned unchanged; a string
# ending in "/" yields the empty string.
sub urlfilename (Str:D $str) {
    my $last-slash = $str.rindex('/');
    $last-slash.defined
        ?? $str.substr($last-slash + 1)
        !! $str
}
# Unpacks the archive $zip into the current working directory by running the
# external `unzip` program through the shell. Returns whatever unzip printed
# on its standard output.
sub unzip-file ( Str:D $zip ) {
    my $command = qq{unzip "$zip"};
    return qqx{$command};
}
# Downloads the Emoji data files of every released Emoji version, starting
# with $first-emoji-ver, into one "emoji-<version>" directory per version
# inside the UNIDATA directory. Versions whose ReadMe.txt mentions "draft" or
# "preliminary" (in any letter case) are skipped.
sub get-emoji {
    chdir $unidata;
    # Since emoji sequence names are not canonical and unchangeable, we get
    # all of them, starting with the first version the feature was added in.
    my $first-emoji-ver = <4.0>;
    my $emoji-dir = "ftp://ftp.unicode.org/Public/emoji/";
    my @emoji-vers;
    say "Getting a listing of the Emoji versions";
    for qqx{curl -s "$emoji-dir"}.lines -> $listing-line {
        # The entry name is taken from the ninth space-separated field of a
        # listing line (presumably an `ls -l` style FTP listing).
        my $entry = $listing-line.split(/' '+/)[8];
        # Keep only plain "<digits>.<digits>" names. Short lines (no ninth
        # field) and non-numeric entries would otherwise end up in the numeric
        # comparison below, and since the name comes from a remote server it
        # must not reach a shell command line unchecked.
        push @emoji-vers, $entry
            if $entry.defined && $entry ~~ /^ <[0..9]>+ '.' <[0..9]>+ $/;
    }
    say "Emoji versions: ", @emoji-vers.join(', ');
    # Sort by numeric value: a plain string sort would put "12.0" before
    # "4.0". Newest version first.
    for @emoji-vers.grep(* >= $first-emoji-ver).sort(+*).reverse -> $version {
        say "See version $version of Emoji, checking to see if it's a draft";
        my $emoji-data = "$emoji-dir$version/";
        my $readme-url = $emoji-data ~ 'ReadMe.txt';
        my $readme = qqx{curl -s "$readme-url"}.chomp;
        # The :i adverb has to be part of the regex itself to make the match
        # case-insensitive for certain.
        if $readme ~~ /:i draft | PRELIMINARY/ {
            say "Looks like $version is a draft. ReadMe.txt text: <<$readme>>";
            next;
        }
        say "Found version $version. Don't see /:i draft|PRELIMINARY/ in the text.";
        say $emoji-data;
        my $emoji-folder = "emoji-$version".IO;
        $emoji-folder.mkdir;
        chdir $emoji-folder;
        my @to-download = <ReadMe.txt emoji-data.txt emoji-sequences.txt emoji-zwj-sequences.txt emoji-test.txt>;
        for @to-download -> $filename {
            # $emoji-data already ends in "/", so no extra separator.
            download-file "$emoji-data$filename", $filename;
        }
        chdir "..";
    }
}