|
4 | 4 | # * Bug fixes in the spell checker have removed the need
|
5 | 5 | # to check certain words.
|
6 | 6 | # * Edits to the docs themselves no longer use some words.
|
7 |
| - |
| 7 | +# |
| 8 | +# set UTIL_CLEAN_SPELL_REGEX environment variable to |
| 9 | +# only check words that match |
| 10 | +# that regex. This test can be slow, this gives |
| 11 | +# us an easy way to chunk the test runs. |
| 12 | +# |
| 13 | +# Try to be clever and only test files that match the word, |
| 14 | +# even if it's a partial match to speed up the testing. |
| 15 | +# |
8 | 16 | # Trust but verify: make sure you rerun the entire spell check
|
9 | 17 | # after letting this program update the .pws files
|
10 | 18 |
|
11 | 19 | use File::Temp;
|
12 | 20 |
|
13 |
| -use lib 'lib'; |
| 21 | +use lib $*PROGRAM.parent(2).child('lib'); |
14 | 22 | use Test-Files;
|
15 | 23 | use Pod::Cache;
|
16 | 24 |
|
| 25 | +my $regex = %*ENV<UTIL_CLEAN_SPELL_REGEX> // "."; |
| 26 | + |
| 27 | +# Check the same files as xt/aspell.t does by default... |
17 | 28 | my @files = Test-Files.documents.grep({not $_ ~~ / 'README.' .. '.md' /});
|
18 | 29 |
|
| 30 | +# ... but use pre-generated/rendered Pod6 files for our quick search. |
19 | 31 | @files = @files.map({
|
20 | 32 | $_.ends-with('.pod6') ?? Pod::Cache.cache-file($_) !! $_;
|
21 | 33 | });
|
22 | 34 |
|
23 |
| -my %killed; |
24 |
| - |
25 |
| -sub erase-word($file, @words, $word="") { |
26 |
| - my ($tmp_fname, $tmp_io) = tempfile; |
27 |
| - for @words -> $i { |
28 |
| - next if $i eq $word; |
29 |
| - next if %killed{$i}:exists; |
30 |
| - $tmp_io.say($i); |
31 |
| - } |
32 |
| - $tmp_io.close; |
33 |
| - run('mv', $tmp_fname, $file); |
34 |
| -} |
35 |
| - |
36 | 35 | for <xt/pws/words.pws xt/pws/code.pws> -> $dict {
|
37 |
| - my @words = $dict.IO.lines; |
38 |
| - for @words -> $word { |
| 36 | + for $dict.IO.lines -> $word { |
| 37 | + next unless $word ~~ /<$regex>/; |
| 38 | + next if $word eq "personal_ws-1.1 en 0 utf-8"; |
39 | 39 | note "Testing $dict / $word ";
|
40 |
| - my $keep = True; |
41 | 40 |
|
42 |
| - my $proc = Proc::Async.new( 'grep', '-li', $word, |@files); |
43 |
| - my $output = ""; |
44 |
| - $proc.stdout.tap(-> $buf { $output ~= $buf }); |
45 |
| - my $promise = await $proc.start; |
| 41 | + my $proc = run( 'grep', '-li', $word, |@files, :out); |
| 42 | + my $output = $proc.out.slurp; |
46 | 43 |
|
47 |
| - if +$promise.status { |
48 |
| - say "Can't find $dict/$word anywhere, kill it."; |
49 |
| - $keep = False; |
| 44 | + # remove word, keep pointer to backup lexicon |
| 45 | + my $backup = erase-word($dict, $word); |
| 46 | + |
| 47 | + if $output eq '' { |
| 48 | + note "\tnot found, removing."; |
50 | 49 | } else {
|
51 |
| - # Remove the one word we're testing from the file. |
52 |
| - erase-word($dict, @words, $word); |
53 |
| - my @raw-files = $output.lines.map({ $_.subst('.pod-cache/', '') }); |
54 |
| - my $proc = run( 'xt/aspell.t', |@raw-files ); |
| 50 | + my @min-files = $output.lines; |
| 51 | + note "\tfound in {+@min-files} files, testing."; |
| 52 | + my $proc = run( 'xt/aspell.t', |@min-files, :out, :err); |
55 | 53 | if $proc.exitcode == 0 {
|
56 |
| - say "aspell test passed"; |
57 |
| - $keep = False; |
| 54 | + note "\taspell test passed, removing word"; |
58 | 55 | } else {
|
59 |
| - say "aspell test failed, keep it"; |
| 56 | + note "\taspell test failed, keeping word"; |
| 57 | + run('mv', $backup, $dict); |
60 | 58 | }
|
61 | 59 | }
|
62 |
| - %killed{$word} = True unless $keep; |
63 | 60 | }
|
64 |
| - erase-word($dict, @words); |
65 | 61 | }
|
66 | 62 |
|
67 |
| -dd %killed; |
| 63 | +sub erase-word($dict, $word) { |
| 64 | + # Create a temp copy of the lexicon that doesn't contain the word |
| 65 | + my ($tmp_fname, $tmp_io) = tempfile; |
| 66 | + for $dict.IO.lines -> $i { |
| 67 | + $tmp_io.say($i) unless $i eq $word; |
| 68 | + } |
| 69 | + $tmp_io.close; |
| 70 | + |
| 71 | + # backup the dictionary file |
| 72 | + my ($backup_fname, $bkp_io) = tempfile; |
| 73 | + run('cp', $dict, $backup_fname); |
| 74 | + |
| 75 | + # try the updated copy |
| 76 | + run('mv', $tmp_fname, $dict); |
| 77 | + |
| 78 | + # return a link to the last good copy of the file in case caller needs to restore it. |
| 79 | + return $backup_fname; |
| 80 | +} |
0 commit comments