Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Added token scraper, first version, but works so far good!

  • Loading branch information...
commit 9ed9047cb2d38bae72b850ba1c26a2ac6dedb23d 1 parent c93d75b
@Getty authored
Showing with 270 additions and 1 deletion.
  1. +269 −0 bin/locale_simple_scraper
  2. +1 −1  t/data/locale_simple_test.html
View
269 bin/locale_simple_scraper
@@ -0,0 +1,269 @@
+#!/usr/bin/env perl
+# PODNAME: locale_simple_scraper
+# ABSTRACT: Command line tool for finding tokens in code
+
+=head1 SYNPOSIS
+
+ # ignoring specific regexps
+ # please be aware that you must escape the . and because we use bash here you
+ # must also be aware to escape the \ for escaping the .
+ locale_simple_scraper --ignores \\.build --ignores some/dir
+
+ # different output parameter, might require more modules
+ locale_simple_scraper --output perl
+ locale_simple_scraper --output yaml
+ locale_simple_scraper --output json
+
+ # setting additional extensions for Javascript (default: js)
+ locale_simple_scraper --js jjs,ajs
+
+ # setting additional extensions for Perl (default: pl, pm, t)
+ locale_simple_scraper --pl ppl,pppl
+
+ # setting additional extensions for Text::Xslate (default: tx)
+ locale_simple_scraper --tx ttx,xxx
+
+=head1 DESCRIPTION
+
+This tool parses all Perl, Python, Javascript and Text::Xslate templates in the
+current directory and subdirectories to find calls to the L<Locale::Simple>
+API.
+
+It gives out on STDERR which files are parsed right now, while it dumps the
+resulting data to the screen. By default it generates a B<.po> file, but you can
+specify via I<--output> to dump B<json>, B<yaml> or a B<perl> data structure.
+
+Be aware that we add the domain as flag in the B<.po> file. So double token
+may appear which only differs through a B<#,> flag.
+
+=head1 SUPPORT
+
+Repository
+
+ http://github.com/Getty/p5-locale-simple
+ Pull request and additional contributors are welcome
+
+Issue Tracker
+
+ http://github.com/Getty/p5-locale-simple/issues
+
+=cut
+
+$|=1;
+
+use strict;
+use warnings;
+use Getopt::Long;
+use File::Find;
+use Cwd;
+use IO::All;
+use Locale::Simple;
+use Data::Dumper;
+
+#
+# !!! Work in progress !!!
+#
+
+ # Supported filetypes:
+my $js_ext = ""; # Javascript
+my $pl_ext = ""; # Perl
+my $py_ext = ""; # Python
+my $tx_ext = ""; # Text::Xslate (Kolon or Metakolon)
+
+my @ignores;
+
+my $output = 'po';
+
+GetOptions(
+ "js=s" => \$js_ext,
+ "pl=s" => \$pl_ext,
+ "py=s" => \$py_ext,
+ "tx=s" => \$tx_ext,
+ "ignore=s" => \@ignores,
+ "output=s" => \$output,
+);
+
+# could add Getopt::Long here for override
+
+my @js = split(",",$js_ext);
+push @js, 'js';
+
+my @pl = split(",",$pl_ext);
+push @pl, 'pl', 'pm', 't';
+
+my @tx = split(",",$tx_ext);
+push @tx, 'tx';
+
+my @py = split(",",$py_ext);
+push @py, 'py';
+
+# extension list
+my %e = (
+ ( map { $_ => 'js' } @js ),
+ ( map { $_ => 'pl' } @pl ),
+ ( map { $_ => 'tx' } @tx ),
+ ( map { $_ => 'py' } @py ),
+);
+
+# functions with count of locale simple with function of parameter
+#
+# 1 = msgid
+# 2 = msgid_plural
+# 3 = msgctxt
+# 4 = domain
+#
+my %f = (
+ l => [1],
+ ln => [1,2],
+ ld => [4,1],
+ lp => [3,1],
+ lnp => [3,1,2],
+ ldn => [4,1,2],
+ ldp => [4,3,1],
+ ldnp => [4,3,1,2],
+);
+
+my @found;
+
+my $dir = getcwd;
+my $re_dir = $dir;
+$re_dir =~ s/\./\\./g;
+
+finddepth(sub {
+ my $filename = $File::Find::name;
+ $filename =~ s/^$dir\///g;
+ for (@ignores) {
+ return if $filename =~ /$_/;
+ }
+ my @fileparts = split('\.',$File::Find::name);
+ my $ext = pop @fileparts;
+ if (grep { $ext eq $_ } keys %e) {
+ my $file = $File::Find::name;
+ my $type = $e{$ext};
+ print STDERR $type." => ".$file."\n";
+ my @lines = io($file)->slurp;
+ my $line = 0;
+ for (@lines) {
+ $line++;
+ my @results = parse_line($_, $type);
+ for (@results) {
+ push @found, {
+ %{$_},
+ line => $line,
+ file => $filename,
+ type => $type,
+ }
+ }
+ }
+ }
+}, $dir);
+
+if ($output eq 'po') {
+ my %files;
+ my %token;
+ for (@found) {
+ my $key .= defined $_->{domain} ? '"'.$_->{domain}.'"' : 'undef';
+ $key .= defined $_->{msgctxt} ? '"'.$_->{msgctxt}.'"' : 'undef';
+ $key .= defined $_->{msgid} ? '"'.$_->{msgid}.'"' : 'undef';
+ $key .= defined $_->{msgid_plural} ? '"'.$_->{msgid_plural}.'"' : 'undef';
+ $token{$key} = $_ unless defined $token{$key};
+ $files{$key} = [] unless defined $files{$key};
+ push @{$files{$key}}, $_->{file}.':'.$_->{line};
+ }
+ for my $k (sort { $a cmp $b } keys %files) {
+ print "\n";
+ print "#: ".join(' ',@{$files{$k}})."\n";
+ print "#, locale-simple-format";
+ print " ".$token{$k}{domain} if defined $token{$k}{domain};
+ print "\n";
+ for (qw( msgctxt msgid msgid_plural )) {
+ print $_.' "'.Locale::Simple::gettext_escape($token{$k}{$_}).'"'."\n" if defined $token{$k}{$_};
+ }
+ }
+} elsif ($output eq 'perl') {
+ print Dumper \@found;
+} elsif ($output eq 'json') {
+ eval {
+ require JSON;
+ JSON->import;
+ print encode_json(\@found);
+ } or do {
+ die "You require the module JSON for this output";
+ };
+} elsif ($output eq 'yaml') {
+ eval {
+ require YAML;
+ YAML->import;
+ print Dump(\@found);
+ } or do {
+ die "You require the module YAML for this output";
+ };
+}
+
+sub parse_line {
+ my ( $line, $type, @results ) = @_;
+ return if $line =~ /^\s*\#.*/;
+ for (keys %f) {
+ my @args = @{$f{$_}};
+ for ($line =~ /[^\w]${_}\((.*)/) {
+ my $argc = scalar @args;
+ my ( $remainder, @params ) = parse_params($1, $type, $argc);
+ if (scalar @params == $argc) {
+ my %result;
+ my $pos = 0;
+ for (@args) {
+ $result{msgid} = $params[$pos] if $_ eq 1;
+ $result{msgid_plural} = $params[$pos] if $_ eq 2;
+ $result{msgctxt} = $params[$pos] if $_ eq 3;
+ $result{domain} = $params[$pos] if $_ eq 4;
+ $pos++;
+ }
+ push @results, \%result, parse_line( $remainder, $type );
+ }
+ }
+ }
+ return @results;
+}
+
+sub parse_params {
+ my ( $params, $type, $argc ) = @_;
+ my @chars = split('',$params);
+ my @args;
+ my $arg = "";
+ my $q_state = 0; # 0 = code, 1 = qoute, 2 = double qoute
+ my $comma_state = 1;
+ while (defined (my $c = shift @chars)) {
+ next if $c =~ /\s/ and !$q_state;
+ if ($q_state) {
+ if ($c eq '\\') {
+ my $esc = shift @chars;
+ if ($esc eq "'" or $esc eq '"' or $esc eq '\\') {
+ $arg .= $c;
+ } else {
+ warn "Unknown escape char '".$esc."'";
+ }
+ } elsif ( ( $c eq "'" and $q_state == 1 ) or ( $c eq '"' and $q_state == 2 ) ) {
+ $q_state = 0;
+ $comma_state = 0;
+ push @args, $arg;
+ $arg = "";
+ last if scalar @args == $argc;
+ } else {
+ $arg .= $c;
+ }
+ } else {
+ if ($c eq "'" or $c eq '"') {
+ die "quote found where comma expected: ".$params unless $comma_state;
+ $q_state = $c eq "'" ? 1 : 2;
+ } elsif ($c eq ',') {
+ die "comma found after comma in code: ".$params if $comma_state;
+ $comma_state = 1;
+ } elsif ($type eq 'js') {
+ last;
+ } else {
+ last;
+ }
+ }
+ }
+ return join('', @chars), @args;
+}
View
2  t/data/locale_simple_test.html
@@ -29,7 +29,7 @@
var faildiv = document.getElementById('fail');
faildiv.style.display = 'block';
}
- }
+ }
window.onload = function() {
Please sign in to comment.
Something went wrong with that request. Please try again.