Skip to content

Commit

Permalink
Adding skip_errors option
Browse files Browse the repository at this point in the history
  • Loading branch information
phochste committed Jul 21, 2017
1 parent 3f98196 commit 35a1111
Show file tree
Hide file tree
Showing 6 changed files with 674 additions and 74 deletions.
2 changes: 1 addition & 1 deletion Build.PL
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

# This file was automatically generated by Dist::Zilla::Plugin::ModuleBuild v6.008.
# This file was automatically generated by Dist::Zilla::Plugin::ModuleBuild v6.005.
use strict;
use warnings;

Expand Down
4 changes: 3 additions & 1 deletion Changes
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
Revision history for Catmandu-MARC

{{$NEXT}}
- Adding skip_errors option in the importer and exporter
- More POD

1.171 2017-07-13 08:50:35 CEST
- Fixing 0 as false bug in marc_spec

1.17 2017-07-12 11:45:56 CEST
- Fixing 0 as false bug when using from/until
- Fixing double fix execution bug
Expand Down
73 changes: 46 additions & 27 deletions lib/Catmandu/Exporter/MARC.pm
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use Moo;
our $VERSION = '1.171';

has type => (is => 'ro' , default => sub { 'ISO' });
has skip_errors => (is => 'ro');
has _exporter => (is => 'ro');

with 'Catmandu::Exporter';
Expand All @@ -31,7 +32,20 @@ sub BUILD {
}

# Export a single record through the type-specific MARC exporter.
#
# Parameters:
#   $self - this exporter
#   $data - the record to export; handed unchanged to the add() method of
#           the underlying exporter held in _exporter
#
# Returns whatever the underlying exporter's add() returned, or undef when
# the record was skipped because of an error.
#
# When skip_errors is set, an exception thrown by the underlying exporter is
# caught and reported through $self->log->error, and the record is skipped.
# Otherwise the exception propagates to the caller.
sub add {
    my ($self, $data) = @_;

    # Strict mode: exactly one call, errors propagate untouched.
    return $self->_exporter->add($data) unless $self->skip_errors;

    # The trailing "1" makes the eval's return value the success indicator;
    # this stays reliable even when $@ is clobbered while the stack unwinds.
    my $result;
    my $ok = eval {
        $result = $self->_exporter->add($data);
        1;
    };

    unless ($ok) {
        my $error = $@;
        $self->log->error("error at record " . $self->count . " : $error");
    }

    return $result;
}

sub commit {
Expand All @@ -48,17 +62,14 @@ Catmandu::Exporter::MARC - Exporter for MARC records
=head1 SYNOPSIS
# From the command line
$ catmandu convert MARC --type ISO to MARC --type XML < /foo/bar.mrc
# From Perl
use Catmandu;
# Convert MARC to MARC
$ catmandu convert MARC to MARC < /foo/bar.mrc > /foo/output.mrc
my $importer = Catmandu->importer('MARC', file => "/foo/bar.mrc" , type => 'ISO');
my $exporter = Catmandu->exporter('MARC', file => "marc.xml", type => "XML" );
# Add fixes
$ catmandu convert MARC to MARC --fix myfixes.txt < /foo/bar.mrc > /foo/output.mrc
$exporter->add($importer);
$exporter->commit;
# Convert one format to another format
$ catmandu convert MARC --type ISO to MARC --type ALEPHSEQ < /foo/bar.mrc > /foo/bar.aleph
=head1 DESCRIPTION
Expand All @@ -67,38 +78,46 @@ to a file or the standard output.
=head1 CONFIGURATION
In addition to the configuration provided by L<Catmandu::Exporter> (C<file>,
C<fh>, etc.) the exporter can be configured with the following parameters:
=over
The 'type' parameter describes the MARC syntax variant. Supported values include:
=item file
=over
Write output to a local file given by its path or file handle. Alternatively a
scalar reference can be passed to write to a string and a code reference can be
used to write to a callback function.
=item
=item fh
ISO: L<Catmandu::Exporter::MARC::ISO> (default)
Write the output to an L<IO::Handle>. If not specified,
L<Catmandu::Util::io|Catmandu::Util/IO-functions> is used to create the output
handle from the C<file> argument or by using STDOUT.
=item
=item fix
XML: L<Catmandu::Exporter::MARC::XML>
An ARRAY of one or more fixes or file scripts to be applied to exported items.
=item
=item type
MARCMaker: L<Catmandu::Exporter::MARC::MARCMaker>
The MARC format to export. The following MARC exporters are available:
=item
ISO: L<Catmandu::Exporter::MARC::ISO> (default) - a strict ISO 2709 exporter
ALEPHSEQ: L<Catmandu::Exporter::MARC::ALEPHSEQ> - an exporter for Ex Libris Aleph sequential files
MARCMaker: L<Catmandu::Exporter::MARC::MARCMaker> - an exporter for MARCMaker/MARCBreaker records
MiJ: L<Catmandu::Exporter::MARC::MiJ> (MARC in JSON) - an exporter for the MARC-in-JSON format
XML: L<Catmandu::Exporter::MARC::XML> - an exporter for the MARC XML format
MiJ: L<Catmandu::Exporter::MARC::MiJ> (MARC in JSON)
=item skip_errors
=item
If set, then any errors when writing MARC output will be skipped and ignored. Use the
debug setting of catmandu to view all error messages:
ALEPHSEQ: L<Catmandu::Exporter::MARC::ALEPHSEQ>
$ catmandu -D convert MARC to MARC --skip_errors 1 --fix myfixes.txt < /foo/bar.mrc
=back
=item <other>
E.g.
Every MARC exporter can have its own options. Check the documentation of the specific exporter.
catmandu convert MARC --type XML to MARC --type ISO < marc.xml > marc.iso
=back
=head1 SEE ALSO
Expand Down
106 changes: 62 additions & 44 deletions lib/Catmandu/Importer/MARC.pm
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use Moo;
our $VERSION = '1.171';

has type => (is => 'ro' , default => sub { 'ISO' });
has skip_errors => (is => 'ro');
has _importer => (is => 'ro');

with 'Catmandu::Importer';
Expand Down Expand Up @@ -36,7 +37,27 @@ sub BUILD {
}

# Build the record generator for this importer.
#
# Returns a code reference; each call produces the next item from the
# type-specific importer held in _importer.
#
# Without skip_errors the underlying importer's generator is returned
# unchanged. With skip_errors set it is wrapped: an exception thrown while
# fetching a record is reported through $self->log->error (tagged with a
# running record number) and the next record is fetched instead. Whatever
# the underlying generator returns, including undef, is passed through.
sub generator {
    my ($self) = @_;

    # Strict mode: hand back the underlying generator as-is.
    return $self->_importer->generator unless $self->skip_errors;

    my $gen = $self->_importer->generator;
    my $nr  = 0;    # number of fetch attempts so far, used in log messages

    return sub {
        my ($item, $failed);

        # An explicit flag marks a failed fetch, so the value of the item
        # itself never decides whether we retry.
        # NOTE(review): if the underlying generator throws on every call this
        # loop does not terminate - confirm importers advance past bad input.
        do {
            $nr++;
            $failed = 0;
            try {
                $item = $gen->();
            } catch {
                $failed = 1;
                $self->log->error("error at record $nr : $_");
            };
        } while ($failed);

        return $item;
    };
}

1;
Expand All @@ -48,27 +69,28 @@ Catmandu::Importer::MARC - Package that imports MARC data
=head1 SYNOPSIS
use Catmandu;
# On the command line
# Convert MARC to JSON (default)
$ catmandu convert MARC < /foo/bar.mrc
# import records from file
my $importer = Catmandu->importer('MARC',file => '/foo/bar.mrc');
# Convert MARC to MARC
$ catmandu convert MARC to MARC < /foo/bar.mrc > /foo/output.mrc
my $count = $importer->each(sub {
my $record = shift;
# ...
});
# Add fixes
$ catmandu convert MARC to MARC --fix myfixes.txt < /foo/bar.mrc > /foo/output.mrc
# import records and apply a fixer
my $fixer = fixer("marc_map('245a','title')");
# Create a list of titles
$ catmandu convert MARC to TSV --fix "marc_map(245,title); retain(title)" < /foo/bar.mrc
$fixer->fix($importer)->each(sub {
my $record = shift;
printf "title: %s\n" , $record->{title};
});
# Convert MARC XML
$ catmandu convert MARC --type XML < /foo/bar.xml
# Convert MARC to JSON mapping 245a to a title with the L<catmandu> command line client:
# Convert ALEPH sequential
$ catmandu convert MARC --type ALEPHSEQ < /foo/bar.aleph
catmandu convert MARC --fix "marc_map('245a','title')" < /foo/bar.mrc
# Convert one format to another format
$ catmandu convert MARC --type ISO to MARC --type ALEPHSEQ < /foo/bar.mrc > /foo/bar.aleph
=head1 DESCRIPTION
Expand Down Expand Up @@ -118,52 +140,48 @@ L<Catmandu::Iterable>.
=head1 CONFIGURATION
In addition to the configuration provided by L<Catmandu::Importer> (C<file>,
C<fh>, etc.) the importer can be configured with the following parameters:
The 'type' parameter describes the MARC syntax variant. Supported values include:
=over
=item
ISO: L<Catmandu::Importer::MARC::ISO> (default)
=item
=item file
MicroLIF: L<Catmandu::Importer::MARC::MicroLIF>
Read input from a local file given by its path. Alternatively a scalar
reference can be passed to read from a string.
=item
=item fh
MARCMaker: L<Catmandu::Importer::MARC::MARCMaker>
Read input from an L<IO::Handle>. If not specified, L<Catmandu::Util::io|Catmandu::Util/IO-functions> is used to
create the input stream from the C<file> argument or by using STDIN.
=item
=item fix
MiJ: L<Catmandu::Importer::MARC::MiJ> (MARC in JSON)
An ARRAY of one or more fixes or file scripts to be applied to imported items.
=item
=item type
XML: L<Catmandu::Importer::MARC::XML>
The MARC format to parse. The following MARC parsers are available:
=item
ISO: L<Catmandu::Importer::MARC::ISO> (default) - a strict ISO 2709 parser
RAW: L<Catmandu::Importer::MARC::RAW> - a loose ISO 2709 parser that skips faulty records
ALEPHSEQ: L<Catmandu::Importer::MARC::ALEPHSEQ> - a parser for Ex Libris Aleph sequential files
Lint: L<Catmandu::Importer::MARC::Lint> - a MARC syntax checker
MicroLIF: L<Catmandu::Importer::MARC::MicroLIF> - a parser for the MicroLIF format
MARCMaker: L<Catmandu::Importer::MARC::MARCMaker> - a parser for MARCMaker/MARCBreaker records
MiJ: L<Catmandu::Importer::MARC::MiJ> (MARC in JSON) - a parser for the MARC-in-JSON format
XML: L<Catmandu::Importer::MARC::XML> - a parser for the MARC XML format
RAW: L<Catmandu::Importer::MARC::RAW>
=item skip_errors
=item
If set, then any errors when parsing MARC input will be skipped and ignored. Use the
debug setting of catmandu to view all error messages:
Lint: L<Catmandu::Importer::MARC::Lint>
$ catmandu -D convert MARC --skip_errors 1 < /foo/bar.mrc
=item
=item <other>
ALEPHSEQ: L<Catmandu::Importer::MARC::ALEPHSEQ>
Every MARC importer can have its own options. Check the documentation of the specific importer.
=back
E.g.
catmandu convert MARC --type XML to MARC --type ISO < marc.xml > marc.iso
=head1 SEE ALSO
L<Catmandu::Exporter::MARC>
12 changes: 11 additions & 1 deletion t/01-importer.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use warnings;

use Catmandu::Importer::MARC;
use MARC::File::USMARC;
use Test::Simple tests => 9;
use Test::Simple tests => 10;

my $importer = Catmandu::Importer::MARC->new(
file => 't/camel.mrc',
Expand Down Expand Up @@ -43,3 +43,13 @@ $importer = Catmandu::Importer::MARC->new(
);
$records = $importer->to_array();
ok( $records->[0]->{'_id'} eq '2000.', 'got _id from subfield' );

# Test broken records: import t/broken.xml with skip_errors enabled and
# check that 9 records come back (the assertion label says one was skipped).
$importer = Catmandu::Importer::MARC->new(
    type        => "XML",
    file        => 't/broken.xml',
    skip_errors => 1,
);
$records = $importer->to_array();

ok( scalar( @{$records} ) == 9, 'skipped one record' );
Loading

0 comments on commit 35a1111

Please sign in to comment.