Skip to content

Commit

Permalink
Merge branch 'pr/marc-line' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
phochste committed Oct 29, 2021
2 parents be8feae + d635c9c commit 2741a4b
Show file tree
Hide file tree
Showing 7 changed files with 457 additions and 7 deletions.
15 changes: 8 additions & 7 deletions lib/Catmandu/Exporter/MARC.pm
Expand Up @@ -98,13 +98,14 @@ An ARRAY of one or more fixes or file scripts to be applied to exported items.
=item type
The MARC format to parse. The following MARC parsers are available:
ISO: L<Catmandu::Importer::MARC::ISO> (default) - a strict ISO 2709 exporter
ALEPHSEQ: L<Catmandu::Importer::MARC::ALEPHSEQ> - an exporter for Ex Libris Aleph sequential files
MARCMaker: L<Catmandu::Importer::MARC::MARCMaker> - an exporter for MARCMaker/MARCBreaker records
MiJ: L<Catmandu::Importer::MARC::MiJ> (MARC in JSON) - an export for the MARC-in-JSON format
XML: L<Catmandu::Importer::MARC::XML> - an exporter for the MARC XML format
The MARC format to export. The following MARC exporters are available:
ISO: L<Catmandu::Exporter::MARC::ISO> (default) - a strict ISO 2709 exporter
ALEPHSEQ: L<Catmandu::Exporter::MARC::ALEPHSEQ> - an exporter for Ex Libris Aleph sequential files
Line: L<Catmandu::Exporter::MARC::Line> - an exporter for Index Data's MARC Line format
MARCMaker: L<Catmandu::Exporter::MARC::MARCMaker> - an exporter for MARCMaker/MARCBreaker records
MiJ: L<Catmandu::Exporter::MARC::MiJ> (MARC in JSON) - an export for the MARC-in-JSON format
XML: L<Catmandu::Exporter::MARC::XML> - an exporter for the MARC XML format
=item skip_errors
Expand Down
121 changes: 121 additions & 0 deletions lib/Catmandu/Exporter/MARC/Line.pm
@@ -0,0 +1,121 @@
=head1 NAME
Catmandu::Exporter::MARC::Line - Exporter for MARC records to Index Data's MARC Line format
=head1 SYNOPSIS
# From the command line
$ catmandu convert MARC to MARC --type Line < t/camel.mrc
# From Perl
use Catmandu;
my $importer = Catmandu->importer('MARC', file => "t/camel.mrc", type => 'ISO');
my $exporter = Catmandu->exporter('MARC', file => "camel.line", type => 'Line' );
$exporter->add($importer);
$exporter->commit;
=head1 CONFIGURATION
=over
=item file
Write output to a local file given by its path or file handle. Alternatively a
scalar reference can be passed to write to a string and a code reference can be
used to write to a callback function.
=item fh
Write the output to an L<IO::Handle>. If not specified,
L<Catmandu::Util::io|Catmandu::Util/IO-functions> is used to create the output
handle from the C<file> argument or by using STDOUT.
=item fix
An ARRAY of one or more fixes or file scripts to be applied to exported items.
=item encoding
Binmode of the output stream C<fh>. Set to "C<:utf8>" by default.
=back
=head1 METHODS
See L<Catmandu::Exporter>, L<Catmandu::Addable>, L<Catmandu::Fixable>,
L<Catmandu::Counter>, and L<Catmandu::Logger> for a full list of methods.
=head1 SEE ALSO
L<Catmandu::Exporter>
=cut

package Catmandu::Exporter::MARC::Line;
use Catmandu::Sane;
use Moo;

our $VERSION = '1.261';

with 'Catmandu::Exporter', 'Catmandu::Exporter::MARC::Base';

has record => (is => 'ro', default => sub {'record'});
has record_format => (is => 'ro', default => sub {'raw'});

sub add {
my ($self, $data) = @_;

if ($self->record_format eq 'MARC-in-JSON') {
$data = $self->_json_to_raw($data);
}

for my $field (@{$data->{record}}) {
my ($field, $ind1, $ind2, @sf) = @$field;

if (!defined($ind1) || $ind1 =~ /^\s*$/) {$ind1 = ' '}
if (!defined($ind2) || $ind2 =~ /^\s*$/) {$ind2 = ' '}

next unless ($field =~ /^(LDR|\d{3})/);

my @sf_map = ();

for (my $i = 0; $i < @sf; $i += 2) {
if ($field eq 'LDR' || $field < 10) {
push @sf_map, $sf[$i + 1] if (defined($sf[$i + 1]));
}
else {
push @sf_map, '$' . $sf[$i], $sf[$i + 1]
if (defined($sf[$i + 1]));
}
}

my $sf_str = join(" ", @sf_map);

my $line;

if ($field =~ /^\d{3}$/ && $field >= 10) {
$line = "$field $ind1$ind2 $sf_str\n";
}
elsif ($field eq 'LDR') {
$line = "$sf_str\n";
}
else {
$line = "$field $sf_str\n";
}

$self->fh->print($line);
}

$self->fh->print("\n");
}

sub commit {
my ($self) = @_;
$self->fh->flush;

1;
}

1;
1 change: 1 addition & 0 deletions lib/Catmandu/Importer/MARC.pm
Expand Up @@ -163,6 +163,7 @@ The MARC format to parse. The following MARC parsers are available:
ISO: L<Catmandu::Importer::MARC::ISO> (default) - a strict ISO 2709 parser
RAW: L<Catmandu::Importer::MARC::RAW> - a loose ISO 2709 parser that skips faulty records
ALEPHSEQ: L<Catmandu::Importer::MARC::ALEPHSEQ> - a parser for Ex Libris Aleph sequential files
Line: L<Catmandu::Importer::MARC::Line> - a parser for Index Data's MARC Line format
Lint: L<Catmandu::Importer::MARC::Lint> - a MARC syntax checker
MicroLIF: L<Catmandu::Importer::MARC::MicroLIF> - a parser for the MicroLIF format
MARCMaker: L<Catmandu::Importer::MARC::MARCMaker> - a parser for MARCMaker/MARCBreaker records
Expand Down
154 changes: 154 additions & 0 deletions lib/Catmandu/Importer/MARC/Line.pm
@@ -0,0 +1,154 @@
=head1 NAME
Catmandu::Importer::MARC::Line - Package that imports Index Data's MARC Line records
=head1 SYNOPSIS
# From the command line
$ catmandu convert MARC --type Line --fix 'marc_map("245a","title")' < t/code4lib.line
$ catmandu convert MARC --type Line to MARC --type XML < t/code4lib.line
# From perl
use Catmandu;
# import records from file
my $importer = Catmandu->importer('MARC',file => 't/code4lib.line' , type => 'Line');
my $fixer = Catmandu->fixer("marc_map('245a','title')");
$importer->each(sub {
my $item = shift;
...
});
# or using the fixer
$fixer->fix($importer)->each(sub {
my $item = shift;
printf "title: %s\n" , $item->{title};
});
=head1 CONFIGURATION
=over
=item file
Read input from a local file given by its path. Alternatively a scalar
reference can be passed to read from a string.
=item fh
Read input from an L<IO::Handle>. If not specified, L<Catmandu::Util::io> is used to
create the input stream from the C<file> argument or by using STDIN.
=item encoding
Binmode of the input stream C<fh>. Set to C<:utf8> by default.
=item fix
An ARRAY of one or more fixes or file scripts to be applied to imported items.
=back
=head1 METHODS
Every Catmandu::Importer is a Catmandu::Iterable all its methods are inherited.
=head1 SEE ALSO
L<Catmandu::Importer>,
L<Catmandu::Iterable>
=cut

package Catmandu::Importer::MARC::Line;
use Catmandu::Sane;
use Moo;

our $VERSION = '1.261';

with 'Catmandu::Importer';

sub generator {
my ($self) = @_;
sub {
state $fh = $self->fh;
state $count = 0;

# set input record separator to paragraph mode
local $/ = '';

# get next record
while (defined(my $data = $fh->getline)) {
$count++;
my @record;
my $id;
chomp $data;

# split record into fields
my @fields = split /\n/, $data;

# first field should be the MARC leader
my $leader = shift @fields;
if (length $leader == 24 && $leader =~ m/^\d{5}.*4500/) {
push @record, ['LDR', ' ', ' ', '_', $leader];
} else {
warn "not a valid MARC leader: $leader";
}
for my $field (@fields) {

# process control fields
if ($field =~ m/^00.\s/) {
my ($tag, $value) = $field =~ m/^(\d{3})\s(.*)/;
push @record, [$tag, ' ', ' ', '_', $value];

# get record id
if ($tag eq '001') {
$id = $value;
}
}

# process variable data fields
else {
my ($tag, $ind1, $ind2, $sf)
= $field =~ m/^(\d{3})\s([a-z0-9\s])([a-z0-9\s])\s(.*)/;

# check if field has content
if ($sf) {
# get subfield codes by pattern
my @sf_codes = $sf =~ m/\s?\$([a-z0-9])\s/g;

# split string by subfield code pattern
my @sf_values
= grep {length $_} split /\s?\$[a-z0-9]\s/, $sf;

if (scalar @sf_codes != scalar @sf_values) {
warn
'different number of subfield codes and values';
next;
}

push @record,
[
$tag, $ind1,
$ind2, map {$_, shift @sf_values} @sf_codes
];
}

# skip empty fields
else {
warn "field $tag has no content";
next;
}

}
}
return {_id => defined $id ? $id : $count, record => \@record};
}
return;
};
}

1;

0 comments on commit 2741a4b

Please sign in to comment.