Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'pr/marc-line' into dev
- Loading branch information
Showing
7 changed files
with
457 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
=head1 NAME | ||
Catmandu::Exporter::MARC::Line - Exporter for MARC records to Index Data's MARC Line format | ||
=head1 SYNOPSIS | ||
# From the command line | ||
$ catmandu convert MARC to MARC --type Line < t/camel.mrc | ||
# From Perl | ||
use Catmandu; | ||
my $importer = Catmandu->importer('MARC', file => "t/camel.mrc", type => 'ISO'); | ||
my $exporter = Catmandu->exporter('MARC', file => "camel.line", type => 'Line' ); | ||
$exporter->add($importer); | ||
$exporter->commit; | ||
=head1 CONFIGURATION | ||
=over | ||
=item file | ||
Write output to a local file given by its path or file handle. Alternatively a | ||
scalar reference can be passed to write to a string and a code reference can be | ||
used to write to a callback function. | ||
=item fh | ||
Write the output to an L<IO::Handle>. If not specified, | ||
L<Catmandu::Util::io|Catmandu::Util/IO-functions> is used to create the output | ||
handle from the C<file> argument or by using STDOUT. | ||
=item fix | ||
An ARRAY of one or more fixes or file scripts to be applied to exported items. | ||
=item encoding | ||
Binmode of the output stream C<fh>. Set to "C<:utf8>" by default. | ||
=back | ||
=head1 METHODS | ||
See L<Catmandu::Exporter>, L<Catmandu::Addable>, L<Catmandu::Fixable>, | ||
L<Catmandu::Counter>, and L<Catmandu::Logger> for a full list of methods. | ||
=head1 SEE ALSO | ||
L<Catmandu::Exporter> | ||
=cut | ||
|
||
package Catmandu::Exporter::MARC::Line; | ||
use Catmandu::Sane; | ||
use Moo; | ||
|
||
our $VERSION = '1.261'; | ||
|
||
with 'Catmandu::Exporter', 'Catmandu::Exporter::MARC::Base'; | ||
|
||
has record => (is => 'ro', default => sub {'record'}); | ||
has record_format => (is => 'ro', default => sub {'raw'}); | ||
|
||
sub add { | ||
my ($self, $data) = @_; | ||
|
||
if ($self->record_format eq 'MARC-in-JSON') { | ||
$data = $self->_json_to_raw($data); | ||
} | ||
|
||
for my $field (@{$data->{record}}) { | ||
my ($field, $ind1, $ind2, @sf) = @$field; | ||
|
||
if (!defined($ind1) || $ind1 =~ /^\s*$/) {$ind1 = ' '} | ||
if (!defined($ind2) || $ind2 =~ /^\s*$/) {$ind2 = ' '} | ||
|
||
next unless ($field =~ /^(LDR|\d{3})/); | ||
|
||
my @sf_map = (); | ||
|
||
for (my $i = 0; $i < @sf; $i += 2) { | ||
if ($field eq 'LDR' || $field < 10) { | ||
push @sf_map, $sf[$i + 1] if (defined($sf[$i + 1])); | ||
} | ||
else { | ||
push @sf_map, '$' . $sf[$i], $sf[$i + 1] | ||
if (defined($sf[$i + 1])); | ||
} | ||
} | ||
|
||
my $sf_str = join(" ", @sf_map); | ||
|
||
my $line; | ||
|
||
if ($field =~ /^\d{3}$/ && $field >= 10) { | ||
$line = "$field $ind1$ind2 $sf_str\n"; | ||
} | ||
elsif ($field eq 'LDR') { | ||
$line = "$sf_str\n"; | ||
} | ||
else { | ||
$line = "$field $sf_str\n"; | ||
} | ||
|
||
$self->fh->print($line); | ||
} | ||
|
||
$self->fh->print("\n"); | ||
} | ||
|
||
sub commit { | ||
my ($self) = @_; | ||
$self->fh->flush; | ||
|
||
1; | ||
} | ||
|
||
1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
=head1 NAME | ||
Catmandu::Importer::MARC::Line - Package that imports Index Data's MARC Line records | ||
=head1 SYNOPSIS | ||
# From the command line | ||
$ catmandu convert MARC --type Line --fix 'marc_map("245a","title")' < t/code4lib.line | ||
$ catmandu convert MARC --type Line to MARC --type XML < t/code4lib.line | ||
# From perl | ||
use Catmandu; | ||
# import records from file | ||
my $importer = Catmandu->importer('MARC',file => 't/code4lib.line' , type => 'Line'); | ||
my $fixer = Catmandu->fixer("marc_map('245a','title')"); | ||
$importer->each(sub { | ||
my $item = shift; | ||
... | ||
}); | ||
# or using the fixer | ||
$fixer->fix($importer)->each(sub { | ||
my $item = shift; | ||
printf "title: %s\n" , $item->{title}; | ||
}); | ||
=head1 CONFIGURATION | ||
=over | ||
=item file | ||
Read input from a local file given by its path. Alternatively a scalar | ||
reference can be passed to read from a string. | ||
=item fh | ||
Read input from an L<IO::Handle>. If not specified, L<Catmandu::Util::io> is used to | ||
create the input stream from the C<file> argument or by using STDIN. | ||
=item encoding | ||
Binmode of the input stream C<fh>. Set to C<:utf8> by default. | ||
=item fix | ||
An ARRAY of one or more fixes or file scripts to be applied to imported items. | ||
=back | ||
=head1 METHODS | ||
Every Catmandu::Importer is a Catmandu::Iterable all its methods are inherited. | ||
=head1 SEE ALSO | ||
L<Catmandu::Importer>, | ||
L<Catmandu::Iterable> | ||
=cut | ||
|
||
package Catmandu::Importer::MARC::Line; | ||
use Catmandu::Sane; | ||
use Moo; | ||
|
||
our $VERSION = '1.261'; | ||
|
||
with 'Catmandu::Importer'; | ||
|
||
sub generator { | ||
my ($self) = @_; | ||
sub { | ||
state $fh = $self->fh; | ||
state $count = 0; | ||
|
||
# set input record separator to paragraph mode | ||
local $/ = ''; | ||
|
||
# get next record | ||
while (defined(my $data = $fh->getline)) { | ||
$count++; | ||
my @record; | ||
my $id; | ||
chomp $data; | ||
|
||
# split record into fields | ||
my @fields = split /\n/, $data; | ||
|
||
# first field should be the MARC leader | ||
my $leader = shift @fields; | ||
if (length $leader == 24 && $leader =~ m/^\d{5}.*4500/) { | ||
push @record, ['LDR', ' ', ' ', '_', $leader]; | ||
} else { | ||
warn "not a valid MARC leader: $leader"; | ||
} | ||
for my $field (@fields) { | ||
|
||
# process control fields | ||
if ($field =~ m/^00.\s/) { | ||
my ($tag, $value) = $field =~ m/^(\d{3})\s(.*)/; | ||
push @record, [$tag, ' ', ' ', '_', $value]; | ||
|
||
# get record id | ||
if ($tag eq '001') { | ||
$id = $value; | ||
} | ||
} | ||
|
||
# process variable data fields | ||
else { | ||
my ($tag, $ind1, $ind2, $sf) | ||
= $field =~ m/^(\d{3})\s([a-z0-9\s])([a-z0-9\s])\s(.*)/; | ||
|
||
# check if field has content | ||
if ($sf) { | ||
# get subfield codes by pattern | ||
my @sf_codes = $sf =~ m/\s?\$([a-z0-9])\s/g; | ||
|
||
# split string by subfield code pattern | ||
my @sf_values | ||
= grep {length $_} split /\s?\$[a-z0-9]\s/, $sf; | ||
|
||
if (scalar @sf_codes != scalar @sf_values) { | ||
warn | ||
'different number of subfield codes and values'; | ||
next; | ||
} | ||
|
||
push @record, | ||
[ | ||
$tag, $ind1, | ||
$ind2, map {$_, shift @sf_values} @sf_codes | ||
]; | ||
} | ||
|
||
# skip empty fields | ||
else { | ||
warn "field $tag has no content"; | ||
next; | ||
} | ||
|
||
} | ||
} | ||
return {_id => defined $id ? $id : $count, record => \@record}; | ||
} | ||
return; | ||
}; | ||
} | ||
|
||
1; |
Oops, something went wrong.