Skip to content

Commit

Permalink
Adding support for conditional copying of MARC data
Browse files Browse the repository at this point in the history
  • Loading branch information
phochste committed Jul 4, 2017
1 parent 9563c95 commit 3ccb320
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 44 deletions.
34 changes: 25 additions & 9 deletions lib/Catmandu/Fix/marc_copy.pm
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ our $VERSION = '1.13';

has marc_path => (fix_arg => 1);
has path => (fix_arg => 1);
has equals => (fix_opt => 1);

sub emit {
my ($self,$fixer) = @_;
Expand All @@ -19,9 +20,10 @@ sub emit {
my $marc_obj = Catmandu::MARC->instance;

# Precompile the marc_path to gain some speed
my $marc_context = $marc_obj->compile_marc_path($self->marc_path);
my $marc_context = $marc_obj->compile_marc_path($self->marc_path, subfield_wildcard => 0);
my $marc = $fixer->capture($marc_obj);
my $marc_path = $fixer->capture($marc_context);
my $equals = $fixer->capture($self->equals);

my $var = $fixer->var;
my $result = $fixer->generate_var;
Expand All @@ -32,7 +34,8 @@ sub emit {
$perl .=<<EOF;
if (my ${result} = ${marc}->marc_copy(
${var},
${marc_path}) ) {
${marc_path},
${equals}) ) {
${result} = ref(${result}) ? ${result} : [${result}];
for ${current_value} (\@{${result}}) {
EOF
Expand Down Expand Up @@ -66,7 +69,7 @@ Catmandu::Fix::marc_copy - copy marc data in a structured way to a new field
# fixed field
marc_copy(001, fixed001)
May result into
Can result in:
fixed001 : [
{
Expand All @@ -82,7 +85,7 @@ Catmandu::Fix::marc_copy - copy marc data in a structured way to a new field
# variable field
marc_copy(650, subjects)
May result into
Can result in:
subjects:[
{
Expand Down Expand Up @@ -118,14 +121,27 @@ like tag, indicators and subfield codes into a nested data structure.
=head1 METHODS
=head2 marc_copy(MARC_TAG, JSON_PATH)
=head2 marc_copy(MARC_PATH, JSON_PATH, [equals: REGEX])
Copy this data referred by a MARC_TAG to a JSON_PATH.
Copy this MARC fields referred by a MARC_PATH to a JSON_PATH.
MARC_TAG (meaning the field tag) is the first segment of MARC_PATH.
When the MARC_PATH points to a MARC tag then only the fields mathching the MARC
tag will be copied. When the MATCH_PATH contains indicators or subfields, then
only the MARC_FIELDS which contain data in these subfields will be copied. Optional,
a C<equals> regular expression can be provided that should match the subfields that
need to be copied:
Using a MARC_PATH with subfield codes, indicators or substring will cause a
warning and these segments will be ignored when referring the data.
# Copy all the 300 fields
marc_copy(300,tmp)
# Copy all the 300 fields with indicator 1 = 1
marc_copy(300[1],tmp)
# Copy all the 300 fields which have subfield c
marc_copy(300c,tmp)
# Copy all the 300 fields which have subfield c equal to 'ABC'
marc_copy(300c,tmp,equal:"^ABC")
=head1 INLINE
Expand Down
61 changes: 49 additions & 12 deletions lib/Catmandu/MARC.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1038,32 +1038,70 @@ sub compile_marc_path {
}

sub marc_copy {
my $self = $_[0];
my $self = $_[0];
my $data = $_[1];
my $marc_path = $_[2];
my $marc_value = $_[3];

# $_[2] : marc_path
my $context = ref($_[2]) ?
$_[2] :
$self->compile_marc_path($_[2]);
my $context = ref($marc_path) ? $marc_path : $self->compile_marc_path($_[2], subfield_wildcard => 0);

confess "invalid marc path" unless $context;
carp "path segments like indicators, subfields and substrings are ignored"
if(defined $context->{subfield} or defined $context->{from} or
defined $context->{ind1} or defined $context->{ind2});

# $_[1] : data record
my $record = $_[1]->{'record'};
my $record = $data->{'record'};

return wantarray ? () : undef unless (defined $record && ref($record) eq 'ARRAY');

my $fields;
my $fields = [];

for my $field (@$record) {
my ($tag, $ind1, $ind2, @subfields) = @$field;

next if (
($context->{is_regex_field} == 0 && $field->[0] ne $context->{field} )
($context->{is_regex_field} == 0 && $tag ne $context->{field} )
||
($context->{is_regex_field} == 1 && $field->[0] !~ $context->{field_regex} )
($context->{is_regex_field} == 1 && $tag !~ $context->{field_regex} )
);

if (defined $context->{ind1}) {
if (!defined $ind1 || $ind1 ne $context->{ind1}) {
next;
}
}
if (defined $context->{ind2}) {
if (!defined $ind2 || $ind2 ne $context->{ind2}) {
next;
}
}

if ($context->{subfield}) {
my $found = 0;
for (my $i = 0; $i < @subfields; $i += 2) {
if ($subfields[$i] =~ $context->{subfield}) {
if (defined($marc_value)) {
$found = 1 if $subfields[$i+1] =~ /$marc_value/;
}
else {
$found = 1;
}
}
}
next unless $found;
}
else {
if (defined($marc_value)) {
my @sf = ();
for (my $i = 0; $i < @subfields; $i += 2) {
push @sf , $subfields[$i+1];
}

my $string = join "", @sf;

next unless ($string =~ /$marc_value/);
}
}

my $f = {};
$f->{tag} = $field->[0];

Expand Down Expand Up @@ -1181,7 +1219,6 @@ sub marc_paste {
if ($context->{subfield}) {
for (my $i = 0; $i < @subfields; $i += 2) {
if ($subfields[$i] =~ $context->{subfield}) {

if (defined($marc_value)) {
$found_match = $field_position if $subfields[$i+1] =~ /$marc_value/;
}
Expand Down
65 changes: 42 additions & 23 deletions t/26-marc_copy.t
Original file line number Diff line number Diff line change
Expand Up @@ -68,45 +68,64 @@ note 'marc_copy(245,title)';
], 'marc_map(245,title)';
}

note 'marc_copy(001/0-3,substr)';
{
warnings_like { Catmandu->importer(
'MARC',
file => \$mrc,
type => 'XML',
fix => 'marc_copy(001/0-3,substr)'
)->first} [{carped => qr/^path segments.+/},{carped => qr/^path segments.+/}], "warn on substring usage";
}

note 'marc_copy(245[,0],title)';
note 'marc_copy(245a,title)';
{
warnings_like { Catmandu->importer(
my $importer = Catmandu->importer(
'MARC',
file => \$mrc,
type => 'XML',
fix => 'marc_copy("245[,0]",title)'
)->first} [{carped => qr/^path segments.+/},{carped => qr/^path segments.+/}], "warn on substring usage";
fix => 'marc_copy(245a,title); retain_field(title)'
);
my $record = $importer->first;
is_deeply $record->{title},
[
{
tag => '245',
ind1 => '1',
ind2 => '0',
subfields => [
{ a => 'Title / '},
{ c => 'Name' },
]
}
], 'marc_map(245a,title)';
}


note 'marc_copy(245[1],title)';
note 'marc_copy(245x,title)';
{
warnings_like { Catmandu->importer(
my $importer = Catmandu->importer(
'MARC',
file => \$mrc,
type => 'XML',
fix => 'marc_copy(245[1],title)'
)->first} [{carped => qr/^path segments.+/},{carped => qr/^path segments.+/}], "warn on substring usage";
fix => 'marc_copy(245x,title); retain_field(title)'
);
my $record = $importer->first;
is_deeply $record->{title},
[
], 'marc_map(245x,title)';
}

note 'marc_copy(245a,title)';
note 'marc_copy(245a,title,equals:"Title / ")';
{
warnings_like { Catmandu->importer(
my $importer = Catmandu->importer(
'MARC',
file => \$mrc,
type => 'XML',
fix => 'marc_copy(245a,title)'
)->first} [{carped => qr/^path segments.+/},{carped => qr/^path segments.+/}], "warn on substring usage";
fix => 'marc_copy(245a,title,equals:"Title / "); retain_field(title)'
);
my $record = $importer->first;
is_deeply $record->{title},
[
{
tag => '245',
ind1 => '1',
ind2 => '0',
subfields => [
{ a => 'Title / '},
{ c => 'Name' },
]
}
], 'marc_map(245a,title,equals:"Title / ")';
}

note 'marc_copy(999,local)';
Expand Down

0 comments on commit 3ccb320

Please sign in to comment.