-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
280 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
package LibreCat::Dedup; | ||
|
||
use Catmandu::Sane; | ||
use Moo::Role; | ||
use namespace::clean; | ||
|
||
with 'Catmandu::Logger'; | ||
|
||
requires '_find_duplicate'; | ||
|
||
# Report whether find_duplicate() yields a hit for $data.
#
# Returns 1 when find_duplicate() returns a true value whose first element
# is also true; returns 0 otherwise.
sub has_duplicate {
    my ($self, $data) = @_;

    my $matches = $self->find_duplicate($data);

    # Nothing usable came back at all.
    return 0 unless $matches;

    # Only the first entry is inspected.
    return $matches->[0] ? 1 : 0;
}
|
||
# Public entry point for duplicate lookup: hands $data to the
# _find_duplicate() method that the consuming class must provide and
# returns whatever that method returns.
sub find_duplicate {
    my ($self, $record) = @_;

    return $self->_find_duplicate($record);
}
|
||
1; | ||
|
||
__END__ | ||
=pod | ||
=head1 NAME | ||
LibreCat::Dedup - a LibreCat deduplication role | ||
=head1 SYNOPSIS | ||
package LibreCat::Dedup::Foo; | ||
use Moo; | ||
with 'LibreCat::Dedup'; | ||
sub _find_duplicate { | ||
my ($self, $data) = @_; | ||
# deduplication logic... | ||
} | ||
1; | ||
=head1 SEE ALSO | ||
L<LibreCat::Dedup::Publication> | ||
=cut |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
package LibreCat::Dedup::Publication; | ||
|
||
use Catmandu::Sane; | ||
use LibreCat qw(searcher); | ||
use Moo; | ||
use namespace::clean; | ||
|
||
with 'LibreCat::Dedup'; | ||
|
||
# Look up existing publications that share an identifier with $data.
#
# One CQL clause is built per identifier present (and true) in $data:
# "doi" is matched against the doi index; "isi", "pmid" and "arxiv" are
# matched against the externalidentifier index. The clauses are OR-ed
# together and run against the "publication" bag, limited to 5 hits.
#
# Returns an ARRAYREF holding the _id of every hit, or an empty ARRAYREF
# when $data carries none of the identifiers (no search is performed).
sub _find_duplicate {
    my ($self, $data) = @_;

    # Emit every value as a quoted CQL term with embedded backslashes and
    # double quotes escaped, so identifiers containing slashes, spaces,
    # quotes or CQL keywords cannot break or alter the query.
    my $quote = sub {
        my ($value) = @_;
        $value =~ s/(["\\])/\\$1/g;
        return qq("$value");
    };

    my @q;
    push @q, 'doi=' . $quote->($data->{doi}) if $data->{doi};
    for my $field (qw(isi pmid arxiv)) {
        push @q, 'externalidentifier=' . $quote->($data->{$field})
            if $data->{$field};
    }

    return [] unless @q;

    my $hits = searcher->search("publication",
        {cql => join(' OR ', @q), start => 0, limit => 5})->to_array;

    return [map { $_->{_id} } @$hits];
}
|
||
1; | ||
|
||
__END__ | ||
=pod | ||
=head1 NAME | ||
LibreCat::Dedup::Publication - a publication deduplicator | ||
=head1 SYNOPSIS | ||
use LibreCat::Dedup::Publication; | ||
my $detector = LibreCat::Dedup::Publication->new(); | ||
$detector->find_duplicate({doi => "10.2393/2342wneqe"}); | ||
=head1 METHODS | ||
=head2 has_duplicate($data) | ||
Returns 0 or 1. | ||
=head2 find_duplicate($data) | ||
Returns an ARRAYREF with publication IDs. | ||
=head1 SEE ALSO | ||
L<LibreCat>, L<LibreCat::Dedup> | ||
=cut |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
use Catmandu::Sane; | ||
use LibreCat -load => {layer_paths => [qw(t/layer)]}; | ||
use Test::More; | ||
use Test::Exception; | ||
use warnings FATAL => 'all'; | ||
|
||
# Tests for the LibreCat::Dedup role: the role loads, can be consumed by a
# class that supplies _find_duplicate(), and its public methods report the
# results produced by that method.
my $pkg;

BEGIN {
    $pkg = 'LibreCat::Dedup';
    use_ok $pkg;
}

require_ok $pkg;

# Consumer whose lookup always reports two matching IDs.
{
    package T::Dedup;
    use Moo;
    with $pkg;

    sub _find_duplicate {
        return [1234, 9876];
    }
}

# Consumer whose lookup never finds anything.
{
    package T::Dedup::None;
    use Moo;
    with $pkg;

    sub _find_duplicate {
        return [];
    }
}

lives_ok { T::Dedup->new() } 'role consumer can be instantiated';

my $d = T::Dedup->new;
can_ok $d, $_ for qw(has_duplicate find_duplicate);

is_deeply $d->find_duplicate({}), [1234, 9876],
    'find_duplicate returns the IDs from _find_duplicate';
is $d->has_duplicate({}), 1, 'has_duplicate is 1 when IDs are found';

my $none = T::Dedup::None->new;
is_deeply $none->find_duplicate({}), [],
    'find_duplicate returns an empty list when nothing matches';
is $none->has_duplicate({}), 0, 'has_duplicate is 0 when nothing matches';

done_testing;
Oops, something went wrong.