Skip to content

Commit

Permalink
Merge d8da214 into bfb5b13
Browse files Browse the repository at this point in the history
  • Loading branch information
vpeil committed Dec 3, 2019
2 parents bfb5b13 + d8da214 commit 9981d50
Show file tree
Hide file tree
Showing 7 changed files with 280 additions and 38 deletions.
1 change: 1 addition & 0 deletions config/locale.de.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ error:
update_failed: "Publikation %s konnte nicht gespeichert werden. Das Support-Team wurde benachrichtigt."
contact_admin: "Bitte kontaktieren Sie %s, falls das Problem bestehen bleibt."
preliminary_submit: "Das Formular wurde abgeschickt, bevor es vollständig geladen wurde. Alle Änderungen wurden verworfen."
duplicate_import: "Es gibt bereits einen Datensatz mit ID %s aus %s in der Datenbank: <a href='/librecat/record/edit/%s'>Eintrag</a>."
footer:
powered_by: "Powered by <a href='http://www.librecat.org/'>LibreCat</a>"
styles:
Expand Down
1 change: 1 addition & 0 deletions config/locale.en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ error:
update_failed: "Failed to update record %s. The admins have been notified."
contact_admin: "Please, contact %s when this problem persists."
preliminary_submit: "Form was submitted before it was fully loaded. All requested changes have been discarded to prevent data loss."
duplicate_import: "There already is a record with ID %s from %s in the database: <a href='/librecat/record/edit/%s'>Entry</a>."
footer:
powered_by: "Powered by <a href='http://www.librecat.org/'>LibreCat</a>"
styles:
Expand Down
120 changes: 82 additions & 38 deletions lib/LibreCat/App/Catalogue/Route/importer.pm
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,21 @@ LibreCat::App::Catalogue::Route::importer - central handler for import routes
=cut

use Catmandu::Sane;
use Catmandu::Util;
use LibreCat qw(publication);
use Catmandu::Fix::trim as => 'trim';
use Catmandu::Util;
use Dancer ':syntax';
use LibreCat qw(publication);
use LibreCat::App::Helper;
use URL::Encode qw(url_decode);
use LibreCat::Dedup::Publication;
use Try::Tiny;
use URL::Encode qw(url_decode);

# Maps an import source name to the identifier field under which the
# submitted record id is handed to _check_for_duplicate().
my %SOURCE_MAP = (
crossref => "doi",
epmc => "pmid",
arxiv => "arxiv",
inspire => "inspire",
);

sub _fetch_record {
my ($id, $source) = @_;
Expand All @@ -27,15 +35,16 @@ sub _fetch_record {

my $data = Catmandu->importer(
'getJSON',
from => url_decode("https://api.crossref.org/works/$id/agency"),
from =>
url_decode("https://api.crossref.org/works/$id/agency"),
timeout => 10,
warn => 0 ,
warn => 0,
)->first;

if (!$data) {
$source = "crossref";
}
elsif ( $data->{message}
elsif ($data->{message}
&& $data->{message}->{agency}->{id} eq "datacite")
{
$source = "datacite";
Expand All @@ -45,8 +54,8 @@ sub _fetch_record {
}
}

my $pkg
= Catmandu::Util::require_package($source, 'LibreCat::FetchRecord');
my $pkg = Catmandu::Util::require_package($source,
'LibreCat::FetchRecord');

unless ($pkg) {
h->log->error("failed to load LibreCat::FetchRecord::$source");
Expand All @@ -56,18 +65,36 @@ sub _fetch_record {
h->log->debug("Processing LibreCat::FetchRecord::$source $id");

return $pkg->new->fetch($id);
} catch {
}
catch {
h->log->error("Failed to fetch $id from $source");
return undef;
}
}

# Look up stored publications that share an identifier with $pub.
#
# The non-empty doi, isi, pmid and arxiv values of $pub are copied into a
# separate hash, which is passed to
# LibreCat::Dedup::Publication->find_duplicate. Its result is returned
# unchanged.
sub _check_for_duplicate {
    my ($pub) = @_;

    my $data;

    for my $field (qw(doi isi pmid arxiv)) {
        my $value = $pub->{$field};
        $data->{$field} = $value if $value;
    }

    # A single detector instance is created once and reused by later calls.
    state $detector = LibreCat::Dedup::Publication->new();

    return $detector->find_duplicate($data);
}

=head2 GET /librecat/record/import
Returns again to the add record page
=cut

get '/librecat/record/import' => sub {

    # This route has to exist for the 'return_url' mechanism; all it does
    # is send the client to the new-record page.
    return redirect(h->uri_for('/librecat/record/new'));
};
Expand All @@ -92,12 +119,25 @@ post '/librecat/record/import' => sub {
: $p->{data};
my $source = $p->{source};

my $dup = _check_for_duplicate({$SOURCE_MAP{$source} => $id,});

if ($dup && $dup->[0]) {
return template "backend/add_new",
{
error => sprintf(
h->loc('error.duplicate_import'),
$id, $source, $dup->[0]
),
imported => []
};
}

my $imported_records = _fetch_record($p->{id} // $data, $source);

unless (Catmandu::Util::is_array_ref($imported_records)) {
return template "backend/add_new",
{
error => "Import from $source failed - try later again" ,
error => "Import from $source failed - try later again",
imported => []
};
}
Expand All @@ -107,53 +147,57 @@ post '/librecat/record/import' => sub {
for my $pub (@$imported_records) {
$pub->{_id} = $bag->generate_id;
$pub->{status} = 'new'
; # new is the status of records not checked by users/reviewers
; # new is the status of records not checked by users/reviewers
$pub->{creator}
= {id => session->{user_id}, login => session->{user}};
$pub->{user_id} = session->{user_id};
$pub->{department} = $user->{department};

# If we allow bulk imports, add all the imported records
# otherwise return the first record
if(h->config->{web_bulk_import} or !exists h->config->{web_bulk_import}){
# Use config/hooks.yml to register functions
# that should run before/after importing publications
h->hook('import-new-' . $source)->fix_around(
$pub,
sub {
publication->add($pub ,
on_success => sub {
my ($rec) = @_;
push @saved_records , $rec;
}
);
}
);
if (h->config->{web_bulk_import}
or !exists h->config->{web_bulk_import})
{
# Use config/hooks.yml to register functions
# that should run before/after importing publications
h->hook('import-new-' . $source)->fix_around(
$pub,
sub {
publication->add(
$pub,
on_success => sub {
my ($rec) = @_;
push @saved_records, $rec;
}
);
}
);
}
else {
my $type = $pub->{type} || 'journal_article';
my $templatepath = "backend/forms";
$pub->{new_record} = 1;
my $type = $pub->{type} || 'journal_article';
my $templatepath = "backend/forms";
$pub->{new_record} = 1;

return template $templatepath . "/$type.tt", $pub;
return template "$templatepath/$type.tt", $pub;
}
}

my $errors = int(@$imported_records) - int(@saved_records);

if ($errors) {
return template "backend/add_new", {
error => $errors == 1 ? "1 import failed" : "$errors imports failed"
}
return template "backend/add_new",
{error => $errors == 1
? "1 import failed"
: "$errors imports failed"};
}
else {
return template "backend/add_new",
{
ok => "Imported "
. int(@saved_records)
. " record(s) from $source",
imported => \@saved_records ,
};
{
ok => "Imported "
. int(@saved_records)
. " record(s) from $source",
imported => \@saved_records,
};
}
};

Expand Down
60 changes: 60 additions & 0 deletions lib/LibreCat/Dedup.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package LibreCat::Dedup;

use Catmandu::Sane;
use Moo::Role;
use namespace::clean;

with 'Catmandu::Logger';

requires '_find_duplicate';

# Report whether find_duplicate() yields at least one hit for $data.
#
# Returns 1 when the result of find_duplicate() is true and its first
# element is true, otherwise 0.
sub has_duplicate {
    my ($self, $data) = @_;

    my $found = $self->find_duplicate($data);

    return 0 unless $found;
    return $found->[0] ? 1 : 0;
}

# Public entry point of the role: hands $data to the consuming class's
# _find_duplicate() and returns its result unchanged.
sub find_duplicate {
    my $self = shift;
    my ($data) = @_;

    return $self->_find_duplicate($data);
}

1;

__END__
=pod
=head1 NAME
LibreCat::Dedup - a LibreCat deduplication role
=head1 SYNOPSIS
package LibreCat::Dedup::Foo;
use Moo;
with 'LibreCat::Dedup';
sub _find_duplicate {
my ($self, $data) = @_;
# deduplication logic...
}
1;
=head1 SEE ALSO
L<LibreCat::Dedup::Publication>
=cut
60 changes: 60 additions & 0 deletions lib/LibreCat/Dedup/Publication.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package LibreCat::Dedup::Publication;

use Catmandu::Sane;
use LibreCat qw(searcher);
use Moo;
use namespace::clean;

with 'LibreCat::Dedup';

# Search the "publication" index for records that share an identifier
# with $data.
#
# $data is a hash reference. Its non-empty doi, isi, pmid and arxiv values
# are combined with OR into one CQL query: doi is matched against the
# "doi" index, the other three against "externalidentifier".
#
# Returns an array reference holding the _id of every hit (the search is
# limited to 5), or an empty array reference when $data carries none of
# these identifiers.
sub _find_duplicate {
    my ($self, $data) = @_;

    # [CQL index, key in $data], in the order the clauses are emitted.
    my @lookups = (
        ['doi',                'doi'],
        ['externalidentifier', 'isi'],
        ['externalidentifier', 'pmid'],
        ['externalidentifier', 'arxiv'],
    );

    my @q;
    for my $lookup (@lookups) {
        my ($index, $key) = @$lookup;
        my $value = $data->{$key};
        next unless $value;

        # Identifier values are not guaranteed to be clean. Every term is
        # emitted as a quoted CQL string with embedded double quotes and
        # backslashes escaped, so a value containing quotes, whitespace or
        # CQL operators cannot end the term early or add clauses.
        (my $term = $value) =~ s/(["\\])/\\$1/g;
        push @q, qq{$index="$term"};
    }

    return [] unless @q;

    my $hits = searcher->search("publication",
        {cql => join(' OR ', @q), start => 0, limit => 5})->to_array;

    return [map { $_->{_id} } @$hits];
}

1;

__END__
=pod
=head1 NAME
LibreCat::Dedup::Publication - a publication deduplicator
=head1 SYNOPSIS
use LibreCat::Dedup::Publication;
my $detector = LibreCat::Dedup::Publication->new();
$detector->find_duplicate({doi => "10.2393/2342wneqe"});
=head1 METHODS
=head2 has_duplicate($data)
Returns 0 or 1.
=head2 find_duplicate($data)
Returns an ARRAYREF with publication IDs.
=head1 SEE ALSO
L<LibreCat>, L<LibreCat::Dedup>
=cut
31 changes: 31 additions & 0 deletions t/LibreCat/Dedup.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use Catmandu::Sane;
use LibreCat -load => {layer_paths => [qw(t/layer)]};
use Test::More;
use Test::Exception;
use warnings FATAL => 'all';

# Name of the role under test. It is assigned inside BEGIN so that it is
# already set when use_ok runs there at compile time.
my $pkg;

BEGIN {
$pkg = 'LibreCat::Dedup';
use_ok $pkg;
};

require_ok $pkg;

{
    # Minimal consumer of the role: supplies the required _find_duplicate
    # with a fixed pair of ids.
    package T::Dedup;
    use Moo;
    with $pkg;

    sub _find_duplicate {
        my @ids = (1234, 9876);
        return \@ids;
    }
}

lives_ok {T::Dedup->new()} 'a class consuming the role can be instantiated';

my $d = T::Dedup->new;
can_ok $d, $_ for qw(has_duplicate find_duplicate);

# The role's public methods must delegate to the consumer's
# _find_duplicate; T::Dedup always reports the ids 1234 and 9876.
is_deeply $d->find_duplicate({}), [1234, 9876],
    'find_duplicate returns the ids from _find_duplicate';
is $d->has_duplicate({}), 1, 'has_duplicate is 1 when ids are found';

done_testing;

0 comments on commit 9981d50

Please sign in to comment.