From 940e805be811645f3412c6850e06d74b208cfa72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Mart=C3=ADnez?= Date: Wed, 12 Jan 2022 08:03:19 +0100 Subject: [PATCH 1/2] fix Publications --- README.md | 2 +- .../main/java/org/csuc/cli/{App.java => Cerif.java} | 9 +++++---- .../src/main/java/org/csuc/marshal/Department.java | 12 +++++------- .../src/main/java/org/csuc/marshal/Project.java | 9 ++++----- .../src/main/java/org/csuc/marshal/Publication.java | 5 +++-- .../main/java/org/csuc/marshal/ResearchGroup.java | 9 ++++----- 6 files changed, 22 insertions(+), 24 deletions(-) rename transformation/src/main/java/org/csuc/cli/{App.java => Cerif.java} (98%) diff --git a/README.md b/README.md index 8900513..6dfb330 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ mvn clean install -DskipTests ## Command line https://spark.apache.org/docs/latest/submitting-applications.html ``` -spark-submit --master "local[*]" --class org.csuc.cli.App --packages info.picocli:picocli:4.6.2,com.crealytics:spark-excel_2.12:3.2.0_0.16.0,com.typesafe:config:1.4.1 --jars euroCRIS-cerif-definitions-1.6.2.jar prc-cerif-${version}.jar args +spark-submit --master "local[*]" --class org.csuc.cli.Cerif --packages info.picocli:picocli:4.6.2,com.crealytics:spark-excel_2.12:3.2.0_0.16.0,com.typesafe:config:1.4.1 --jars euroCRIS-cerif-definitions-1.6.2.jar prc-cerif-${version}.jar args ``` ``` Usage: prc-cerif [-fhV] -i= [-o=] -r= diff --git a/transformation/src/main/java/org/csuc/cli/App.java b/transformation/src/main/java/org/csuc/cli/Cerif.java similarity index 98% rename from transformation/src/main/java/org/csuc/cli/App.java rename to transformation/src/main/java/org/csuc/cli/Cerif.java index f02592e..430a7fc 100644 --- a/transformation/src/main/java/org/csuc/cli/App.java +++ b/transformation/src/main/java/org/csuc/cli/Cerif.java @@ -34,7 +34,7 @@ "\tOS: ${os.name} ${os.version} ${os.arch}" } ) -public class App implements Runnable { +public class Cerif implements Runnable { @CommandLine.Spec CommandLine.Model.CommandSpec spec; @@ -53,7 +53,7 @@ public class App implements Runnable { private Instant inici; public static void main(String[] args) { - CommandLine cmd = new CommandLine(new App()); + CommandLine cmd = new CommandLine(new Cerif()); if (args.length == 0) cmd.usage(System.out); else cmd.execute(args); } @@ -242,12 +242,13 @@ public void run() { Dataset research_groups_join = research_groups.join(research_groups_relations, col("research_groups._c4").equalTo(col("research_groups_relations._c0")), "left").drop(col("research_groups_relations._c0")); Dataset publication_join = publications.join(publication_relations, col("publications._c1").equalTo(col("publication_relations._c0")), "left").drop(col("publication_relations._c0")); + //departments_join.write().parquet("/tmp/departments_join"); + //CERIF Marshaller marshaller = new Marshaller(ruct); CopyOnWriteArrayList cfPersTypeList = new CopyOnWriteArrayList<>(); CopyOnWriteArrayList cfOrgUnitTypeList = new CopyOnWriteArrayList<>(); - CopyOnWriteArrayList cfOrgUnitTypeList_2 = new CopyOnWriteArrayList<>(); CopyOnWriteArrayList cfProjTypeList = new CopyOnWriteArrayList<>(); CopyOnWriteArrayList cfResPublTypeList = new CopyOnWriteArrayList<>(); @@ -265,7 +266,7 @@ public void run() { if (research_groups_join.count() > 0) { research_groups_join.collectAsList().forEach(row -> { - cfOrgUnitTypeList_2.add(new ResearchGroup(row, Semantics.getClassId(ClassId.RESEARCH_GROUP), cfPersTypeList)); + cfOrgUnitTypeList.add(new ResearchGroup(row, Semantics.getClassId(ClassId.RESEARCH_GROUP), cfPersTypeList)); }); } diff --git a/transformation/src/main/java/org/csuc/marshal/Department.java b/transformation/src/main/java/org/csuc/marshal/Department.java index a68e963..4aec39d 100644 --- a/transformation/src/main/java/org/csuc/marshal/Department.java +++ b/transformation/src/main/java/org/csuc/marshal/Department.java @@ -114,13 +114,11 @@ private void createRelationCfPers() { if (Objects.nonNull(row.getAs(8))) { List relations = row.getList(8); - relations.forEach(relation -> { - if(relations.size() == 2){ - if (Objects.nonNull(relation.getAs(2))) { - researcher(relation.getAs(2)); - } - } - }); + if(!relations.isEmpty()){ + relations.forEach(relation -> { + if(Objects.nonNull(relation.get(1))) researcher(relation.getString(1)); + }); + } } } diff --git a/transformation/src/main/java/org/csuc/marshal/Project.java b/transformation/src/main/java/org/csuc/marshal/Project.java index 55e2903..71dbcbc 100644 --- a/transformation/src/main/java/org/csuc/marshal/Project.java +++ b/transformation/src/main/java/org/csuc/marshal/Project.java @@ -124,11 +124,10 @@ private void createRelationCfPers() { private void researcher(String id, String ip) { CfProjType.CfProjPers pers = new CfProjType.CfProjPers(); pers.setCfPersId(id); - if (ip.toLowerCase().equals("si") - || ip.toLowerCase().equals("s")) - pers.setCfClassId(Semantics.getClassId(ClassId.PRINCIPAL_INVESTIGATOR)); - else if (ip.toLowerCase().equals("no") - || ip.toLowerCase().equals("n")) pers.setCfClassId(Semantics.getClassId(ClassId.CO_INVESTIGATOR)); + + if(Objects.nonNull(ip) && (ip.toLowerCase().equals("si") + || ip.toLowerCase().equals("s"))) pers.setCfClassId(Semantics.getClassId(ClassId.PRINCIPAL_INVESTIGATOR)); + else pers.setCfClassId(Semantics.getClassId(ClassId.CO_INVESTIGATOR)); pers.setCfClassSchemeId(Semantics.getSchemaId(SchemeId.PERSON_PROJECT_ENGAGEMENTS)); getCfTitleOrCfAbstrOrCfKeyw().add(FACTORY.createCfProjTypeCfProjPers(pers)); diff --git a/transformation/src/main/java/org/csuc/marshal/Publication.java b/transformation/src/main/java/org/csuc/marshal/Publication.java index 10b0208..a5e2de0 100644 --- a/transformation/src/main/java/org/csuc/marshal/Publication.java +++ b/transformation/src/main/java/org/csuc/marshal/Publication.java @@ -176,8 +176,9 @@ private void createRelationCfPers() { private void researcher(String id, String direccio) { CfResPublType.CfPersResPubl pers = new CfResPublType.CfPersResPubl(); pers.setCfPersId(id); - if (direccio.toLowerCase().equals("si") - || direccio.toLowerCase().equals("s")) pers.setCfClassId(Semantics.getClassId(ClassId.DISS_DIRECTOR)); + + if(Objects.nonNull(direccio) && (direccio.toLowerCase().equals("si") + || direccio.toLowerCase().equals("s"))) pers.setCfClassId(Semantics.getClassId(ClassId.DISS_DIRECTOR)); else pers.setCfClassId(Semantics.getClassId(ClassId.AUTHOR)); pers.setCfClassSchemeId(Semantics.getSchemaId(SchemeId.PERSON_PROFESSIONAL_RELATIONSHIPS)); diff --git a/transformation/src/main/java/org/csuc/marshal/ResearchGroup.java b/transformation/src/main/java/org/csuc/marshal/ResearchGroup.java index a8f1176..2908039 100644 --- a/transformation/src/main/java/org/csuc/marshal/ResearchGroup.java +++ b/transformation/src/main/java/org/csuc/marshal/ResearchGroup.java @@ -136,11 +136,10 @@ private void createRelationCfPers() { private void researcher(String id, String interve) { CfOrgUnitType.CfPersOrgUnit persOrgUnit = new CfOrgUnitType.CfPersOrgUnit(); persOrgUnit.setCfPersId(id); - if (interve.toLowerCase().equals("si") - || interve.toLowerCase().equals("s")) - persOrgUnit.setCfClassId(Semantics.getClassId(ClassId.GROUP_LEADER)); - else if (interve.toLowerCase().equals("no") - || interve.toLowerCase().equals("n")) persOrgUnit.setCfClassId(Semantics.getClassId(ClassId.MEMBER)); + + if(Objects.nonNull(interve) && (interve.toLowerCase().equals("si") + || interve.toLowerCase().equals("s"))) persOrgUnit.setCfClassId(Semantics.getClassId(ClassId.GROUP_LEADER)); + else persOrgUnit.setCfClassId(Semantics.getClassId(ClassId.MEMBER)); persOrgUnit.setCfClassSchemeId(Semantics.getSchemaId(SchemeId.PERSON_ORGANISATION_ROLES)); getCfNameOrCfResActOrCfKeyw().add(FACTORY.createCfOrgUnitTypeCfPersOrgUnit(persOrgUnit)); From 348ac05d9f850bc64363e46ef789dfd7c34f1d97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Albert=20Mart=C3=ADnez?= Date: Tue, 8 Mar 2022 13:03:06 +0100 Subject: [PATCH 2/2] Bump dependencies com.typesafe:config 1.4.1 -> 1.4.2 info.picocli:picocli 4.6.2 -> 4.6.3 com.crealytics:spark-excel_2.12 3.2.0_0.16.0 -> 3.2.1_0.16.4 --- README.md | 2 +- pom.xml | 4 ++-- transformation/pom.xml | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 6dfb330..595cf2c 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ mvn clean install -DskipTests ## Command line https://spark.apache.org/docs/latest/submitting-applications.html ``` -spark-submit --master "local[*]" --class org.csuc.cli.Cerif --packages info.picocli:picocli:4.6.2,com.crealytics:spark-excel_2.12:3.2.0_0.16.0,com.typesafe:config:1.4.1 --jars euroCRIS-cerif-definitions-1.6.2.jar prc-cerif-${version}.jar args +spark-submit --master "local[*]" --class org.csuc.cli.Cerif --packages info.picocli:picocli:4.6.3,com.crealytics:spark-excel_2.12:3.2.1_0.16.4,com.typesafe:config:1.4.2 --jars euroCRIS-cerif-definitions-1.6.2.jar prc-cerif-${version}.jar args ``` ``` Usage: prc-cerif [-fhV] -i= [-o=] -r= diff --git a/pom.xml b/pom.xml index 0d86ab3..53cb514 100644 --- a/pom.xml +++ b/pom.xml @@ -25,8 +25,8 @@ 2.4.18 4.12 - 4.6.2 - 1.3.4 + 4.6.3 + 1.4.2 diff --git a/transformation/pom.xml b/transformation/pom.xml index f3f0099..6e84f2b 100644 --- a/transformation/pom.xml +++ b/transformation/pom.xml @@ -17,7 +17,7 @@ UTF-8 3.2.0 - 3.2.0_0.16.0 + 3.2.1_0.16.4 @@ -36,7 +36,6 @@ com.typesafe config - 1.4.1