Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ mvn clean install -DskipTests
## Command line
https://spark.apache.org/docs/latest/submitting-applications.html
```
spark-submit --master "local[*]" --class org.csuc.cli.App --packages info.picocli:picocli:4.6.2,com.crealytics:spark-excel_2.12:3.2.0_0.16.0,com.typesafe:config:1.4.1 --jars euroCRIS-cerif-definitions-1.6.2.jar prc-cerif-${version}.jar args
spark-submit --master "local[*]" --class org.csuc.cli.Cerif --packages info.picocli:picocli:4.6.3,com.crealytics:spark-excel_2.12:3.2.1_0.16.4,com.typesafe:config:1.4.2 --jars euroCRIS-cerif-definitions-1.6.2.jar prc-cerif-${version}.jar args
```
```
Usage: prc-cerif [-fhV] -i=<PATH> [-o=<PATH>] -r=<STRING>
Expand Down
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
<revision>2.4.18</revision>

<junit.version>4.12</junit.version>
<picocli.version>4.6.2</picocli.version>
<typesafe.config.version>1.3.4</typesafe.config.version>
<picocli.version>4.6.3</picocli.version>
<typesafe.config.version>1.4.2</typesafe.config.version>
</properties>

<dependencyManagement>
Expand Down
3 changes: 1 addition & 2 deletions transformation/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>

<spark.version>3.2.0</spark.version>
<spark.excel.version>3.2.0_0.16.0</spark.excel.version>
<spark.excel.version>3.2.1_0.16.4</spark.excel.version>
</properties>

<dependencies>
Expand All @@ -36,7 +36,6 @@
<dependency>
<groupId>com.typesafe</groupId>
<artifactId>config</artifactId>
<version>1.4.1</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"\tOS: ${os.name} ${os.version} ${os.arch}"
}
)
public class App implements Runnable {
public class Cerif implements Runnable {
@CommandLine.Spec
CommandLine.Model.CommandSpec spec;

Expand All @@ -53,7 +53,7 @@ public class App implements Runnable {
private Instant inici;

public static void main(String[] args) {
CommandLine cmd = new CommandLine(new App());
CommandLine cmd = new CommandLine(new Cerif());
if (args.length == 0) cmd.usage(System.out);
else cmd.execute(args);
}
Expand Down Expand Up @@ -242,12 +242,13 @@ public void run() {
Dataset<Row> research_groups_join = research_groups.join(research_groups_relations, col("research_groups._c4").equalTo(col("research_groups_relations._c0")), "left").drop(col("research_groups_relations._c0"));
Dataset<Row> publication_join = publications.join(publication_relations, col("publications._c1").equalTo(col("publication_relations._c0")), "left").drop(col("publication_relations._c0"));

//departments_join.write().parquet("/tmp/departments_join");

//CERIF
Marshaller marshaller = new Marshaller(ruct);

CopyOnWriteArrayList<CfPersType> cfPersTypeList = new CopyOnWriteArrayList<>();
CopyOnWriteArrayList<CfOrgUnitType> cfOrgUnitTypeList = new CopyOnWriteArrayList<>();
CopyOnWriteArrayList<CfOrgUnitType> cfOrgUnitTypeList_2 = new CopyOnWriteArrayList<>();
CopyOnWriteArrayList<CfProjType> cfProjTypeList = new CopyOnWriteArrayList<>();
CopyOnWriteArrayList<CfResPublType> cfResPublTypeList = new CopyOnWriteArrayList<>();

Expand All @@ -265,7 +266,7 @@ public void run() {

if (research_groups_join.count() > 0) {
research_groups_join.collectAsList().forEach(row -> {
cfOrgUnitTypeList_2.add(new ResearchGroup(row, Semantics.getClassId(ClassId.RESEARCH_GROUP), cfPersTypeList));
cfOrgUnitTypeList.add(new ResearchGroup(row, Semantics.getClassId(ClassId.RESEARCH_GROUP), cfPersTypeList));
});
}

Expand Down
12 changes: 5 additions & 7 deletions transformation/src/main/java/org/csuc/marshal/Department.java
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,11 @@ private void createRelationCfPers() {
if (Objects.nonNull(row.getAs(8))) {
List<Row> relations = row.getList(8);

relations.forEach(relation -> {
if(relations.size() == 2){
if (Objects.nonNull(relation.getAs(2))) {
researcher(relation.getAs(2));
}
}
});
if(!relations.isEmpty()){
relations.forEach(relation -> {
if(Objects.nonNull(relation.get(1))) researcher(relation.getString(1));
});
}
}
}

Expand Down
9 changes: 4 additions & 5 deletions transformation/src/main/java/org/csuc/marshal/Project.java
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,10 @@ private void createRelationCfPers() {
private void researcher(String id, String ip) {
CfProjType.CfProjPers pers = new CfProjType.CfProjPers();
pers.setCfPersId(id);
if (ip.toLowerCase().equals("si")
|| ip.toLowerCase().equals("s"))
pers.setCfClassId(Semantics.getClassId(ClassId.PRINCIPAL_INVESTIGATOR));
else if (ip.toLowerCase().equals("no")
|| ip.toLowerCase().equals("n")) pers.setCfClassId(Semantics.getClassId(ClassId.CO_INVESTIGATOR));

if(Objects.nonNull(ip) && (ip.toLowerCase().equals("si")
|| ip.toLowerCase().equals("s"))) pers.setCfClassId(Semantics.getClassId(ClassId.PRINCIPAL_INVESTIGATOR));
else pers.setCfClassId(Semantics.getClassId(ClassId.CO_INVESTIGATOR));

pers.setCfClassSchemeId(Semantics.getSchemaId(SchemeId.PERSON_PROJECT_ENGAGEMENTS));
getCfTitleOrCfAbstrOrCfKeyw().add(FACTORY.createCfProjTypeCfProjPers(pers));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,9 @@ private void createRelationCfPers() {
private void researcher(String id, String direccio) {
CfResPublType.CfPersResPubl pers = new CfResPublType.CfPersResPubl();
pers.setCfPersId(id);
if (direccio.toLowerCase().equals("si")
|| direccio.toLowerCase().equals("s")) pers.setCfClassId(Semantics.getClassId(ClassId.DISS_DIRECTOR));

if(Objects.nonNull(direccio) && (direccio.toLowerCase().equals("si")
|| direccio.toLowerCase().equals("s"))) pers.setCfClassId(Semantics.getClassId(ClassId.DISS_DIRECTOR));
else pers.setCfClassId(Semantics.getClassId(ClassId.AUTHOR));

pers.setCfClassSchemeId(Semantics.getSchemaId(SchemeId.PERSON_PROFESSIONAL_RELATIONSHIPS));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,10 @@ private void createRelationCfPers() {
private void researcher(String id, String interve) {
CfOrgUnitType.CfPersOrgUnit persOrgUnit = new CfOrgUnitType.CfPersOrgUnit();
persOrgUnit.setCfPersId(id);
if (interve.toLowerCase().equals("si")
|| interve.toLowerCase().equals("s"))
persOrgUnit.setCfClassId(Semantics.getClassId(ClassId.GROUP_LEADER));
else if (interve.toLowerCase().equals("no")
|| interve.toLowerCase().equals("n")) persOrgUnit.setCfClassId(Semantics.getClassId(ClassId.MEMBER));

if(Objects.nonNull(interve) && (interve.toLowerCase().equals("si")
|| interve.toLowerCase().equals("s"))) persOrgUnit.setCfClassId(Semantics.getClassId(ClassId.GROUP_LEADER));
else persOrgUnit.setCfClassId(Semantics.getClassId(ClassId.MEMBER));

persOrgUnit.setCfClassSchemeId(Semantics.getSchemaId(SchemeId.PERSON_ORGANISATION_ROLES));
getCfNameOrCfResActOrCfKeyw().add(FACTORY.createCfOrgUnitTypeCfPersOrgUnit(persOrgUnit));
Expand Down