Skip to content

Commit

Permalink
Merge branch 'feature_java-11'
Browse files Browse the repository at this point in the history
  • Loading branch information
PastorGL committed Mar 7, 2023
2 parents 80217fb + 9c5bf3c commit b9d5df4
Show file tree
Hide file tree
Showing 32 changed files with 182 additions and 1,874 deletions.
79 changes: 12 additions & 67 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@
<configuration>
<addResourcesToClasspath>true</addResourcesToClasspath>
<classpathScope>test</classpathScope>
<mainClass>io.github.pastorgl.datacooker.doc.DocGen</mainClass>
<mainClass>io.github.pastorgl.datacooker.dist.doc.DocGen</mainClass>
<arguments>
<argument>${docs.output}</argument>
</arguments>
Expand All @@ -131,61 +131,18 @@
</build>

<dependencies>
<dependency>
<groupId>io.github.pastorgl.datacooker</groupId>
<artifactId>config</artifactId>
<version>3.0.11</version>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>2.13.3</version>
</dependency>
<!-- EMR 6.9 -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.13</artifactId>
<version>3.3.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.3.3</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.13.8</version>
</dependency>
<!-- end EMR deps -->
<dependency>
<groupId>com.google.flatbuffers</groupId>
<artifactId>flatbuffers-java</artifactId>
<version>23.1.21</version>
</dependency>

<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>5.7.1</version>
<exclusions>
<exclusion>
<groupId>commons-beanutils</groupId>
<artifactId>commons-beanutils</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
<version>1.12.3</version>
<groupId>io.github.pastorgl.datacooker</groupId>
<artifactId>commons</artifactId>
<version>3.0.11</version>
</dependency>

<dependency>
<groupId>com.github.alexmojaki</groupId>
<artifactId>s3-stream-upload</artifactId>
Expand All @@ -197,26 +154,14 @@
</exclusion>
</exclusions>
</dependency>
<dependency><!-- EMR 6.9 -->
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-s3</artifactId>
<version>1.12.170</version>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>io.github.pastorgl.datacooker</groupId>
<artifactId>cli</artifactId>
<version>3.0.11</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.velocity</groupId>
<artifactId>velocity-engine-core</artifactId>
Expand Down
218 changes: 0 additions & 218 deletions src/main/java/io/github/pastorgl/datacooker/data/BinRec.java

This file was deleted.

28 changes: 16 additions & 12 deletions src/main/java/io/github/pastorgl/datacooker/dist/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
package io.github.pastorgl.datacooker.dist;

import io.github.pastorgl.datacooker.config.InvalidConfigurationException;
import io.github.pastorgl.datacooker.storage.DataHolder;
import io.github.pastorgl.datacooker.data.DataStream;
import io.github.pastorgl.datacooker.storage.AdapterInfo;
import io.github.pastorgl.datacooker.storage.Adapters;
import io.github.pastorgl.datacooker.storage.InputAdapter;
import io.github.pastorgl.datacooker.storage.OutputAdapter;
Expand Down Expand Up @@ -102,28 +103,31 @@ public static void main(String[] args) {
String from = distTask.source.adapter;
String to = distTask.dest.adapter;

InputAdapter inputAdapter = Adapters.inputAdapter(from);
AdapterInfo inputAdapter = Adapters.INPUTS.get(from);
if (inputAdapter == null) {
throw new InvalidConfigurationException("Adapter named '" + from + "' not found");
}

inputAdapter.initialize(context);
Map<String, Object> params = new HashMap<>(globalParams);
params.putAll(distTask.source.params);
inputAdapter.configure(params);
List<DataHolder> rdd = inputAdapter.load(distTask.source.path);
InputAdapter ia = (InputAdapter) inputAdapter.configurable.getDeclaredConstructor().newInstance();
io.github.pastorgl.datacooker.config.Configuration config = new io.github.pastorgl.datacooker.config.Configuration(ia.meta.definitions, "Input " + ia.meta.verb, params);
ia.initialize(context, config, distTask.source.path);

for (DataHolder ds : rdd) {
OutputAdapter outputAdapter = Adapters.outputAdapter(to);
Map<String, DataStream> rdds = ia.load();

for (Map.Entry<String, DataStream> ds : rdds.entrySet()) {
AdapterInfo outputAdapter = Adapters.OUTPUTS.get(to);
if (outputAdapter == null) {
throw new InvalidConfigurationException("Adapter named '" + to + "' not found");
}

outputAdapter.initialize(context);
params = new HashMap<>(globalParams);
params.putAll(distTask.dest.params);
outputAdapter.configure(params);
outputAdapter.save(distTask.dest.path, ds);
OutputAdapter oa = (OutputAdapter) outputAdapter.configurable.getDeclaredConstructor().newInstance();
HashMap<String, Object> outParams = new HashMap<>(globalParams);
outParams.putAll(distTask.dest.params);
oa.initialize(context, new io.github.pastorgl.datacooker.config.Configuration(oa.meta.definitions, "Output " + oa.meta.verb, outParams), distTask.dest.path);

oa.save(ds.getKey(), ds.getValue());
}
}
} catch (Exception ex) {
Expand Down

This file was deleted.

Loading

0 comments on commit b9d5df4

Please sign in to comment.