Exclude spark from core jar (#281)
Excludes the Spark files from the core jar by running the maven-jar plugin an extra time to build a jar without the Spark classes. After maven-shade creates the uber jar, the Spark-excluded jar replaces the regular jar.
eneskuluk committed Apr 19, 2023
1 parent ed40e6c commit b0f4487
Showing 9 changed files with 116 additions and 61 deletions.
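A quick local sanity check of the resulting artifacts (a sketch — the version number is illustrative, and the JDK jar tool is assumed to be on the PATH):

VERSION=1.5.0-SNAPSHOT   # illustrative version
# Core jar: after the replacement step it should contain no Spark classes
jar tf target/genomicsdb-${VERSION}.jar | grep 'org/genomicsdb/spark' || echo 'core jar is Spark-free'
# Uber jar: the Spark bindings remain bundled under the new allinone-spark classifier
jar tf target/genomicsdb-${VERSION}-allinone-spark.jar | grep -c 'org/genomicsdb/spark'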
11 changes: 10 additions & 1 deletion .github/workflows/release.yml
@@ -16,6 +16,8 @@ env:
jobs:
build-and-push-mac-dylib:
runs-on: macos-11
outputs:
tag_message: ${{env.TAG_MESSAGE}}
permissions:
contents: read
packages: write
@@ -26,6 +28,12 @@ jobs:

- name: Set version number
run: echo VERSION_NUMBER=${GITHUB_REF_NAME:1} >> $GITHUB_ENV

- name: Set tag message
run: |
git fetch --tags --force
echo "TAG_MESSAGE=$(git tag -l --sort=-taggerdate --format='%(contents)' $(git describe --tags $(git branch --show-current) ))" >> $GITHUB_ENV
- name: Install Prerequisites
shell: bash
@@ -62,10 +70,11 @@ jobs:
dylib_artifact: libtiledbgenomicsdb.dylib.${{ github.ref_name }}

test:
needs: [release-jar]
needs: [build-and-push-mac-dylib, release-jar]
uses: ./.github/workflows/release_test.yml
with:
release_artifact: release.${{ github.ref_name }}
tag_message: ${{ needs.build-and-push-mac-dylib.outputs.tag_message }}

publish:
needs: [test]
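The new tag_message output carries the annotation of the release tag. The extraction can be previewed locally with the same commands the workflow runs (a sketch; it assumes an annotated tag reachable from the current branch):

git fetch --tags --force
# Prints the annotation of the tag describing the current branch — this is what lands in TAG_MESSAGE
git tag -l --sort=-taggerdate --format='%(contents)' $(git describe --tags $(git branch --show-current))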
2 changes: 1 addition & 1 deletion .github/workflows/release_jar.yml
@@ -46,7 +46,7 @@ jobs:
docker create -it --name genomicsdb ghcr.io/genomicsdb/genomicsdb:release bash
docker cp genomicsdb:/build/GenomicsDB/build/src/main/libtiledbgenomicsdb.so .
docker cp genomicsdb:/build/GenomicsDB/build/target/genomicsdb-${VERSION_NUMBER}.jar .
docker cp genomicsdb:/build/GenomicsDB/build/target/genomicsdb-${VERSION_NUMBER}-allinone.jar .
docker cp genomicsdb:/build/GenomicsDB/build/target/genomicsdb-${VERSION_NUMBER}-allinone-spark.jar .
docker cp genomicsdb:/build/GenomicsDB/build/target/genomicsdb-${VERSION_NUMBER}-sources.jar .
docker cp genomicsdb:/build/GenomicsDB/build/target/genomicsdb-${VERSION_NUMBER}-javadoc.jar .
docker cp genomicsdb:/build/GenomicsDB/pom.xml genomicsdb-${VERSION_NUMBER}.pom
23 changes: 14 additions & 9 deletions .github/workflows/release_publish.yml
@@ -41,7 +41,7 @@ jobs:
uses: actions/download-artifact@v3
with:
name: ${{ inputs.release_artifact }}

- name: Deploy Maven Central
shell: bash
run: |
@@ -57,23 +57,28 @@
-DgroupId=org.genomicsdb -DartifactId=genomicsdb -Dversion=${VERSION_NUMBER} \
-Dpackaging=jar -DpomFile=genomicsdb-${VERSION_NUMBER}.pom -DrepositoryId=$REPO_ID \
-Djavadoc=genomicsdb-${VERSION_NUMBER}-javadoc.jar \
-Dsources=genomicsdb-${VERSION_NUMBER}-sources.jar \
-Dfiles=genomicsdb-${VERSION_NUMBER}-allinone.jar \
-Dtypes=jar \
-Dclassifiers=allinone
-Dsources=genomicsdb-${VERSION_NUMBER}-sources.jar
mvn gpg:sign-and-deploy-file -Durl=$URL -Dfile=genomicsdb-${VERSION_NUMBER}.jar \
-DgroupId=org.genomicsdb -DartifactId=genomicsdb -Dversion=${VERSION_NUMBER} \
-Dpackaging=jar -DpomFile=genomicsdb-${VERSION_NUMBER}.pom -DrepositoryId=$REPO_ID \
-Djavadoc=genomicsdb-${VERSION_NUMBER}-javadoc.jar \
-Dsources=genomicsdb-${VERSION_NUMBER}-sources.jar \
-Dfiles=genomicsdb-${VERSION_NUMBER}-allinone.jar \
-Dtypes=jar \
-Dclassifiers=allinone
-Dsources=genomicsdb-${VERSION_NUMBER}-sources.jar
echo mvn gpg:sign-and-deploy-file -Durl=$URL -Dfile=genomicsdb-${VERSION_NUMBER}-allinone-spark.jar \
-DgroupId=org.genomicsdb -DartifactId=genomicsdb-spark -Dversion=${VERSION_NUMBER} \
-Dpackaging=jar -DpomFile=genomicsdb-${VERSION_NUMBER}.pom -DrepositoryId=$REPO_ID \
-Djavadoc=genomicsdb-${VERSION_NUMBER}-javadoc.jar \
-Dsources=genomicsdb-${VERSION_NUMBER}-sources.jar
mvn gpg:sign-and-deploy-file -Durl=$URL -Dfile=genomicsdb-${VERSION_NUMBER}-allinone-spark.jar \
-DgroupId=org.genomicsdb -DartifactId=genomicsdb-spark -Dversion=${VERSION_NUMBER} \
-Dpackaging=jar -DpomFile=genomicsdb-${VERSION_NUMBER}.pom -DrepositoryId=$REPO_ID \
-Djavadoc=genomicsdb-${VERSION_NUMBER}-javadoc.jar \
-Dsources=genomicsdb-${VERSION_NUMBER}-sources.jar
if [[ ${VERSION_NUMBER} != *SNAPSHOT ]]; then
mvn nexus-staging:rc-list -DnexusUrl=https://oss.sonatype.org/ -DserverId=ossrh -f genomicsdb-${VERSION_NUMBER}.pom
stagingRepoId=$(mvn nexus-staging:rc-list -DnexusUrl=https://oss.sonatype.org/ -DserverId=ossrh | grep orggenomicsdb|cut -f2 -d' ')
echo $stagingRepoId
mvn nexus-staging:rc-close -DserverId=ossrh -DnexusUrl=https://oss.sonatype.org/ -DstagingRepositoryId=$stagingRepoId -f genomicsdb-${VERSION_NUMBER}.pom
mvn nexus-staging:rc-close -DserverId=ossrh -DnexusUrl=https://oss.sonatype.org/ -DstagingRepositoryId=$stagingRepoId -f genomicsdb-spark-${VERSION_NUMBER}.pom
fi
env:
OSSRH_USERNAME: ${{ secrets.OSSRH_USERNAME }}
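After the deploy, the split artifacts resolve independently: the core jar stays at org.genomicsdb:genomicsdb, while the Spark uber jar is published under the separate genomicsdb-spark artifactId. A minimal resolution check (a sketch — the version is a placeholder):

VERSION=1.5.0   # placeholder release version
# Core GenomicsDB jar, now without the Spark bindings
mvn dependency:get -Dartifact=org.genomicsdb:genomicsdb:${VERSION}
# Shaded allinone-spark uber jar, deployed under the genomicsdb-spark artifactId
mvn dependency:get -Dartifact=org.genomicsdb:genomicsdb-spark:${VERSION}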
31 changes: 19 additions & 12 deletions .github/workflows/release_test.yml
@@ -6,6 +6,9 @@ on:
release_artifact:
required: true
type: string
tag_message:
required: false
type: string

jobs:
test-jar:
@@ -28,10 +31,12 @@ jobs:

- name: Set version number
run: |
echo ${{github.ref_name}}
echo VERSION_NUMBER=${GITHUB_REF_NAME:1} >> $GITHUB_ENV
echo GENOMICSDB_VERSION=${GITHUB_REF_NAME:1} >> $GITHUB_ENV
echo GENOMICSDB_MIN_TAG=v1.5.0-SNAPSHOT >> $GITHUB_ENV
- uses: actions/setup-java@v3
with:
distribution: 'temurin'
@@ -54,15 +59,17 @@
shell: bash
run: GENOMICSDB_TAG=${GENOMICSDB_MIN_TAG} ./test_genomicsdbjar.sh

#
# - name: Checkout GATK
# uses: actions/checkout@v3
# with:
# repository: broadinstitute/gatk
# lfs: 'true'
#
# - name: Try GATK integration test
# shell: bash
# run: |
# ./gradlew installDist -Dgenomicsdb.version=${VERSION_NUMBER}
# ./gradlew test --tests *GenomicsDB*

- name: Checkout GATK
if: ${{ !contains(inputs.tag_message,'skip-gatk-it') }}
uses: actions/checkout@v3
with:
repository: broadinstitute/gatk
lfs: 'true'

- name: Try GATK integration test
if: ${{ !contains(inputs.tag_message,'skip-gatk-it') }}
shell: bash
run: |
./gradlew installDist -Dgenomicsdb.version=${VERSION_NUMBER}
./gradlew test --tests *GenomicsDB*
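The tag_message input drives the two skip conditions above: when the release tag's annotation contains skip-gatk-it, both GATK steps are bypassed. A release manager could opt out of the GATK integration test like this (a sketch — the tag name is illustrative):

# Annotated tag whose message tells release_test.yml to skip the GATK integration test
git tag -a v1.5.1 -m 'Release 1.5.1 skip-gatk-it'
git push origin v1.5.1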
6 changes: 3 additions & 3 deletions CMakeLists.txt
@@ -548,14 +548,14 @@ if(BUILD_JAVA)

#Maven build - depends on dynamic library
add_custom_command(
OUTPUT ${GENOMICSDB_MAVEN_BUILD_DIR}/genomicsdb-${GENOMICSDB_RELEASE_VERSION}.jar ${GENOMICSDB_MAVEN_BUILD_DIR}/genomicsdb-${GENOMICSDB_RELEASE_VERSION}-allinone.jar
OUTPUT ${GENOMICSDB_MAVEN_BUILD_DIR}/genomicsdb-${GENOMICSDB_RELEASE_VERSION}.jar ${GENOMICSDB_MAVEN_BUILD_DIR}/genomicsdb-${GENOMICSDB_RELEASE_VERSION}-allinone-spark.jar
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/pom.xml ${CMAKE_BINARY_DIR}/pom.xml
COMMAND mvn versions:set ${MAVEN_QUIET_ARGS} -DnewVersion=${GENOMICSDB_RELEASE_VERSION} ${MAVEN_PROFILE}
COMMAND mvn package -DskipTests ${MAVEN_ARGS}
DEPENDS tiledbgenomicsdb ${JAVA_SCALA_SOURCES} pom.xml
WORKING_DIRECTORY ${CMAKE_BINARY_DIR})

install(FILES ${GENOMICSDB_MAVEN_BUILD_DIR}/genomicsdb-${GENOMICSDB_RELEASE_VERSION}.jar ${GENOMICSDB_MAVEN_BUILD_DIR}/genomicsdb-${GENOMICSDB_RELEASE_VERSION}-allinone.jar DESTINATION bin)
install(FILES ${GENOMICSDB_MAVEN_BUILD_DIR}/genomicsdb-${GENOMICSDB_RELEASE_VERSION}.jar ${GENOMICSDB_MAVEN_BUILD_DIR}/genomicsdb-${GENOMICSDB_RELEASE_VERSION}-allinone-spark.jar DESTINATION bin)

execute_process(
COMMAND ln -sf ${CMAKE_SOURCE_DIR}/tests ${CMAKE_BINARY_DIR})
@@ -575,7 +575,7 @@ if(BUILD_JAVA)
add_jar(genomicsdb-${GENOMICSDB_RELEASE_VERSION}-examples
SOURCES ${GENOMICSDB_EXAMPLE_SOURCES}
log4j.properties
INCLUDE_JARS ${GENOMICSDB_MAVEN_BUILD_DIR}/genomicsdb-${GENOMICSDB_RELEASE_VERSION}-allinone.jar
INCLUDE_JARS ${GENOMICSDB_MAVEN_BUILD_DIR}/genomicsdb-${GENOMICSDB_RELEASE_VERSION}-allinone-spark.jar
OUTPUT_DIR ${GENOMICSDB_MAVEN_BUILD_DIR})

#Deploy to Maven central
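For reference, the Maven artifacts above are produced through the CMake build, which copies pom.xml into the build tree and runs mvn package. Roughly (a sketch — the exact configure flags depend on the local setup):

mkdir -p build && cd build
cmake .. -DBUILD_JAVA=1    # enables the Java/Maven part of the build
make                       # builds libtiledbgenomicsdb and invokes mvn package
# genomicsdb-<version>.jar and genomicsdb-<version>-allinone-spark.jar land in the Maven build dir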
82 changes: 58 additions & 24 deletions pom.xml
@@ -89,11 +89,13 @@
<groupId>org.apache.spark</groupId>
<artifactId>${spark.core.artifactid}</artifactId>
<version>${spark.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>${spark.sql.artifactid}</artifactId>
<version>${spark.version}</version>
<optional>true</optional>
<exclusions>
<exclusion>
<groupId>com.google.protobuf</groupId>
@@ -193,29 +195,6 @@
</testExcludes>
</configuration>
</plugin>
<plugin>
<artifactId>maven-antrun-plugin</artifactId>
<version>3.1.0</version>
<executions>
<execution>
<id>unzip-test-artifacts</id>
<phase>process-test-resources</phase>
<configuration>
<target>
<mkdir dir="${genomicsdb_build_directory}/test" />
<copy file="${test_source_directory}/../inputs/test.tgz" tofile="${genomicsdb_build_directory}/test/test.tar.gz" />
<gunzip src="${genomicsdb_build_directory}/test/test.tar.gz" />
<untar src="${genomicsdb_build_directory}/test/test.tar" dest="${genomicsdb_build_directory}/test/"/>
<replace file="${genomicsdb_build_directory}/test/inputs/query.json" token="inputs/" value="${genomicsdb_build_directory}/test/inputs/" />
<replace file="${genomicsdb_build_directory}/test/inputs/loader.json" token="inputs/" value="${genomicsdb_build_directory}/test/inputs/" />
</target>
</configuration>
<goals>
<goal>run</goal>
</goals>
</execution>
</executions>
</plugin>

<!-- Jacoco adapted from
https://www.petrikainulainen.net/programming/maven/creating-code-coverage-reports-for-unit-and-integration-tests-with-the-jacoco-maven-plugin/ -->
@@ -296,6 +275,25 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.3.0</version>
<executions>
<execution>
<id>spark-excluded</id>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<classifier>spark</classifier>
<excludes>
<exclude>org/genomicsdb/spark/**</exclude>
</excludes>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
@@ -308,7 +306,7 @@
</goals>
<configuration>
<shadedArtifactAttached>true</shadedArtifactAttached>
<shadedClassifierName>allinone</shadedClassifierName>
<shadedClassifierName>allinone-spark</shadedClassifierName>
<filters>
<filter>
<artifact>*:*</artifact>
@@ -330,6 +328,42 @@
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-antrun-plugin</artifactId>
<version>3.1.0</version>
<executions>
<execution>
<id>unzip-test-artifacts</id>
<phase>process-test-resources</phase>
<configuration>
<target>
<mkdir dir="${genomicsdb_build_directory}/test" />
<copy file="${test_source_directory}/../inputs/test.tgz" tofile="${genomicsdb_build_directory}/test/test.tar.gz" />
<gunzip src="${genomicsdb_build_directory}/test/test.tar.gz" />
<untar src="${genomicsdb_build_directory}/test/test.tar" dest="${genomicsdb_build_directory}/test/"/>
<replace file="${genomicsdb_build_directory}/test/inputs/query.json" token="inputs/" value="${genomicsdb_build_directory}/test/inputs/" />
<replace file="${genomicsdb_build_directory}/test/inputs/loader.json" token="inputs/" value="${genomicsdb_build_directory}/test/inputs/" />
</target>
</configuration>
<goals>
<goal>run</goal>
</goals>
</execution>
<execution>
<id>replace-core-jar</id>
<phase>package</phase>
<configuration>
<target>
<move file="${project.build.directory}/genomicsdb-${genomicsdb.version}-spark.jar"
tofile="${project.build.directory}/genomicsdb-${genomicsdb.version}.jar" />
</target>
</configuration>
<goals>
<goal>run</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
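Within the package phase the declared plugins run in order: maven-jar first produces the Spark-excluded jar under the spark classifier, maven-shade then attaches the allinone-spark uber jar, and the antrun replace-core-jar execution finally moves the Spark-excluded jar over the main jar. The expected outputs after a build (a sketch — the version is illustrative):

mvn package -DskipTests
ls target/
# genomicsdb-<version>.jar                  core jar, org/genomicsdb/spark/** excluded (moved from the -spark.jar)
# genomicsdb-<version>-allinone-spark.jar   shaded uber jar with the Spark bindings and dependencies
# sources and javadoc jars are unchanged by this commit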
GenomicsDBSparkBindings.java
@@ -42,19 +42,19 @@

/**
* Example Invocation
* spark-submit --class org.genomicsdb.spark.api.GenomicsDBSparkBindings genomicsdb-1.3.1-SNAPSHOT-allinone.jar loader.json querypb.json true
* spark-submit --class org.genomicsdb.spark.api.GenomicsDBSparkBindings genomicsdb-1.3.1-SNAPSHOT-allinone-spark.jar loader.json querypb.json true
* querypb.json should be parseable by GenomicsDBExportConfiguration.ExportConfiguration
* OR
* spark-submit --class org.genomicsdb.spark.api.GenomicsDBSparkBindings genomicsdb-1.3.1-SNAPSHOT-allinone.jar loader.json query.json false
* spark-submit --class org.genomicsdb.spark.api.GenomicsDBSparkBindings genomicsdb-1.3.1-SNAPSHOT-allinone-spark.jar loader.json query.json false
* OR
* spark-submit --class org.genomicsdb.spark.api.GenomicsDBSparkBindings genomicsdb-1.3.1-SNAPSHOT-allinone.jar loader.json query.json
* spark-submit --class org.genomicsdb.spark.api.GenomicsDBSparkBindings genomicsdb-1.3.1-SNAPSHOT-allinone-spark.jar loader.json query.json
*/
public class GenomicsDBSparkBindings {
List<VariantCall> variantCalls;

public static void main(String[] args) throws IOException, ClassNotFoundException {
if (args.length < 2) {
throw new RuntimeException("Usage: spark-submit --class org.genomicsdb.spark.api.GenomicsDBSparkBindings genomicsdb-<VERSION>-allinone.jar <loader.json> <query.json> [<is_serialized_pb>]"+
throw new RuntimeException("Usage: spark-submit --class org.genomicsdb.spark.api.GenomicsDBSparkBindings genomicsdb-<VERSION>-allinone-spark.jar <loader.json> <query.json> [<is_serialized_pb>]"+
"Optional Argument 2 - <is_serialized_pb=True|False, default is false, if is_serialized_pb then query.json is a protobuf serialized file.");
}

4 changes: 2 additions & 2 deletions tests/common.py
@@ -55,7 +55,7 @@ def __find_genomicsdb_jar(target_dir, jar_file_name):

def setup_classpath(build_dir):
target_dir=os.path.join(build_dir,'target')
allinone_jar=__find_genomicsdb_jar(target_dir,'genomicsdb-*allinone.jar')
allinone_jar=__find_genomicsdb_jar(target_dir,'genomicsdb-*allinone-spark.jar')
examples_jar=__find_genomicsdb_jar(target_dir,'genomicsdb-*examples.jar')
if 'CLASSPATH' in os.environ:
classpath=os.environ['CLASSPATH']
@@ -94,7 +94,7 @@ def setup_jacoco(build_dir, build_type):
if e.errno != errno.EEXIST:
__error_exit('could not create jacoco-reports dir:'+e.errno+' '+e.filename+' '+e.strerror)
genomicsdb_classes_dir = os.path.join(target_dir, 'jacoco-classes')
allinone_archive = zipfile.ZipFile(__find_genomicsdb_jar(target_dir,'genomicsdb-*allinone.jar'))
allinone_archive = zipfile.ZipFile(__find_genomicsdb_jar(target_dir,'genomicsdb-*allinone-spark.jar'))
for file in allinone_archive.namelist():
if file.startswith('org/genomicsdb'):
allinone_archive.extract(file, genomicsdb_classes_dir)