Skip to content

Commit

Permalink
Merge branch 'main' into add-column-level-lineage
Browse files Browse the repository at this point in the history
  • Loading branch information
pawel-big-lebowski committed Sep 28, 2022
2 parents b6d37fe + 2909864 commit 1aefca2
Show file tree
Hide file tree
Showing 42 changed files with 949 additions and 132 deletions.
10 changes: 7 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ only-on-release: &only-on-release
branches:
ignore: /.*/

orbs:
# https://circleci.com/orbs/registry/orb/codecov/codecov
codecov: codecov/codecov@3.2.3

jobs:
build-api:
working_directory: ~/marquez
Expand All @@ -32,7 +36,7 @@ jobs:
- run: ./gradlew --no-daemon --stacktrace api:javadoc
- run: ./gradlew --no-daemon --stacktrace api:build
- run: ./gradlew --no-daemon api:jacocoTestReport
- run: bash <(curl -s https://codecov.io/bash)
- codecov/upload
- store_test_results:
path: api/build/test-results/test
- store_artifacts:
Expand Down Expand Up @@ -99,7 +103,7 @@ jobs:
- run: ./gradlew --no-daemon --stacktrace clients:java:javadoc
- run: ./gradlew --no-daemon --stacktrace clients:java:build
- run: ./gradlew --no-daemon clients:java:jacocoTestReport
- run: bash <(curl -s https://codecov.io/bash)
- codecov/upload
- store_test_results:
path: clients/java/build/test-results/test
- store_artifacts:
Expand All @@ -119,7 +123,7 @@ jobs:
- run: pip install -e .[dev]
- run: flake8
- run: pytest --cov=marquez_python tests/
- run: bash <(curl -s https://codecov.io/bash)
- codecov/upload

build-client-python:
working_directory: ~/marquez/clients/python
Expand Down
2 changes: 1 addition & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
*.md
.*
Dockerfile
build
api/build
docs
web/node_modules
2 changes: 1 addition & 1 deletion .env.example
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
API_PORT=5000
API_ADMIN_PORT=5001
WEB_PORT=3000
TAG=0.19.0
TAG=0.26.0
47 changes: 47 additions & 0 deletions .github/workflows/headerchecker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
---
# Workflow: fails the build when a changed/added .java file is missing a
# "Copyright" header line. Runs on renovate branches and on PRs into main.
name: Check for Headers in Source Code

on:
  push:
    branches: ['renovate/**']
  pull_request:
    branches: [main]

jobs:
  build:
    name: Check for Headers
    runs-on: ubuntu-latest

    steps:
      # fetch-depth: 0 pulls full history so changed-file detection can diff
      # against the base branch.
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      # Emits the list of changed files as a JSON array in
      # steps.files.outputs.all.
      - id: files
        uses: jitterbit/get-changed-files@v1
        with:
          format: 'json'
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Check for headers
        run: |
          ok=1
          # Flatten the JSON array into one path per line, then read into a
          # bash array.
          readarray -t files <<<"$(jq -r '.[]' <<<'${{ steps.files.outputs.all }}')"
          # Quote the expansion so paths containing spaces are not split.
          for file in "${files[@]}"; do
            if [[ $file == *".java" ]]; then
              if ! grep -q Copyright "$file"; then
                ok=0
                echo "Copyright header not found in $file"
              fi
            fi
          done
          if [[ $ok == 0 ]]; then
            exit 1
          else
            GREEN="\e[32m"
            # Fix: ENDCOLOR was used below but never defined, so the ANSI
            # green set by ${GREEN} was never reset.
            ENDCOLOR="\e[0m"
            echo -e "${GREEN}All changed & added files have been scanned. Result: no headers are missing.${ENDCOLOR}"
          fi
6 changes: 3 additions & 3 deletions .github/workflows/test-chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ jobs:
fetch-depth: 0

- name: Setup Helm
uses: azure/setup-helm@v2.1
uses: azure/setup-helm@v2.2

- name: Setup Python
uses: actions/setup-python@v3
with:
python-version: 3.7

- name: Setup chart-testing
uses: helm/chart-testing-action@v2.2.1
uses: helm/chart-testing-action@v2.3.0

- name: Run chart-testing (list-changed)
id: list-changed
Expand All @@ -38,7 +38,7 @@ jobs:
run: ct lint --config ct.yaml

- name: Create kind cluster
uses: helm/kind-action@v1.2.0
uses: helm/kind-action@v1.4.0

- name: Run chart-testing (install)
run: ct install --config ct.yaml
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
# Changelog

## [Unreleased](https://github.com/MarquezProject/marquez/compare/0.26.0...HEAD)

### Added
* Implemented dataset symlink feature which allows providing multiple names for a dataset and adds edges to lineage graph based on symlinks [`#2066`](https://github.com/MarquezProject/marquez/pull/2066) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski)
* Store column lineage facets in separate table [`#2096`](https://github.com/MarquezProject/marquez/pull/2096) [@mzareba382](https://github.com/mzareba382) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski)

### Fixed
* Add support for `parentRun` facet as reported by older Airflow OpenLineage versions [@collado-mike](https://github.com/collado-mike)

## [0.26.0](https://github.com/MarquezProject/marquez/compare/0.25.0...0.26.0) - 2022-09-15

Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ COPY build.gradle build.gradle
COPY api ./api
COPY api/build.gradle ./api/build.gradle
COPY clients/java ./clients/java
RUN ./gradlew --no-daemon :api:shadowJar
RUN ./gradlew --no-daemon clean :api:shadowJar

FROM eclipse-temurin:17
RUN apt-get update && apt-get install -y postgresql-client bash coreutils
Expand Down
2 changes: 1 addition & 1 deletion api/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ dependencies {
implementation "io.sentry:sentry:${sentryVersion}"
implementation 'org.flywaydb:flyway-core:8.5.13'
implementation "org.postgresql:postgresql:${postgresqlVersion}"
implementation 'com.graphql-java:graphql-java:18.1'
implementation 'com.graphql-java:graphql-java:18.3'
implementation 'com.graphql-java-kickstart:graphql-java-servlet:12.0.0'

testImplementation "io.dropwizard:dropwizard-testing:${dropwizardVersion}"
Expand Down
3 changes: 3 additions & 0 deletions api/src/main/java/marquez/db/BaseDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ public interface BaseDao extends SqlObject {
@CreateSqlObject
NamespaceDao createNamespaceDao();

@CreateSqlObject
DatasetSymlinkDao createDatasetSymlinkDao();

@CreateSqlObject
RunDao createRunDao();

Expand Down
3 changes: 3 additions & 0 deletions api/src/main/java/marquez/db/Columns.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ private Columns() {}
public static final String FIELD_UUIDS = "field_uuids";
public static final String LIFECYCLE_STATE = "lifecycle_state";

/* DATASET SYMLINK ROW COLUMNS */
public static final String IS_PRIMARY = "is_primary";

/* STREAM VERSION ROW COLUMNS */
public static final String SCHEMA_LOCATION = "schema_location";

Expand Down
26 changes: 16 additions & 10 deletions api/src/main/java/marquez/db/DatasetDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import marquez.db.mappers.DatasetMapper;
import marquez.db.mappers.DatasetRowMapper;
import marquez.db.models.DatasetRow;
import marquez.db.models.DatasetSymlinkRow;
import marquez.db.models.DatasetVersionRow;
import marquez.db.models.NamespaceRow;
import marquez.db.models.SourceRow;
Expand Down Expand Up @@ -73,8 +74,7 @@ void updateLastModifiedAt(
WITH selected_datasets AS (
SELECT d.*
FROM datasets_view d
WHERE d.namespace_name = :namespaceName
AND d.name = :datasetName
WHERE CAST((:namespaceName, :datasetName) AS DATASET_NAME) = ANY(d.dataset_symlinks)
), dataset_runs AS (
SELECT d.uuid, d.name, d.namespace_name, dv.run_uuid, dv.lifecycle_state, event_time, event
FROM selected_datasets d
Expand Down Expand Up @@ -229,7 +229,7 @@ INSERT INTO datasets (
:description,
:isDeleted,
false
) ON CONFLICT (namespace_uuid, name)
) ON CONFLICT (uuid)
DO UPDATE SET
type = EXCLUDED.type,
updated_at = EXCLUDED.updated_at,
Expand Down Expand Up @@ -275,7 +275,7 @@ DatasetRow upsert(
+ ":sourceName, "
+ ":name, "
+ ":physicalName) "
+ "ON CONFLICT (namespace_uuid, name) "
+ "ON CONFLICT (uuid) "
+ "DO UPDATE SET "
+ "type = EXCLUDED.type, "
+ "updated_at = EXCLUDED.updated_at, "
Expand All @@ -296,8 +296,10 @@ DatasetRow upsert(
"""
UPDATE datasets
SET is_hidden = true
WHERE namespace_name = :namespaceName
AND name = :name
FROM dataset_symlinks, namespaces
WHERE dataset_symlinks.dataset_uuid = datasets.uuid
AND namespaces.uuid = dataset_symlinks.namespace_uuid
AND namespaces.name=:namespaceName AND dataset_symlinks.name=:name
RETURNING *
""")
Optional<DatasetRow> delete(String namespaceName, String name);
Expand All @@ -310,6 +312,10 @@ default Dataset upsertDatasetMeta(
createNamespaceDao()
.upsertNamespaceRow(
UUID.randomUUID(), now, namespaceName.getValue(), DEFAULT_NAMESPACE_OWNER);
DatasetSymlinkRow symlinkRow =
createDatasetSymlinkDao()
.upsertDatasetSymlinkRow(
UUID.randomUUID(), datasetName.getValue(), namespaceRow.getUuid(), true, null, now);
SourceRow sourceRow =
createSourceDao()
.upsertOrDefault(
Expand All @@ -318,13 +324,12 @@ default Dataset upsertDatasetMeta(
now,
datasetMeta.getSourceName().getValue(),
"");
UUID newDatasetUuid = UUID.randomUUID();
DatasetRow datasetRow;

if (datasetMeta.getDescription().isPresent()) {
datasetRow =
upsert(
newDatasetUuid,
symlinkRow.getUuid(),
datasetMeta.getType(),
now,
namespaceRow.getUuid(),
Expand All @@ -338,7 +343,7 @@ default Dataset upsertDatasetMeta(
} else {
datasetRow =
upsert(
newDatasetUuid,
symlinkRow.getUuid(),
datasetMeta.getType(),
now,
namespaceRow.getUuid(),
Expand All @@ -349,7 +354,8 @@ default Dataset upsertDatasetMeta(
datasetMeta.getPhysicalName().getValue());
}

updateDatasetMetric(namespaceName, datasetMeta.getType(), newDatasetUuid, datasetRow.getUuid());
updateDatasetMetric(
namespaceName, datasetMeta.getType(), symlinkRow.getUuid(), datasetRow.getUuid());

TagDao tagDao = createTagDao();
List<DatasetTagMapping> datasetTagMappings = new ArrayList<>();
Expand Down
51 changes: 51 additions & 0 deletions api/src/main/java/marquez/db/DatasetSymlinkDao.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
 * Copyright 2018-2022 contributors to the Marquez project
 * SPDX-License-Identifier: Apache-2.0
 */

package marquez.db;

import java.time.Instant;
import java.util.Optional;
import java.util.UUID;
import marquez.db.mappers.DatasetSymlinksRowMapper;
import marquez.db.models.DatasetSymlinkRow;
import org.jdbi.v3.sqlobject.config.RegisterRowMapper;
import org.jdbi.v3.sqlobject.statement.SqlQuery;
import org.jdbi.v3.sqlobject.statement.SqlUpdate;

/**
 * Data access object for the {@code dataset_symlinks} table, which stores alternate
 * (namespace, name) identifiers for a dataset. Rows are unique per
 * {@code (name, namespace_uuid)} pair.
 */
@RegisterRowMapper(DatasetSymlinksRowMapper.class)
public interface DatasetSymlinkDao extends BaseDao {

/**
 * Inserts a symlink row if none exists for {@code (name, namespaceUuid)}, then reads the row
 * back. Because the insert uses {@code ON CONFLICT ... DO NOTHING}, an already-existing row is
 * returned unchanged — the {@code uuid}, {@code isPrimary}, and {@code type} arguments are
 * ignored in that case.
 *
 * @param uuid candidate value for the row's {@code dataset_uuid} column (used only on insert)
 * @param name dataset symlink name
 * @param namespaceUuid owning namespace UUID
 * @param isPrimary whether this symlink is the dataset's primary name (insert only)
 * @param type symlink type; may be null (insert only)
 * @param now timestamp used for both {@code created_at} and {@code updated_at} on insert
 * @return the existing or newly inserted row; never empty after a successful upsert
 */
default DatasetSymlinkRow upsertDatasetSymlinkRow(
UUID uuid, String name, UUID namespaceUuid, boolean isPrimary, String type, Instant now) {
doUpsertDatasetSymlinkRow(uuid, name, namespaceUuid, isPrimary, type, now);
// Re-read so callers always get the persisted row (pre-existing or just inserted).
return findDatasetSymlinkByNamespaceUuidAndName(namespaceUuid, name).orElseThrow();
}

/**
 * Looks up a symlink row by its unique {@code (namespace_uuid, name)} key.
 *
 * @param namespaceUuid owning namespace UUID
 * @param name symlink name
 * @return the matching row, or empty if none exists
 */
@SqlQuery("SELECT * FROM dataset_symlinks WHERE namespace_uuid = :namespaceUuid and name = :name")
Optional<DatasetSymlinkRow> findDatasetSymlinkByNamespaceUuidAndName(
UUID namespaceUuid, String name);

/**
 * Raw insert backing {@link #upsertDatasetSymlinkRow}; silently does nothing when a row with
 * the same {@code (name, namespace_uuid)} already exists. Note the {@code uuid} argument is
 * bound to the {@code dataset_uuid} column.
 */
@SqlUpdate(
"""
INSERT INTO dataset_symlinks (
dataset_uuid,
name,
namespace_uuid,
is_primary,
type,
created_at,
updated_at
) VALUES (
:uuid,
:name,
:namespaceUuid,
:isPrimary,
:type,
:now,
:now)
ON CONFLICT (name, namespace_uuid) DO NOTHING""")
void doUpsertDatasetSymlinkRow(
UUID uuid, String name, UUID namespaceUuid, boolean isPrimary, String type, Instant now);
}
Loading

0 comments on commit 1aefca2

Please sign in to comment.