Skip to content

Commit

Permalink
add doc_quality transform
Browse files Browse the repository at this point in the history
Signed-off-by: Daiki Tsuzuku <dtsuzuku@jp.ibm.com>
  • Loading branch information
dtsuzuku-ibm committed Jun 21, 2024
1 parent 399d2c1 commit a9b7807
Show file tree
Hide file tree
Showing 54 changed files with 4,462 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .make.versions
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ PROFILER_RAY_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX)

RESIZE_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)

# doc_quality transform: one version per runtime flavor (python, ray).
# Both take the common $(RELEASE_VERSION_SUFFIX) like the neighbouring entries.
DOC_QUALITY_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
DOC_QUALITY_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)

LANG_ID_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
LANG_ID_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)

Expand Down
1 change: 1 addition & 0 deletions scripts/kind/populate_minio.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ mc cp --recursive ${REPOROOT}/transforms/code/proglang_select/ray/test-data/lang
mc cp --recursive ${REPOROOT}/transforms/code/malware/ray/test-data/input/ kfp/test/malware/input
# language transforms: recursively copy each transform's ray/test-data/input
# directory to kfp/test/<transform>/input
mc cp --recursive ${REPOROOT}/transforms/language/lang_id/ray/test-data/input/ kfp/test/lang_id/input
mc cp --recursive ${REPOROOT}/transforms/language/doc_quality/ray/test-data/input/ kfp/test/doc_quality/input
# universal
mc cp --recursive ${REPOROOT}/transforms/universal/doc_id/ray/test-data/input/ kfp/test/doc_id/input
mc cp --recursive ${REPOROOT}/transforms/universal/ededup/ray/test-data/input/ kfp/test/ededup/input
Expand Down
2 changes: 2 additions & 0 deletions transforms/language/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
cc_net
lm_sp
24 changes: 24 additions & 0 deletions transforms/language/doc_quality/.make.doc_quality
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# lm_sp: make sure ../lm_sp exists and holds the files from cc_net/data/lm_sp.
#
# Steps (all relative to the parent of the invoking directory):
#   1. clone cc_net into ../cc_net if it is not there yet,
#   2. run cc_net's own `dl_lm` goal with lang=en,
#   3. recreate ../lm_sp and copy cc_net/data/lm_sp/* into it.
#
# The work is redone when either ../cc_net or ../lm_sp is missing, so a run
# that was interrupted after the clone is retried instead of being skipped.
# `set -e` aborts on the first failing step; without it a failed clone/cd
# would fall through to `rm -rf lm_sp` in the wrong directory.
# Plain `[ ]` is used so the recipe does not depend on SHELL being bash.
.PHONY: lm_sp
lm_sp::
	@# Help: Clone the cc_net repo with the kenlm model and build the model
	@if [ ! -d "../cc_net" ] || [ ! -d "../lm_sp" ]; then \
		set -e; \
		cd ..; \
		if [ ! -d cc_net ]; then \
			git clone https://github.com/facebookresearch/cc_net; \
		fi; \
		$(MAKE) -C cc_net lang=en dl_lm; \
		rm -rf lm_sp; \
		mkdir lm_sp; \
		cp cc_net/data/lm_sp/* lm_sp || { rm -rf lm_sp; exit 1; }; \
	fi

# clean_lm_sp: delete the two sibling directories that the lm_sp goal creates
# (the cc_net checkout and the copied lm_sp files).
.PHONY: clean_lm_sp
clean_lm_sp::
	rm -rf ../cc_net ../lm_sp

# test-image-pytest: run `pytest -s test` inside $(DOCKER_LOCAL_IMAGE) with the
# lm_sp directory that sits next to the invoking directory bind-mounted at
# $(DOCKER_HOME_DIR)/lm_sp.
#
# The host path is derived from $(CURDIR), which make itself keeps in sync
# with its working directory (including under `make -C`); the inherited PWD
# environment variable is not updated in that case. The path is quoted so a
# checkout location containing spaces still yields a single -v argument.
.PHONY: test-image-pytest
test-image-pytest::
	# Put this 2nd so its help shows up instead of .defaults.image help
	@# Help: Test $(DOCKER_LOCAL_IMAGE) using test source inside the image.
	$(DOCKER) run -t -v "$$(dirname "$(CURDIR)")/lm_sp":$(DOCKER_HOME_DIR)/lm_sp --rm $(DOCKER_LOCAL_IMAGE) pytest -s test
66 changes: 66 additions & 0 deletions transforms/language/doc_quality/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Repository root, three levels above this directory.
REPOROOT := ../../..
# Use make help, to see the available rules
include $(REPOROOT)/.make.defaults

# Every goal below does the same thing: it re-invokes make on the `.recurse`
# goal, passing its own name through RULE. `.recurse` is not defined in this
# file (presumably it comes from the included .make.defaults - confirm).
# The `@# Help:` line of each rule is kept as its first recipe line; all of
# them use one wording so the help output reads uniformly.

setup::
	@# Help: Recursively make $@ in all subdirs
	$(MAKE) RULE=$@ .recurse

clean::
	@# Help: Recursively make $@ in all subdirs
	$(MAKE) RULE=$@ .recurse

build::
	@# Help: Recursively make $@ in all subdirs
	$(MAKE) RULE=$@ .recurse

venv::
	@# Help: Recursively make $@ in all subdirs
	$(MAKE) RULE=$@ .recurse

image::
	@# Help: Recursively make $@ in all subdirs
	@$(MAKE) RULE=$@ .recurse

set-versions::
	@# Help: Recursively make $@ in all subdirs
	@$(MAKE) RULE=$@ .recurse

publish::
	@# Help: Recursively make $@ in all subdirs
	@$(MAKE) RULE=$@ .recurse

test-image::
	@# Help: Recursively make $@ in all subdirs
	@$(MAKE) RULE=$@ .recurse

test::
	@# Help: Recursively make $@ in all subdirs
	@$(MAKE) RULE=$@ .recurse

test-src::
	@# Help: Recursively make $@ in all subdirs
	$(MAKE) RULE=$@ .recurse

load-image::
	@# Help: Recursively make $@ in all subdirs
	$(MAKE) RULE=$@ .recurse

# Workflow goals are not implemented in this directory: each one is handed,
# under the same name, to the Makefile in kfp_ray/.
DOC_QUALITY_WORKFLOW_GOALS := workflow-venv workflow-build workflow-test \
    workflow-upload workflow-reconcile-requirements

.PHONY: $(DOC_QUALITY_WORKFLOW_GOALS)
$(DOC_QUALITY_WORKFLOW_GOALS):
	$(MAKE) -C kfp_ray $@
12 changes: 12 additions & 0 deletions transforms/language/doc_quality/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Document Quality Transform
The Document Quality transform serves as a simple exemplar to demonstrate the development
of a simple 1:1 transform. Per the set of
[transform project conventions](../../README.md#transform-project-conventions)
the following runtimes are available:

* [python](python/README.md) - provides the base python-based transformation
implementation.
* [ray](ray/README.md) - enables the running of the base python transformation
in a Ray runtime.
* [kfp](kfp_ray/README.md) - enables running the ray docker image for
document quality in a kubernetes cluster using a generated `yaml` file.
52 changes: 52 additions & 0 deletions transforms/language/doc_quality/kfp_ray/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Repository root, four levels above this directory, anchored on make's CURDIR.
REPOROOT=$(CURDIR)/../../../../
# Path of the `activate` script under transforms/venv.
WORKFLOW_VENV_ACTIVATE=$(REPOROOT)/transforms/venv/bin/activate
include $(REPOROOT)/transforms/.make.workflows

# The sibling ray/ directory; handed to the pipeline test as TRANSFORM_SRC.
SRC_DIR=$(CURDIR)/../ray/

# Every *_wf.py found below this directory, and the same names with .yaml.
PYTHON_WF := $(shell find ./ -name '*_wf.py')
YAML_WF := $(PYTHON_WF:.py=.yaml)

# workflow-venv needs the python version check and the venv activate script.
workflow-venv: .check_python_version $(WORKFLOW_VENV_ACTIVATE)

clean:: .defaults.clean

# Goals that intentionally do nothing in this directory. They are declared so
# the names exist here (presumably because the parent Makefile recurses these
# goals into every subdirectory - confirm against `.recurse`).
setup venv build test test-src test-image publish image load-image::

# Setting versions here means reconciling the workflow requirements.
set-versions: workflow-reconcile-requirements

.PHONY: workflow-build workflow-test

# workflow-build: once the workflow venv is in place, ask make for every
# .yaml counterpart of the *_wf.py files listed in YAML_WF.
workflow-build: workflow-venv
	$(MAKE) $(YAML_WF)

# workflow-test: after building, run the test-pipeline goal against
# doc_quality_wf.yaml with the ray sources as TRANSFORM_SRC.
workflow-test: workflow-build
	$(MAKE) .transforms_workflows.test-pipeline \
		TRANSFORM_SRC=$(SRC_DIR) \
		PIPELINE_FILE=doc_quality_wf.yaml

# workflow-upload: after building, run the upload-pipeline goal once per
# generated workflow yaml in YAML_WF.
# A shell `for` loop returns only the status of its last iteration, so each
# sub-make is followed by `|| exit 1`: the first failing upload stops the loop
# and fails this goal instead of being silently ignored.
.PHONY: workflow-upload
workflow-upload: workflow-build
	@for file in $(YAML_WF); do \
		$(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE="$$file" || exit 1; \
	done

# workflow-reconcile-requirements: run the reconcile-requirements goal once
# per *_wf.py file in PYTHON_WF.
# A shell `for` loop returns only the status of its last iteration, so each
# sub-make is followed by `|| exit 1`: the first failure stops the loop and
# fails this goal instead of being silently ignored.
.PHONY: workflow-reconcile-requirements
workflow-reconcile-requirements:
	@for file in $(PYTHON_WF); do \
		$(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE="$$file" || exit 1; \
	done
Loading

0 comments on commit a9b7807

Please sign in to comment.