Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pre-commit GitHub Action #95

Open
wants to merge 1 commit into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 16 additions & 0 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Run all configured pre-commit hooks across the repository.
# Triggers: manual dispatch, every push, and every pull request.
# NOTE(review): push + pull_request means same-repo PRs run this twice;
# consider restricting `push` to the default branch if that becomes noisy.
name: Run Pre-commit

on:
  workflow_dispatch:
  push:
  pull_request:

jobs:
  run-pre-commit:
    runs-on: ubuntu-latest
    steps:
      # v4 runs on the Node 20 runtime; v3 (Node 16) is deprecated.
      - uses: actions/checkout@v4
      # Pin a Python explicitly instead of relying on whatever the
      # runner image happens to ship as the default interpreter.
      - uses: actions/setup-python@v5
        with:
          python-version: "3.x"
      - name: Run pre-commit
        run: |
          pip install pre-commit
          pre-commit run --all-files
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@
from typing import Any

from data_processing.data_access import DataAccessFactoryBase
from data_processing.runtime import (
AbstractTransformFileProcessor,
)
from data_processing.transform import TransformStatistics
from data_processing.runtime import AbstractTransformFileProcessor
from data_processing.runtime.pure_python import PythonTransformRuntimeConfiguration
from data_processing.transform import TransformStatistics


class PythonTransformFileProcessor(AbstractTransformFileProcessor):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ def transform_binary(self, byte_array: bytes, ext: str) -> tuple[list[tuple[byte
# Add number of rows to stats
stats = stats | {"source_doc_count": table.num_rows}
# convert tables to files
return self._check_and_convert_tables(out_tables=out_tables, stats=stats | {"source_doc_count": table.num_rows})
return self._check_and_convert_tables(
out_tables=out_tables, stats=stats | {"source_doc_count": table.num_rows}
)

def transform(self, table: pa.Table) -> tuple[list[pa.Table], dict[str, Any]]:
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ class TransformConfiguration(CLIArgumentProvider):
This is a base transform configuration class defining transform's input/output parameter
"""

def __init__(self, name: str, transform_class: type[AbstractBinaryTransform], remove_from_metadata: list[str] = []):
def __init__(
self, name: str, transform_class: type[AbstractBinaryTransform], remove_from_metadata: list[str] = []
):
"""
Initialization
:param name: transformer name
Expand Down
2 changes: 1 addition & 1 deletion kfp/kfp_ray_components/src/execute_ray_job_multi_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
access_key, secret_key, url = KFPUtils.credentials()
# add s3 credentials to exec params
exec_params["data_s3_cred"] = (
"{'access_key': '" + access_key + "', 'secret_key': '" + secret_key + "', 'url': '" + url + "'}"
"{'access_key': '" + access_key + "', 'secret_key': '" + secret_key + "', 'url': '" + url + "'}"
)
# extra credentials
prefix = args.prefix
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@ def add_settings_to_component(
def set_s3_env_vars_to_component(
component: dsl.ContainerOp,
secret: str,
env2key: dict[str, str] = {"S3_KEY": "s3-key", "S3_SECRET": "s3-secret", "ENDPOINT": "s3-endpoint"},
env2key: dict[str, str] = {
"S3_KEY": "s3-key",
"S3_SECRET": "s3-secret", # pragma: allowlist secret
"ENDPOINT": "s3-endpoint",
},
prefix: str = None,
) -> None:
"""
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/fdedup/ray/src/fdedup_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
from data_processing.data_access import DataAccessFactoryBase
from data_processing.runtime.ray import (
DefaultRayTransformRuntime,
RayTransformLauncher,
RayTransformFileProcessor,
RayTransformLauncher,
RayUtils,
)
from data_processing.runtime.ray.runtime_configuration import (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ def _split_text_with_word_space(text: str, chunk_size: int) -> str:

if last_space_index != -1: # s[last_space_index] = ' '
# If found, return the chunk up to and include such space:
yield text[index: last_space_index + 1]
yield text[index : last_space_index + 1]
index = last_space_index + 1
else:
# If not, force cutting up to chunk_size:
yield text[index: index + chunk_size]
yield text[index : index + chunk_size]
index += chunk_size
else:
yield text[index: index + chunk_size]
yield text[index : index + chunk_size]
index += chunk_size


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@
"num_chars": 16836009
},
"source": {
"name": "/Users/xdang/00proj/04-FM/01_code/fm-data-engineering/transforms/universal/tokenization/test-data/ds02/input",
"name": "fm-data-engineering/transforms/universal/tokenization/test-data/ds02/input",
"type": "path"
},
"target": {
"name": "/Users/xdang/00proj/04-FM/01_code/fm-data-engineering/transforms/universal/tokenization/output/ds02",
"name": "fm-data-engineering/transforms/universal/tokenization/output/ds02",
"type": "path"
}
}