Skip to content

Commit

Permalink
more data factory fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
blublinsky committed Apr 26, 2024
1 parent 1c7a0ff commit e66d7b8
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 15 deletions.
2 changes: 0 additions & 2 deletions transforms/code/code_quality/src/code_quality_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@

import argparse
import os
import sys

import numpy as np
import pyarrow
import pyarrow as pa
from bs4 import BeautifulSoup
from data_processing.ray import DefaultTableTransformConfiguration, TransformLauncher
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import pyarrow as pa
import ray
from data_processing.data_access import DataAccess, DataAccessFactory, DataAccessLocal
from data_processing.data_access import DataAccess, DataAccessFactoryBase, DataAccessFactory
from data_processing.ray import (
DefaultTableTransformConfiguration,
DefaultTableTransformRuntime,
Expand Down Expand Up @@ -120,7 +120,7 @@ def __init__(self, params: dict[str, Any]):

def get_transform_config(
self,
data_access_factory: DataAccessFactory,
data_access_factory: DataAccessFactoryBase,
statistics: ActorHandle,
files: list[str],
) -> dict[str, Any]:
Expand Down
4 changes: 2 additions & 2 deletions transforms/universal/doc_id/src/doc_id_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import pyarrow as pa
import ray
from data_processing.data_access import DataAccessFactory
from data_processing.data_access import DataAccessFactoryBase
from data_processing.ray import (
DefaultTableTransformConfiguration,
DefaultTableTransformRuntime,
Expand Down Expand Up @@ -128,7 +128,7 @@ def __init__(self, params: dict[str, Any]):
super().__init__(params)

def get_transform_config(
self, data_access_factory: DataAccessFactory, statistics: ActorHandle, files: list[str]
self, data_access_factory: DataAccessFactoryBase, statistics: ActorHandle, files: list[str]
) -> dict[str, Any]:
"""
Set environment for filter execution
Expand Down
4 changes: 2 additions & 2 deletions transforms/universal/ededup/src/ededup_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import pyarrow as pa
import ray
from data_processing.data_access import DataAccessFactory
from data_processing.data_access import DataAccessFactoryBase
from data_processing.ray import (
DefaultTableTransformConfiguration,
DefaultTableTransformRuntime,
Expand Down Expand Up @@ -178,7 +178,7 @@ def __init__(self, params: dict[str, Any]):
self.filters = []

def get_transform_config(
self, data_access_factory: DataAccessFactory, statistics: ActorHandle, files: list[str]
self, data_access_factory: DataAccessFactoryBase, statistics: ActorHandle, files: list[str]
) -> dict[str, Any]:
"""
Set environment for transform execution
Expand Down
12 changes: 6 additions & 6 deletions transforms/universal/fdedup/src/fdedup_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import numpy as np
import pyarrow as pa
import ray
from data_processing.data_access import DataAccessFactory
from data_processing.data_access import DataAccessFactoryBase
from data_processing.ray import (
DefaultTableTransformConfiguration,
DefaultTableTransformRuntime,
Expand Down Expand Up @@ -334,7 +334,7 @@ def __init__(self, params: dict[str, Any]):
self.random_delay_limit = self.params.get("random_delay_limit", 10)

def get_transform_config(
self, data_access_factory: DataAccessFactory, statistics: ActorHandle, files: list[str]
self, data_access_factory: DataAccessFactoryBase, statistics: ActorHandle, files: list[str]
) -> dict[str, Any]:
"""
Set environment for filter execution
Expand Down Expand Up @@ -369,7 +369,7 @@ def get_transform_config(
}

def _create_doc_actors(
self, data_access_factory: DataAccessFactory, statistics: ActorHandle, files: list[str]
self, data_access_factory: DataAccessFactoryBase, statistics: ActorHandle, files: list[str]
) -> None:
"""
Create document actors
Expand Down Expand Up @@ -420,7 +420,7 @@ def _create_doc_actors(
)

def _create_doc_actors_internal(
self, data_access_factory: DataAccessFactory, statistics: ActorHandle, mn_min_hash: MurmurMH, files: list[str]
self, data_access_factory: DataAccessFactoryBase, statistics: ActorHandle, mn_min_hash: MurmurMH, files: list[str]
) -> None:
"""
Create document actors
Expand Down Expand Up @@ -495,7 +495,7 @@ def _create_doc_actors_internal(

def _process_buckets(
self,
data_access_factory: DataAccessFactory,
data_access_factory: DataAccessFactoryBase,
statistics: ActorHandle,
bucket_collectors: list[ActorHandle],
minhash_collectors: list[ActorHandle],
Expand Down Expand Up @@ -591,7 +591,7 @@ def _process_buckets(

def _preprocess_tables(
self,
data_access_factory: DataAccessFactory,
data_access_factory: DataAccessFactoryBase,
statistics: ActorHandle,
files: list[str],
mn_min_hash: MurmurMH,
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/filter/src/filter_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import pyarrow as pa
from data_processing.ray import DefaultTableTransformConfiguration, TransformLauncher
from data_processing.transform import AbstractTableTransform
from data_processing.utils import CLIArgumentProvider, ParamsUtils, get_logger
from data_processing.utils import CLIArgumentProvider, get_logger


logger = get_logger(__name__)
Expand Down

0 comments on commit e66d7b8

Please sign in to comment.