Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .mise.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[tools]
python="3.11"
python="3.12"
poetry="2.3.3"
java="liberica-1.8.0"
767 changes: 482 additions & 285 deletions poetry.lock

Large diffs are not rendered by default.

113 changes: 0 additions & 113 deletions pylint_checkers/check_typing_imports.py

This file was deleted.

50 changes: 24 additions & 26 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,21 @@ packages = [
]

[tool.poetry.dependencies]
python = ">=3.10,<3.12"
python = ">=3.10,<3.13" # breaking changes beyond 3.12
boto3 = ">=1.34.162,<1.36" # breaking change beyond 1.36
botocore = ">=1.34.162,<1.36" # breaking change beyond 1.36
delta-spark = "2.4.*"
duckdb = "1.1.*" # breaking changes beyond 1.1
Jinja2 = "3.1.*"
lxml = "^4.9.1"
delta-spark = "3.2.0"
duckdb = "1.1.3" # breaking changes beyond 1.1
Jinja2 = "3.1.6"
lxml = "4.9.4"
numpy = "1.26.4"
openpyxl = "^3.1"
pandas = "^2.2.2"
polars = "0.20.*"
pyarrow = "^17.0.0"
pydantic = "1.10.16"
pyspark = "3.4.*"
typing_extensions = "^4.6.2"
openpyxl = "3.1.5"
pandas = "2.3.3"
polars = "0.20.31"
pyarrow = "17.0.0"
pydantic = "2.13.4"
pyspark = "3.5.2"
typing_extensions = "4.15.0"

[tool.poetry.group.dev]
optional = true
Expand All @@ -58,30 +58,30 @@ commitizen = "4.9.1"
pre-commit = "4.3.0"
charset-normalizer = "3.4.6"
python-discovery = "1.2.0"
requests = "2.33.0"

[tool.poetry.group.test]
optional = true

[tool.poetry.group.test.dependencies]
faker = "18.11.1"
behave = "1.3.3"
coverage = "7.11.0"
moto = {extras = ["s3"], version = "4.0.13"}
moto = {extras = ["s3"], version = "4.2.14"}
requests = "2.33.0" # dependency of `moto`
Werkzeug = "3.1.6"
pytest = "8.4.2"
pytest-lazy-fixtures = "1.4.0" # switched from https://github.com/TvoroG/pytest-lazy-fixture as it's no longer supported
xlsx2csv = "0.8.2"
xlsx2csv = "0.8.4" # polars requirement

[tool.poetry.group.lint]
optional = true

[tool.poetry.group.lint.dependencies]
black = "24.3.0"
astroid = "2.14.2"
isort = "5.11.5"
pylint = "2.16.4"
mypy = "0.991"
astroid = "3.3.9"
isort = "5.13.2"
pylint = "3.3.9"
mypy = "1.20.2"
librt = "0.11.0" # mypy dependency
boto3-stubs = {extras = ["essential"], version = "1.26.72"}
botocore-stubs = "1.29.72"
pandas-stubs = "1.2.0.62"
Expand All @@ -100,7 +100,7 @@ optional = true

[tool.poetry.group.docs.dependencies]
click = "8.2.1"
mkdocs = "^1.6.1"
mkdocs = "1.6.1"
mkdocstrings = { version = "1.0.3", extras = ["python"] }
griffelib = "2.0.1"
pymdown-extensions = "10.21.2"
Expand Down Expand Up @@ -142,10 +142,6 @@ source_pkgs = [
[tool.coverage.report]
show_missing = true

[tool.pylint]
init-hook = "import sys; sys.path.append('./pylint_checkers')"
load-plugins = "check_typing_imports"

[tool.pylint.main]
extension-pkg-allow-list = ["pyspark", "lxml", "pydantic"]
fail-under = 10.0
Expand Down Expand Up @@ -194,7 +190,7 @@ max-statements = 50
min-public-methods = 2

[tool.pylint.exceptions]
overgeneral-exceptions = ["BaseException", "Exception"]
overgeneral-exceptions = ["builtins.BaseException", "builtins.Exception"]

[tool.pylint.format]
ignore-long-lines = "^\\s*(# )?<?https?://\\S+>?$"
Expand Down Expand Up @@ -229,6 +225,8 @@ disable = [
"use-symbolic-message-instead",
"logging-fstring-interpolation",
"fixme",
"too-many-positional-arguments",
"too-many-arguments",
]
enable = ["c-extension-no-member"]

Expand Down
16 changes: 8 additions & 8 deletions src/dve/core_engine/backends/base/auditing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from types import TracebackType
from typing import Any, ClassVar, Generic, Optional, TypeVar, Union

from pydantic import ValidationError, validate_arguments
from pydantic import ValidationError, validate_call
from typing_extensions import Literal, get_origin

from dve.core_engine.models import (
Expand Down Expand Up @@ -98,8 +98,8 @@ def __init__(self, name: str, record_type: type[AuditRecord]):
def schema(self) -> dict[str, type]:
"""Determine python schema of auditor"""
return {
fld: str if get_origin(mdl.type_) == Literal else mdl.type_
for fld, mdl in self._record_type.__fields__.items()
fld: str if get_origin(mdl.annotation) == Literal else mdl.annotation # type: ignore
for fld, mdl in self._record_type.model_fields.items()
}

@staticmethod
Expand Down Expand Up @@ -195,7 +195,7 @@ def conv_to_iterable(recs: Union[AuditorType, AuditReturnType]) -> Iterable[dict
"""Convert AuditReturnType to iterable of dictionaries"""
raise NotImplementedError()

@validate_arguments
@validate_call
def add_processing_records(self, processing_records: list[ProcessingStatusRecord]):
"""Add an entry to the processing_status auditor."""
if self.pool:
Expand All @@ -207,7 +207,7 @@ def add_processing_records(self, processing_records: list[ProcessingStatusRecord
records=[dict(rec) for rec in processing_records]
)

@validate_arguments
@validate_call
def add_submission_statistics_records(self, sub_stats: list[SubmissionStatisticsRecord]):
"""Add an entry to the submission statistics auditor."""
if self.pool:
Expand All @@ -217,7 +217,7 @@ def add_submission_statistics_records(self, sub_stats: list[SubmissionStatistics
)
return self._submission_statistics.add_records(records=[dict(rec) for rec in sub_stats])

@validate_arguments
@validate_call
def add_transfer_records(self, transfer_records: list[TransferRecord]):
"""Add an entry to the transfers auditor"""
if self.pool:
Expand All @@ -226,7 +226,7 @@ def add_transfer_records(self, transfer_records: list[TransferRecord]):
)
return self._transfers.add_records(records=[dict(rec) for rec in transfer_records])

@validate_arguments
@validate_call
def add_new_submissions(
self,
submissions: list[SubmissionMetadata],
Expand All @@ -249,7 +249,7 @@ def add_new_submissions(
processing_status="received",
job_run_id=job_run_id,
**ts_info,
).dict(),
).model_dump(),
}
processing_status_recs.append(processing_rec)
if sub_info:
Expand Down
16 changes: 8 additions & 8 deletions src/dve/core_engine/backends/implementations/duckdb/auditing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
OrderCriteria,
)
from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import (
PYTHON_TYPE_TO_DUCKDB_TYPE,
get_duckdb_type_from_annotation,
table_exists,
)
from dve.core_engine.backends.utilities import PYTHON_TYPE_TO_POLARS_TYPE
from dve.core_engine.backends.utilities import get_polars_type_from_annotation
from dve.core_engine.models import (
AuditRecord,
ProcessingStatusRecord,
Expand Down Expand Up @@ -62,18 +62,18 @@ def ddb_create_table_sql(self) -> str:
"""Generate create table sql script for auditor"""
_sql_expression = f"CREATE TABLE {self._name} ("
_sql_expression += ", ".join(
[f"{fld} {PYTHON_TYPE_TO_DUCKDB_TYPE.get(dtype)}" for fld, dtype in self.schema.items()]
[
f"{fld} {get_duckdb_type_from_annotation(dtype)}"
for fld, dtype in self.schema.items()
]
)
_sql_expression += ")"
return _sql_expression

@property
def polars_schema(self) -> dict[str, PolarsType]:
"""Get polars dataframe schema for auditor"""
return {
fld: PYTHON_TYPE_TO_POLARS_TYPE.get(dtype, pl.Utf8) # type: ignore
for fld, dtype in self.schema.items()
}
return {fld: get_polars_type_from_annotation(dtype) for fld, dtype in self.schema.items()}

def get_relation(self) -> DuckDBPyRelation:
"""Get a relation to interact with the auditor duckdb table"""
Expand Down Expand Up @@ -106,7 +106,7 @@ def conv_to_entity(self, recs: list[AuditRecord]) -> DuckDBPyRelation:
"""Convert a list of audit records to a relation"""
# pylint: disable=W0612
rec_df = pl.DataFrame( # type: ignore
[rec.dict() for rec in recs],
[rec.model_dump() for rec in recs],
schema=self.polars_schema,
)
return self._connection.sql("select * from rec_df")
Expand Down
17 changes: 7 additions & 10 deletions src/dve/core_engine/backends/implementations/duckdb/contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from duckdb.typing import DuckDBPyType
from polars.datatypes.classes import DataTypeClass as PolarsType
from pydantic import BaseModel
from pydantic.fields import ModelField

import dve.parser.file_handling as fh
from dve.common.error_utils import (
Expand Down Expand Up @@ -96,8 +95,8 @@ def create_entity_from_py_iterator( # pylint: disable=unused-argument
) -> DuckDBPyRelation:
"""Create DuckDB Relation from iterator of records"""
polars_schema: dict[str, PolarsType] = {
fld.name: get_polars_type_from_annotation(fld.type_)
for fld in stringify_model(schema).__fields__.values()
name: get_polars_type_from_annotation(fld.annotation)
for name, fld in stringify_model(schema).model_fields.items()
}
_lazy_df = pl.LazyFrame(records, polars_schema) # type: ignore # pylint: disable=unused-variable
return self._connection.sql("select * from _lazy_df")
Expand Down Expand Up @@ -130,17 +129,15 @@ def apply_data_contract(
) as msg_writer:
for entity_name, relation in entities.items():
# get dtypes for all fields -> python data types or use with relation
entity_fields: dict[str, ModelField] = contract_metadata.schemas[
entity_name
].__fields__
entity_fields = contract_metadata.schemas[entity_name].model_fields
ddb_schema: dict[str, DuckDBPyType] = {
fld.name: get_duckdb_type_from_annotation(fld.annotation)
for fld in entity_fields.values()
name: get_duckdb_type_from_annotation(fld.annotation)
for name, fld in entity_fields.items()
}
ddb_schema[RECORD_INDEX_COLUMN_NAME] = get_duckdb_type_from_annotation(int)
polars_schema: dict[str, PolarsType] = {
fld.name: get_polars_type_from_annotation(fld.annotation)
for fld in entity_fields.values()
name: get_polars_type_from_annotation(fld.annotation)
for name, fld in entity_fields.items()
}
polars_schema[RECORD_INDEX_COLUMN_NAME] = get_polars_type_from_annotation(int)
if relation_is_empty(relation):
Expand Down
Loading
Loading