Skip to content

Commit

Permalink
fix(ingest): cleanup config extra usage (datahub-project#6699)
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored and cccs-Dustin committed Feb 1, 2023
1 parent 4ed47da commit 4007a3e
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 20 deletions.
11 changes: 10 additions & 1 deletion metadata-ingestion/src/datahub/configuration/common.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
import re
import unittest.mock
from abc import ABC, abstractmethod
from enum import auto
from typing import IO, Any, ClassVar, Dict, List, Optional, Type
from typing import IO, Any, ClassVar, Dict, List, Optional, Type, TypeVar

import pydantic
from cached_property import cached_property
from pydantic import BaseModel, Extra
from pydantic.fields import Field

from datahub.configuration._config_enum import ConfigEnum
from datahub.utilities.dedup_list import deduplicate_list

_ConfigSelf = TypeVar("_ConfigSelf", bound="ConfigModel")


class ConfigModel(BaseModel):
class Config:
Expand All @@ -31,6 +35,11 @@ def schema_extra(schema: Dict[str, Any], model: Type["ConfigModel"]) -> None:
for key in remove_fields:
del schema["properties"][key]

@classmethod
def parse_obj_allow_extras(cls: Type[_ConfigSelf], obj: Any) -> _ConfigSelf:
    """Parse ``obj`` into an instance of ``cls`` while tolerating unknown keys.

    For the duration of the parse, ``cls.Config.extra`` is overridden with
    ``Extra.allow`` so that keys not declared on the model do not fail
    validation. The previous value is put back when the ``with`` block
    exits, whether parsing succeeded or raised.

    NOTE(review): the override is applied to the ``Config`` class object
    itself, so other code parsing concurrently may observe the relaxed
    setting while this call is in flight -- confirm whether that matters
    for callers.
    """
    relaxed_extra = unittest.mock.patch.object(cls.Config, "extra", Extra.allow)
    with relaxed_extra:
        parsed = cls.parse_obj(obj)
    return parsed


class PermissiveConfigModel(ConfigModel):
# A permissive config model that allows extra fields.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from datetime import datetime, timedelta
from typing import Dict, Iterable, List, Optional, Tuple, Type, Union, cast

import pydantic
from google.cloud import bigquery
from google.cloud.bigquery.table import TableListItem

Expand Down Expand Up @@ -326,10 +325,7 @@ def test_connection(config_dict: dict) -> TestConnectionReport:
_report: Dict[Union[SourceCapability, str], CapabilityReport] = dict()

try:
BigQueryV2Config.Config.extra = (
pydantic.Extra.allow
) # we are okay with extra fields during this stage
connection_conf = BigQueryV2Config.parse_obj(config_dict)
connection_conf = BigQueryV2Config.parse_obj_allow_extras(config_dict)
client: bigquery.Client = get_bigquery_client(connection_conf)
assert client

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from typing import Dict, Iterable, List, Optional, Tuple, Union, cast

import pandas as pd
import pydantic
from snowflake.connector import SnowflakeConnection

from datahub.configuration.pattern_utils import is_schema_allowed
Expand Down Expand Up @@ -256,10 +255,7 @@ def test_connection(config_dict: dict) -> TestConnectionReport:
test_report = TestConnectionReport()

try:
SnowflakeV2Config.Config.extra = (
pydantic.Extra.allow
) # we are okay with extra fields during this stage
connection_conf = SnowflakeV2Config.parse_obj(config_dict)
connection_conf = SnowflakeV2Config.parse_obj_allow_extras(config_dict)

connection: SnowflakeConnection = connection_conf.get_connection()
assert connection
Expand All @@ -281,9 +277,6 @@ def test_connection(config_dict: dict) -> TestConnectionReport:
test_report.internal_failure = True
test_report.internal_failure_reason = f"{e}"
finally:
SnowflakeV2Config.Config.extra = (
pydantic.Extra.forbid
) # set config flexibility back to strict
return test_report

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
import re
from typing import Iterable, List, Optional

import pydantic

from datahub.emitter.mce_builder import (
make_data_platform_urn,
make_dataset_urn_with_platform_instance,
Expand Down Expand Up @@ -152,10 +150,7 @@ def __init__(self, ctx: PipelineContext, config: UnityCatalogSourceConfig):
def test_connection(config_dict: dict) -> TestConnectionReport:
test_report = TestConnectionReport()
try:
UnityCatalogSourceConfig.Config.extra = (
pydantic.Extra.allow
) # we are okay with extra fields during this stage
config = UnityCatalogSourceConfig.parse_obj(config_dict)
config = UnityCatalogSourceConfig.parse_obj_allow_extras(config_dict)
report = UnityCatalogReport()
unity_proxy = proxy.UnityCatalogApiProxy(
config.workspace_url, config.token, report=report
Expand Down
10 changes: 10 additions & 0 deletions metadata-ingestion/tests/unit/config/test_config_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ class MyConfig(ConfigModel):
MyConfig.parse_obj({"required": "foo", "extra": "extra"})


def test_extras_allowed():
    """``parse_obj_allow_extras`` must accept payloads with and without unknown keys."""

    class MyConfig(ConfigModel):
        required: str
        optional: str = "bar"

    # Only required field, declared optional field overridden, undeclared key.
    payloads = (
        {"required": "foo"},
        {"required": "foo", "optional": "baz"},
        {"required": "foo", "extra": "extra"},
    )
    for payload in payloads:
        # Each call must complete without raising a validation error.
        MyConfig.parse_obj_allow_extras(payload)


def test_default_object_copy():
# Doing this with dataclasses would yield a subtle bug: the default list
# objects would be shared between instances. However, pydantic is smart
Expand Down

0 comments on commit 4007a3e

Please sign in to comment.