Skip to content

Commit

Permalink
TIMELY: minor refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
RudolfCardinal committed Nov 29, 2021
1 parent 8cd128f commit cc1f9c4
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 78 deletions.
125 changes: 125 additions & 0 deletions crate_anon/ancillary/timely_project/ddcriteria.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#!/usr/bin/env python

"""
crate_anon/ancillary/timely_project/ddcriteria.py
===============================================================================
Copyright (C) 2015-2021 Rudolf Cardinal (rudolf@pobox.com).
This file is part of CRATE.
CRATE is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
CRATE is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with CRATE. If not, see <https://www.gnu.org/licenses/>.
===============================================================================
Helper code for MRC TIMELY project (Moore, grant MR/T046430/1). Not of general
interest.
Helpers for data dictionary filtering.
"""

# =============================================================================
# Imports
# =============================================================================

import re
from typing import List, Optional, Tuple


# =============================================================================
# Constants
# =============================================================================

# Approvals are in stages, numbered from 1 to N_STAGES inclusive (the range
# that TableCriterion accepts for its "stage" argument).
N_STAGES = 6

# Arbitrary symbol that we'll use for "regex matches" when describing a
# criterion:
MATCHES = "≛"


# =============================================================================
# Deciding about rows
# =============================================================================

class TableCriterion:
    """
    A table-name regular expression together with its approval stage.

    The pattern is compiled once, case-insensitively, when the object is
    created, so repeated matching is cheap. The original pattern string is
    kept too, so that the criterion can be shown to the user via
    :meth:`description`.
    """

    def __init__(self, stage: Optional[int], table_regex_str: str) -> None:
        """
        Args:
            stage:
                Approval stage, from 1 to ``N_STAGES`` inclusive, or ``None``.
            table_regex_str:
                Regular expression (as a string) for table names.

        Raises:
            ValueError:
                If ``stage`` is not ``None`` and is outside 1..``N_STAGES``.
            re.error:
                If ``table_regex_str`` is not a valid regular expression.
        """
        # Validate explicitly rather than with "assert": assertions are
        # removed when Python runs with -O, which would let a bad stage
        # through unnoticed.
        if stage is not None and not 1 <= stage <= N_STAGES:
            raise ValueError(
                f"stage must be None or between 1 and {N_STAGES} inclusive; "
                f"got {stage!r}"
            )
        self.stage = stage
        self.table_regex_str = table_regex_str
        self.table_regex_compiled = re.compile(
            table_regex_str, flags=re.IGNORECASE
        )

    def table_match(self, tablename: str) -> bool:
        """
        Does ``tablename`` match our stored pattern?

        Matching is anchored at the start of ``tablename`` only (``match``,
        not ``fullmatch``), so a pattern must end with ``$`` if it is meant to
        cover the whole name.
        """
        return self.table_regex_compiled.match(tablename) is not None

    def description(self) -> str:
        """
        Returns a human-readable summary of this criterion.
        """
        return f"table {MATCHES} {self.table_regex_str}"


class FieldCriterion(TableCriterion):
    """
    As for :class:`TableCriterion`, but for both a table and a field (column)
    name: a table-name pattern plus a field-name pattern, both compiled
    case-insensitively.
    """

    def __init__(self, field_regex_str: str, **kwargs) -> None:
        """
        Args:
            field_regex_str:
                Regular expression (as a string) for field names.
            kwargs:
                Passed to :class:`TableCriterion` (``stage``,
                ``table_regex_str``).

        Raises:
            re.error:
                If ``field_regex_str`` is not a valid regular expression.
        """
        super().__init__(**kwargs)
        self.field_regex_str = field_regex_str
        self.field_regex_compiled = re.compile(
            field_regex_str, flags=re.IGNORECASE
        )

    def table_field_match(self, tablename: str, fieldname: str) -> bool:
        """
        Do both the table and field names match?

        The table test is delegated to :meth:`table_match` so that there is a
        single definition of "table matches"; the field pattern is only tried
        if the table matched. Both are anchored at the start of the name only.
        """
        return (
            self.table_match(tablename)
            and self.field_regex_compiled.match(fieldname) is not None
        )

    def description(self) -> str:
        """
        Returns a human-readable summary: the table part (as produced by the
        parent class) followed by the field part.
        """
        return (
            f"{super().description()}, "
            f"field {MATCHES} {self.field_regex_str}"
        )


def add_table_criteria(criteria: List[TableCriterion],
                       stage: Optional[int],
                       regex_strings: List[str]) -> None:
    """
    Appends to ``criteria`` (in place): one :class:`TableCriterion` per entry
    in ``regex_strings``, in order, all with the same ``stage``.
    """
    criteria.extend(
        TableCriterion(stage=stage, table_regex_str=pattern)
        for pattern in regex_strings
    )


def add_field_criteria(criteria: List[TableCriterion],
                       stage: Optional[int],
                       regex_tuples: List[Tuple[str, str]]) -> None:
    """
    Appends to ``criteria`` (in place): one :class:`FieldCriterion` per
    ``(table regex, field regex)`` pair in ``regex_tuples``, in order, all
    with the same ``stage``.
    """
    criteria.extend(
        FieldCriterion(
            stage=stage,
            table_regex_str=table_pattern,
            field_regex_str=field_pattern,
        )
        for table_pattern, field_pattern in regex_tuples
    )
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,18 @@
import argparse
import copy
import logging
import re
from typing import List, Optional, Tuple
from typing import List, Optional

from cardinal_pythonlib.logs import main_only_quicksetup_rootlogger
from sqlalchemy.dialects.mssql.base import dialect as mssql_server_dialect

from crate_anon.ancillary.timely_project.ddcriteria import (
add_field_criteria,
add_table_criteria,
FieldCriterion,
N_STAGES,
TableCriterion,
)
from crate_anon.anonymise.config import Config
from crate_anon.anonymise.dd import DataDictionary
from crate_anon.anonymise.ddr import DataDictionaryRow
Expand All @@ -59,82 +65,6 @@
# Deciding about rows
# =============================================================================

# Approvals are in stages, numbered from 1 to N_STAGES inclusive.

N_STAGES = 6


class TableCriterion:
    """
    Stores a table-name regular expression, compiled once (ignoring case) for
    speed, alongside its source string and its associated approval stage.
    """

    def __init__(self, stage: Optional[int], table_regex_str: str) -> None:
        """
        Args:
            stage:
                Approval stage, from 1 to ``N_STAGES`` inclusive, or ``None``.
            table_regex_str:
                Regular expression (as a string) for table names.

        Raises:
            ValueError:
                If ``stage`` is not ``None`` and is outside 1..``N_STAGES``.
            re.error:
                If ``table_regex_str`` is not a valid regular expression.
        """
        # An explicit check survives "python -O"; an assert statement would
        # be stripped and the bad stage accepted.
        if stage is not None and not 1 <= stage <= N_STAGES:
            raise ValueError(
                f"stage must be None or between 1 and {N_STAGES} inclusive; "
                f"got {stage!r}"
            )
        self.stage = stage
        self.table_regex_str = table_regex_str
        self.table_regex_compiled = re.compile(
            table_regex_str, flags=re.IGNORECASE
        )

    def table_match(self, tablename: str) -> bool:
        """
        Does ``tablename`` match our stored pattern?

        The match is anchored at the start of ``tablename`` but not at its
        end.
        """
        return self.table_regex_compiled.match(tablename) is not None

    def description(self) -> str:
        """
        Returns a human-readable summary of this criterion.
        """
        return f"table ≛ {self.table_regex_str}"


class FieldCriterion(TableCriterion):
    """
    As for :class:`TableCriterion`, but for both a table and a field (column)
    name.
    """

    def __init__(self, field_regex_str: str, **kwargs) -> None:
        """
        Args:
            field_regex_str:
                Regular expression (as a string) for field names.
            kwargs:
                Passed to :class:`TableCriterion` (``stage``,
                ``table_regex_str``).
        """
        super().__init__(**kwargs)
        self.field_regex_str = field_regex_str
        self.field_regex_compiled = re.compile(
            field_regex_str, flags=re.IGNORECASE
        )

    def table_field_match(self, tablename: str, fieldname: str) -> bool:
        """
        Do both the table and field names match?

        Uses :meth:`table_match` for the table part, so the table test is
        defined in one place; the field pattern is tried only if the table
        matched.
        """
        return (
            self.table_match(tablename)
            and self.field_regex_compiled.match(fieldname) is not None
        )

    def description(self) -> str:
        """
        Returns a human-readable summary: the parent's table description
        followed by the field pattern.
        """
        return f"{super().description()}, field ≛ {self.field_regex_str}"


def add_table_criteria(criteria: List[TableCriterion],
                       stage: Optional[int],
                       regex_strings: List[str]) -> None:
    """
    Extends ``criteria`` in place with a :class:`TableCriterion` for each
    regex string supplied, all sharing ``stage``.
    """
    criteria.extend(
        TableCriterion(stage=stage, table_regex_str=regex_str)
        for regex_str in regex_strings
    )


def add_field_criteria(criteria: List[TableCriterion],
                       stage: Optional[int],
                       regex_tuples: List[Tuple[str, str]]) -> None:
    """
    Extends ``criteria`` in place with a :class:`FieldCriterion` for each
    ``(table regex, field regex)`` pair supplied, all sharing ``stage``.
    """
    criteria.extend(
        FieldCriterion(
            stage=stage,
            table_regex_str=table_regex,
            field_regex_str=field_regex,
        )
        for table_regex, field_regex in regex_tuples
    )


# -----------------------------------------------------------------------------
# Generic exclusions
# -----------------------------------------------------------------------------
Expand Down

0 comments on commit cc1f9c4

Please sign in to comment.