diff --git a/ENV.md b/ENV.md
index b957bc11..d4496dbc 100644
--- a/ENV.md
+++ b/ENV.md
@@ -57,19 +57,28 @@ Note that some tasks/subtasks are themselves enabled by other tasks.
 
 ### Scheduled Task Flags
 
-| Flag | Description |
-|-------------------------------------|-------------------------------------------------------------------------------|
-| `SCHEDULED_TASKS_FLAG` | All scheduled tasks. Disabling disables all other scheduled tasks. |
-| `PUSH_TO_HUGGING_FACE_TASK_FLAG` | Pushes data to HuggingFace. |
-| `POPULATE_BACKLOG_SNAPSHOT_TASK_FLAG` | Populates the backlog snapshot. |
-| `DELETE_OLD_LOGS_TASK_FLAG` | Deletes old logs. |
-| `RUN_URL_TASKS_TASK_FLAG` | Runs URL tasks. |
-| `IA_PROBE_TASK_FLAG` | Extracts and links Internet Archives metadata to URLs. |
-| `IA_SAVE_TASK_FLAG` | Saves URLs to Internet Archives. |
-| `MARK_TASK_NEVER_COMPLETED_TASK_FLAG` | Marks tasks that were started but never completed (usually due to a restart). |
-| `DELETE_STALE_SCREENSHOTS_TASK_FLAG` | Deletes stale screenshots for URLs already validated. |
-| `TASK_CLEANUP_TASK_FLAG` | Cleans up tasks that are no longer needed. |
+| Flag | Description |
+|----------------------------------------|-------------------------------------------------------------------------------|
+| `SCHEDULED_TASKS_FLAG` | All scheduled tasks. Disabling disables all other scheduled tasks. |
+| `PUSH_TO_HUGGING_FACE_TASK_FLAG` | Pushes data to HuggingFace. |
+| `POPULATE_BACKLOG_SNAPSHOT_TASK_FLAG` | Populates the backlog snapshot. |
+| `DELETE_OLD_LOGS_TASK_FLAG` | Deletes old logs. |
+| `RUN_URL_TASKS_TASK_FLAG` | Runs URL tasks. |
+| `IA_PROBE_TASK_FLAG` | Extracts and links Internet Archives metadata to URLs. |
+| `IA_SAVE_TASK_FLAG` | Saves URLs to Internet Archives. |
+| `MARK_TASK_NEVER_COMPLETED_TASK_FLAG` | Marks tasks that were started but never completed (usually due to a restart). |
+| `DELETE_STALE_SCREENSHOTS_TASK_FLAG` | Deletes stale screenshots for URLs already validated. |
+| `TASK_CLEANUP_TASK_FLAG` | Cleans up tasks that are no longer needed. |
 | `REFRESH_MATERIALIZED_VIEWS_TASK_FLAG` | Refreshes materialized views. |
+| `DS_APP_SYNC_AGENCY_ADD_TASK_FLAG` | Adds new agencies to the Data Sources App. |
+| `DS_APP_SYNC_AGENCY_UPDATE_TASK_FLAG` | Updates existing agencies in the Data Sources App. |
+| `DS_APP_SYNC_AGENCY_DELETE_TASK_FLAG` | Deletes agencies in the Data Sources App. |
+| `DS_APP_SYNC_DATA_SOURCE_ADD_TASK_FLAG` | Adds new data sources to the Data Sources App. |
+| `DS_APP_SYNC_DATA_SOURCE_UPDATE_TASK_FLAG` | Updates existing data sources in the Data Sources App. |
+| `DS_APP_SYNC_DATA_SOURCE_DELETE_TASK_FLAG` | Deletes data sources in the Data Sources App. |
+| `DS_APP_SYNC_META_URL_ADD_TASK_FLAG` | Adds new meta URLs to the Data Sources App. |
+| `DS_APP_SYNC_META_URL_UPDATE_TASK_FLAG` | Updates existing meta URLs in the Data Sources App. |
+| `DS_APP_SYNC_META_URL_DELETE_TASK_FLAG` | Deletes meta URLs in the Data Sources App. |
 
 ### URL Task Flags
 
@@ -81,7 +90,6 @@ URL Task Flags are collectively controlled by the `RUN_URL_TASKS_TASK_FLAG` flag
 | `URL_HTML_TASK_FLAG` | URL HTML scraping task. |
 | `URL_RECORD_TYPE_TASK_FLAG` | Automatically assigns Record Types to URLs. |
 | `URL_AGENCY_IDENTIFICATION_TASK_FLAG` | Automatically assigns and suggests Agencies for URLs. |
-| `URL_SUBMIT_APPROVED_TASK_FLAG` | Submits approved URLs to the Data Sources App. |
 | `URL_MISC_METADATA_TASK_FLAG` | Adds misc metadata to URLs. |
 | `URL_AUTO_RELEVANCE_TASK_FLAG` | Automatically assigns Relevances to URLs.
| | `URL_PROBE_TASK_FLAG` | Probes URLs for web metadata. | @@ -90,7 +98,6 @@ URL Task Flags are collectively controlled by the `RUN_URL_TASKS_TASK_FLAG` flag | `URL_AUTO_VALIDATE_TASK_FLAG` | Automatically validates URLs. | | `URL_AUTO_NAME_TASK_FLAG` | Automatically names URLs. | | `URL_SUSPEND_TASK_FLAG` | Suspends URLs meeting suspension criteria. | -| `URL_SUBMIT_META_URLS_TASK_FLAG` | Submits meta URLs to the Data Sources App. | ### Agency ID Subtasks diff --git a/alembic/versions/2025_10_28_1539-a57c3b5b6e93_add_sync_log_table.py b/alembic/versions/2025_10_28_1539-a57c3b5b6e93_add_sync_log_table.py new file mode 100644 index 00000000..41b02082 --- /dev/null +++ b/alembic/versions/2025_10_28_1539-a57c3b5b6e93_add_sync_log_table.py @@ -0,0 +1,636 @@ +"""Add sync_log table + +Revision ID: a57c3b5b6e93 +Revises: f32ba7664e9f +Create Date: 2025-10-28 15:39:50.494489 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + +from src.util.alembic_helpers import created_at_column, updated_at_column, create_updated_at_trigger, remove_enum_value + +# revision identifiers, used by Alembic. +revision: str = 'a57c3b5b6e93' +down_revision: Union[str, None] = 'f32ba7664e9f' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _add_data_portal_type_other_to_ds_optional_metadata(): + op.add_column( + 'url_optional_data_source_metadata', + sa.Column( + 'data_portal_type_other', + sa.String(), + nullable=True + ) + ) + + +def upgrade() -> None: + _create_sync_log() + _create_ds_agency_link() + _migrate_agency_ids_to_ds_agency_link() + remove_id_column_from_agencies() + rename_agency_id_to_id() + _rename_existing_tables_to_ds_app_format() + _alter_ds_app_link_data_source_table() + _alter_ds_app_link_meta_url_table() + _add_flag_deletion_tables() + _add_last_synced_at_columns() + _add_link_table_modification_triggers() + _add_updated_at_to_optional_data_source_metadata_table() + _update_sync_tasks() + _alter_agency_jurisdiction_type_column() + _add_updated_at_to_url_record_type_table() + _add_updated_at_trigger_to_url_optional_data_source_metadata() + _add_data_portal_type_other_to_ds_optional_metadata() + +def _add_updated_at_trigger_to_url_optional_data_source_metadata(): + create_updated_at_trigger( + "url_optional_data_source_metadata" + ) + +def _add_updated_at_to_url_record_type_table(): + op.add_column( + 'url_record_type', + updated_at_column() + ) + create_updated_at_trigger( + "url_record_type" + ) + + + +def _alter_agency_jurisdiction_type_column(): + op.alter_column( + 'agencies', + 'jurisdiction_type', + nullable=False, + ) + + +def _update_sync_tasks(): + + # Drop Views + op.execute("drop view url_task_count_1_day") + op.execute("drop view url_task_count_1_week") + op.execute("drop materialized view url_status_mat_view") + + + + targets: list[tuple[str, str]] = [ + ('tasks', 'task_type'), + ('url_task_error', 'task_type') + ] + + remove_enum_value( + enum_name="task_type", + value_to_remove="Sync Agencies", + targets=targets + ) + remove_enum_value( + enum_name="task_type", + value_to_remove="Sync Data Sources", + targets=targets + ) + new_enum_values: list[str] = [ + "Sync Agencies Add", + "Sync Agencies Update", + "Sync Agencies Delete", + "Sync Data Sources Add", + "Sync Data Sources Update", + "Sync Data Sources Delete", + "Sync Meta URLs Add", + "Sync Meta URLs Update", + "Sync Meta URLs Delete", + ] + for enum_value in new_enum_values: + op.execute(f"ALTER TYPE task_type ADD VALUE 
'{enum_value}';") + + # Recreate Views + op.execute(""" + create view url_task_count_1_day(task_type, count) as + SELECT + t.task_type, + count(ltu.url_id) AS count + FROM + tasks t + JOIN link_task_urls ltu + ON ltu.task_id = t.id + WHERE + t.updated_at > (now() - '1 day'::interval) + GROUP BY + t.task_type; + """) + + op.execute(""" + create view url_task_count_1_week(task_type, count) as + SELECT + t.task_type, + count(ltu.url_id) AS count + FROM + tasks t + JOIN link_task_urls ltu + ON ltu.task_id = t.id + WHERE + t.updated_at > (now() - '7 days'::interval) + GROUP BY + t.task_type; + """) + + op.execute( + """ + CREATE MATERIALIZED VIEW url_status_mat_view as + with + urls_with_relevant_errors as ( + select + ute.url_id + from + url_task_error ute + where + ute.task_type in ( + 'Screenshot', + 'HTML', + 'URL Probe' + ) + ) + , status_text as ( + select + u.id as url_id, + case + when ( + -- Validated as not relevant, individual record, or not found + fuv.type in ('not relevant', 'individual record', 'not found') + ) Then 'Accepted' + when ( + (fuv.type = 'data source' and uds.url_id is null) + OR + (fuv.type = 'meta url' and udmu.url_id is null) + ) Then 'Awaiting Submission' + when ( + (fuv.type = 'data source' and uds.url_id is not null) + OR + (fuv.type = 'meta url' and udmu.url_id is not null) + ) Then 'Submitted' + when ( + -- Has compressed HTML + uch.url_id is not null + AND + -- Has web metadata + uwm.url_id is not null + AND + -- Has screenshot + us.url_id is not null + ) THEN 'Community Labeling' + when uwre.url_id is not null then 'Error' + ELSE 'Intake' + END as status + + from + urls u + left join urls_with_relevant_errors uwre + on u.id = uwre.url_id + left join url_screenshot us + on u.id = us.url_id + left join url_compressed_html uch + on u.id = uch.url_id + left join url_web_metadata uwm + on u.id = uwm.url_id + left join flag_url_validated fuv + on u.id = fuv.url_id + left join ds_app_link_meta_url udmu + on u.id = udmu.url_id + left join ds_app_link_data_source uds + on u.id = uds.url_id + ) + select + url_id, + status, + CASE status + WHEN 'Intake' THEN 100 + WHEN 'Error' THEN 110 + WHEN 'Community Labeling' THEN 200 + WHEN 'Accepted' THEN 300 + WHEN 'Awaiting Submission' THEN 380 + WHEN 'Submitted' THEN 390 + ELSE -1 + END as code + from status_text + """ + ) + + +def last_synced_at_column(): + return sa.Column( + 'last_synced_at', + sa.DateTime(), + nullable=False, + server_default=sa.func.now() + ) + + +def _add_link_table_modification_triggers(): + op.execute(""" + -- trigger func that "touches" parent rows hit by changes to the link table + CREATE OR REPLACE FUNCTION touch_url_from_agency_link() + RETURNS trigger + LANGUAGE plpgsql AS $$ + BEGIN + IF TG_OP = 'INSERT' THEN + EXECUTE $q$ + UPDATE urls u + SET updated_at = clock_timestamp() + FROM (SELECT DISTINCT url_id FROM newtab) AS hit + WHERE u.id = hit.url_id + $q$; + + ELSIF TG_OP = 'DELETE' THEN + EXECUTE $q$ + UPDATE urls u + SET updated_at = clock_timestamp() + FROM (SELECT DISTINCT url_id FROM oldtab) AS hit + WHERE u.id = hit.url_id + $q$; + + ELSE -- UPDATE + EXECUTE $q$ + UPDATE urls u + SET updated_at = clock_timestamp() + FROM ( + SELECT DISTINCT url_id FROM newtab + UNION + SELECT DISTINCT url_id FROM oldtab + ) AS hit + WHERE u.id = hit.url_id + $q$; + END IF; + + RETURN NULL; -- statement-level trigger + END $$; + + -- statement-level trigger with transition tables + CREATE TRIGGER trg_link_urls_agency_touch_url_ins + AFTER INSERT ON link_urls_agency + REFERENCING NEW TABLE AS newtab + FOR 
EACH STATEMENT + EXECUTE FUNCTION touch_url_from_agency_link(); + + CREATE TRIGGER trg_link_urls_agency_touch_url_upd + AFTER UPDATE ON link_urls_agency + REFERENCING NEW TABLE AS newtab OLD TABLE AS oldtab + FOR EACH STATEMENT + EXECUTE FUNCTION touch_url_from_agency_link(); + + CREATE TRIGGER trg_link_urls_agency_touch_url_del + AFTER DELETE ON link_urls_agency + REFERENCING OLD TABLE AS oldtab + FOR EACH STATEMENT + EXECUTE FUNCTION touch_url_from_agency_link(); + + """) + + op.execute( + """ + -- trigger func that "touches" agency rows hit by changes to the link_agencies_locations table + CREATE OR REPLACE FUNCTION touch_agency_from_location_link() + RETURNS trigger + LANGUAGE plpgsql AS + $$ + BEGIN + IF TG_OP = 'INSERT' THEN + EXECUTE $q$ + UPDATE agencies a + SET updated_at = clock_timestamp() + FROM (SELECT DISTINCT agency_id FROM newtab) AS hit + WHERE a.id = hit.agency_id + $q$; + + ELSIF TG_OP = 'DELETE' THEN + EXECUTE $q$ + UPDATE agencies a + SET updated_at = clock_timestamp() + FROM (SELECT DISTINCT agency_id FROM oldtab) AS hit + WHERE a.id = hit.agency_id + $q$; + + ELSE -- UPDATE + EXECUTE $q$ + UPDATE agencies a + SET updated_at = clock_timestamp() + FROM ( + SELECT DISTINCT agency_id FROM newtab + UNION + SELECT DISTINCT agency_id FROM oldtab + ) AS hit + WHERE a.id = hit.agency_id + $q$; + END IF; + + RETURN NULL; -- statement-level trigger + END + $$; + + -- statement-level trigger with transition tables + CREATE TRIGGER trg_link_agencies_locations_touch_agencies_ins + AFTER INSERT ON link_agencies_locations + REFERENCING NEW TABLE AS newtab + FOR EACH STATEMENT + EXECUTE FUNCTION touch_agency_from_location_link(); + + CREATE TRIGGER trg_link_agencies_locations_touch_agencies_upd + AFTER UPDATE ON link_agencies_locations + REFERENCING NEW TABLE AS newtab OLD TABLE AS oldtab + FOR EACH STATEMENT + EXECUTE FUNCTION touch_agency_from_location_link(); + + CREATE TRIGGER trg_link_agencies_locations_touch_agencies_del + AFTER DELETE ON link_agencies_locations + REFERENCING OLD TABLE AS oldtab + FOR EACH STATEMENT + EXECUTE FUNCTION touch_agency_from_location_link(); + """ + ) + + + + + + + +def _add_updated_at_to_optional_data_source_metadata_table(): + op.add_column( + "url_optional_data_source_metadata", + updated_at_column() + ) + create_updated_at_trigger( + "url_optional_data_source_metadata" + ) + +def _add_last_synced_at_columns(): + op.add_column( + 'ds_app_link_data_source', + last_synced_at_column() + ) + op.add_column( + 'ds_app_link_meta_url', + last_synced_at_column() + ) + + +def _alter_ds_app_link_data_source_table(): + # Drop unique constraint for data source id + op.drop_constraint( + 'uq_url_data_sources_data_source_id', + 'ds_app_link_data_source', + type_='unique' + ) + # Drop primary keys + op.drop_constraint( + 'url_data_sources_pkey', + 'ds_app_link_data_source', + type_='primary' + ) + # Rename `data_source_id` to `ds_data_source_id` + op.alter_column( + 'ds_app_link_data_source', + 'data_source_id', + new_column_name='ds_data_source_id', + ) + # Add new primary key + op.create_primary_key( + 'ds_app_link_data_source_pkey', + 'ds_app_link_data_source', + ['ds_data_source_id'] + ) + + # Drop url_id foreign key + op.drop_constraint( + 'url_data_sources_url_id_fkey', + 'ds_app_link_data_source', + type_='foreignkey' + ) + # Recreate foreign key with ON DELETE SET NULL + op.create_foreign_key( + 'ds_app_link_data_source_url_id_fkey', + 'ds_app_link_data_source', + 'urls', + ['url_id'], + ['id'], + ondelete='SET NULL' + ) + # Alter url_id column to be 
nullable + op.alter_column( + 'ds_app_link_data_source', + 'url_id', + nullable=True + ) + + + +def _alter_ds_app_link_meta_url_table(): + # Drop joint primary key for url_id and agency_id + op.drop_constraint( + 'url_ds_meta_url_pkey', + 'ds_app_link_meta_url', + type_='primary' + ) + # Drop unique constraint for ds_meta_url_id + op.drop_constraint( + 'url_ds_meta_url_ds_meta_url_id_key', + 'ds_app_link_meta_url', + type_='unique' + ) + # Drop agency_id column + op.drop_column( + 'ds_app_link_meta_url', + 'agency_id' + ) + # Make ds_meta_url_id primary key + op.create_primary_key( + 'ds_app_link_meta_url_pkey', + 'ds_app_link_meta_url', + ['ds_meta_url_id'] + ) + # Add unique constraint for url_id + op.create_unique_constraint( + 'uq_ds_app_link_meta_url_url_id', + 'ds_app_link_meta_url', + ['url_id'] + ) + # URL ID + ## Drop foreign key + op.drop_constraint( + 'url_ds_meta_url_url_id_fkey', + 'ds_app_link_meta_url', + type_='foreignkey' + ) + ## Recreate foreign key with ON DELETE SET NULL + op.create_foreign_key( + 'ds_app_link_meta_url_url_id_fkey', + 'ds_app_link_meta_url', + 'urls', + ['url_id'], + ['id'], + ondelete='SET NULL' + ) + ## Alter url_id column to be nullable + op.alter_column( + 'ds_app_link_meta_url', + 'url_id', + nullable=True + ) + + +def _add_flag_deletion_tables(): + op.create_table( + 'flag_ds_delete_agency', + sa.Column( + 'ds_agency_id', + sa.Integer(), + sa.ForeignKey( + 'ds_app_link_agency.ds_agency_id', + ondelete='CASCADE' + ), + primary_key=True, + nullable=False + ), + created_at_column() + ) + + op.create_table( + 'flag_ds_delete_data_source', + sa.Column( + 'ds_data_source_id', + sa.Integer(), + sa.ForeignKey( + 'ds_app_link_data_source.ds_data_source_id', + ondelete='CASCADE' + ), + primary_key=True, + nullable=False + ), + created_at_column(), + ) + + op.create_table( + 'flag_ds_delete_meta_url', + sa.Column( + 'ds_meta_url_id', + sa.Integer(), + sa.ForeignKey( + 'ds_app_link_meta_url.ds_meta_url_id', + ondelete='CASCADE' + ), + primary_key=True, + nullable=False + ), + created_at_column(), + ) + + +def _rename_existing_tables_to_ds_app_format(): + op.rename_table( + 'url_data_source', + 'ds_app_link_data_source' + ) + op.rename_table( + 'url_ds_meta_url', + 'ds_app_link_meta_url' + ) + +def _migrate_agency_ids_to_ds_agency_link(): + """ + While this migration uses the existing DS agency IDs for both sm and ds agency ids + From this point onward the sm ID is internal to the SM application, + and the same is true for DS ID. 
+ """ + + op.execute(""" + INSERT INTO ds_app_link_agency(agency_id, ds_agency_id) + SELECT agency_id, agency_id + FROM agencies + """) + + +def remove_id_column_from_agencies(): + op.drop_column( + 'agencies', + 'id' + ) + +def rename_agency_id_to_id(): + op.alter_column( + 'agencies', + 'agency_id', + new_column_name='id' + ) + +def _create_ds_agency_link(): + op.create_table( + 'ds_app_link_agency', + sa.Column( + 'agency_id', + sa.Integer(), + sa.ForeignKey( + 'agencies.agency_id', + ondelete='SET NULL' + ), + nullable=True + ), + sa.Column( + 'ds_agency_id', + sa.Integer(), + nullable=False, + primary_key=True + ), + created_at_column(), + last_synced_at_column(), + sa.UniqueConstraint( + "agency_id", name="uq_ds_app_link_agency_agency_id" + ) + ) + + +def _create_sync_log(): + op.create_table( + 'sync_log', + sa.Column( + 'resource_type', + sa.Enum( + 'agency', + 'data_source', + 'meta_url', + name='resource_type_enum' + ), + nullable=False, + ), + sa.Column( + 'sync_type', + sa.Enum( + 'add', + 'update', + 'delete', + name='sync_type_enum' + ), + nullable=False, + ), + sa.Column( + 'count', + sa.Integer(), + nullable=False, + ), + created_at_column(), + sa.PrimaryKeyConstraint( + 'resource_type', + 'sync_type', + 'created_at' + ) + ) + + +def downgrade() -> None: + pass diff --git a/src/api/endpoints/agencies/by_id/delete/query.py b/src/api/endpoints/agencies/by_id/delete/query.py index 61ce2653..627fc932 100644 --- a/src/api/endpoints/agencies/by_id/delete/query.py +++ b/src/api/endpoints/agencies/by_id/delete/query.py @@ -1,7 +1,9 @@ -from sqlalchemy import delete +from sqlalchemy import delete, select from sqlalchemy.ext.asyncio import AsyncSession +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.flag.ds_delete.agency import FlagDSDeleteAgency from src.db.queries.base.builder import QueryBuilderBase @@ -15,8 +17,25 @@ def __init__( self.agency_id = agency_id async def run(self, session: AsyncSession) -> None: + # Check for existence of DS App Link. 
If so, add deletion flag + query = ( + select( + DSAppLinkAgency + ) + .where( + DSAppLinkAgency.agency_id == self.agency_id + ) + ) + ds_app_link_agency: DSAppLinkAgency | None = await self.sh.one_or_none(session, query=query) + if ds_app_link_agency is not None: + flag = FlagDSDeleteAgency( + ds_agency_id=ds_app_link_agency.ds_agency_id, + ) + session.add(flag) + + # Delete Agency statement = ( delete(Agency) - .where(Agency.agency_id == self.agency_id) + .where(Agency.id == self.agency_id) ) await session.execute(statement) \ No newline at end of file diff --git a/src/api/endpoints/agencies/by_id/put/query.py b/src/api/endpoints/agencies/by_id/put/query.py index 0f58a7db..942203fc 100644 --- a/src/api/endpoints/agencies/by_id/put/query.py +++ b/src/api/endpoints/agencies/by_id/put/query.py @@ -25,7 +25,7 @@ async def run(self, session: AsyncSession) -> None: Agency ) .where( - Agency.agency_id == self.agency_id + Agency.id == self.agency_id ) ) diff --git a/src/api/endpoints/agencies/root/get/query.py b/src/api/endpoints/agencies/root/get/query.py index 9452f12e..ae3b943d 100644 --- a/src/api/endpoints/agencies/root/get/query.py +++ b/src/api/endpoints/agencies/root/get/query.py @@ -42,7 +42,7 @@ async def run(self, session: AsyncSession) -> list[AgencyGetResponse]: for location in agency.locations ] responses.append(AgencyGetResponse( - id=agency.agency_id, + id=agency.id, name=agency.name, type=agency.agency_type, jurisdiction_type=agency.jurisdiction_type, diff --git a/src/api/endpoints/agencies/root/post/query.py b/src/api/endpoints/agencies/root/post/query.py index 29ff9823..43064f85 100644 --- a/src/api/endpoints/agencies/root/post/query.py +++ b/src/api/endpoints/agencies/root/post/query.py @@ -26,7 +26,7 @@ async def run(self, session: AsyncSession) -> AgencyPostResponse: session.add(agency) await session.flush() await session.refresh(agency) - agency_id: int = agency.agency_id + agency_id: int = agency.id try: diff --git a/src/api/endpoints/annotate/_shared/extract.py b/src/api/endpoints/annotate/_shared/extract.py index 61e92c35..1a0932d3 100644 --- a/src/api/endpoints/annotate/_shared/extract.py +++ b/src/api/endpoints/annotate/_shared/extract.py @@ -17,7 +17,7 @@ from src.db.dto_converter import DTOConverter from src.db.dtos.url.mapping_.simple import SimpleURLMapping from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion async def extract_and_format_get_annotation_result( @@ -55,7 +55,7 @@ async def extract_and_format_get_annotation_result( batch_info=await GetAnnotationBatchInfoQueryBuilder( batch_id=batch_id, models=[ - UserUrlAgencySuggestion, + UserURLAgencySuggestion, ] ).run(session), location_suggestions=location_suggestions, diff --git a/src/api/endpoints/annotate/all/get/queries/agency/requester.py b/src/api/endpoints/annotate/all/get/queries/agency/requester.py index fc309e50..e6ffb817 100644 --- a/src/api/endpoints/annotate/all/get/queries/agency/requester.py +++ b/src/api/endpoints/annotate/all/get/queries/agency/requester.py @@ -11,7 +11,7 @@ from src.db.models.impl.agency.sqlalchemy import Agency from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from 
src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.templates.requester import RequesterBase @@ -30,13 +30,13 @@ def __init__( async def get_user_agency_suggestions(self) -> list[AgencyAnnotationUserSuggestion]: query = ( select( - UserUrlAgencySuggestion.agency_id, - func.count(UserUrlAgencySuggestion.user_id).label("count"), + UserURLAgencySuggestion.agency_id, + func.count(UserURLAgencySuggestion.user_id).label("count"), Agency.name.label("agency_name"), ) .join( Agency, - Agency.agency_id == UserUrlAgencySuggestion.agency_id + Agency.id == UserURLAgencySuggestion.agency_id ) ) @@ -45,7 +45,7 @@ async def get_user_agency_suggestions(self) -> list[AgencyAnnotationUserSuggesti query = ( query.join( LinkAgencyLocation, - LinkAgencyLocation.agency_id == UserUrlAgencySuggestion.agency_id + LinkAgencyLocation.agency_id == UserURLAgencySuggestion.agency_id ) .where( LinkAgencyLocation.location_id == self.location_id @@ -54,14 +54,14 @@ async def get_user_agency_suggestions(self) -> list[AgencyAnnotationUserSuggesti query = ( query.where( - UserUrlAgencySuggestion.url_id == self.url_id + UserURLAgencySuggestion.url_id == self.url_id ) .group_by( - UserUrlAgencySuggestion.agency_id, + UserURLAgencySuggestion.agency_id, Agency.name ) .order_by( - func.count(UserUrlAgencySuggestion.user_id).desc() + func.count(UserURLAgencySuggestion.user_id).desc() ) .limit(3) ) @@ -88,7 +88,7 @@ async def get_auto_agency_suggestions(self) -> list[AgencyAnnotationAutoSuggesti ) .join( Agency, - Agency.agency_id == cte.agency_id + Agency.id == cte.agency_id ) ) diff --git a/src/api/endpoints/annotate/all/get/queries/core.py b/src/api/endpoints/annotate/all/get/queries/core.py index e37f2396..9b905870 100644 --- a/src/api/endpoints/annotate/all/get/queries/core.py +++ b/src/api/endpoints/annotate/all/get/queries/core.py @@ -8,7 +8,7 @@ from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion @@ -68,10 +68,10 @@ async def run( ) ), ~exists( - select(UserUrlAgencySuggestion.id) + select(UserURLAgencySuggestion.id) .where( - UserUrlAgencySuggestion.url_id == URL.id, - UserUrlAgencySuggestion.user_id == self.user_id, + UserURLAgencySuggestion.url_id == URL.id, + UserURLAgencySuggestion.user_id == self.user_id, ) ), ~exists( diff --git a/src/api/endpoints/annotate/all/post/requester.py b/src/api/endpoints/annotate/all/post/requester.py index 14064e8a..2d9cfeca 100644 --- a/src/api/endpoints/annotate/all/post/requester.py +++ b/src/api/endpoints/annotate/all/post/requester.py @@ -6,7 +6,7 @@ from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from 
src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion @@ -53,7 +53,7 @@ def add_relevant_annotation( def add_agency_ids(self, agency_ids: list[int]) -> None: for agency_id in agency_ids: - agency_suggestion = UserUrlAgencySuggestion( + agency_suggestion = UserURLAgencySuggestion( url_id=self.url_id, user_id=self.user_id, agency_id=agency_id, diff --git a/src/api/endpoints/contributions/user/queries/agreement/agency.py b/src/api/endpoints/contributions/user/queries/agreement/agency.py index 96011e06..488e5c19 100644 --- a/src/api/endpoints/contributions/user/queries/agreement/agency.py +++ b/src/api/endpoints/contributions/user/queries/agreement/agency.py @@ -3,7 +3,7 @@ from src.api.endpoints.contributions.user.queries.annotated_and_validated import AnnotatedAndValidatedCTEContainer from src.api.endpoints.contributions.user.queries.templates.agreement import AgreementCTEContainer from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion def get_agency_agreement_cte_container( @@ -16,10 +16,10 @@ def get_agency_agreement_cte_container( func.count() ) .join( - UserUrlAgencySuggestion, + UserURLAgencySuggestion, and_( - inner_cte.user_id == UserUrlAgencySuggestion.user_id, - inner_cte.url_id == UserUrlAgencySuggestion.url_id + inner_cte.user_id == UserURLAgencySuggestion.user_id, + inner_cte.url_id == UserURLAgencySuggestion.url_id ) ) .group_by( @@ -34,17 +34,17 @@ def get_agency_agreement_cte_container( func.count() ) .join( - UserUrlAgencySuggestion, + UserURLAgencySuggestion, and_( - inner_cte.user_id == UserUrlAgencySuggestion.user_id, - inner_cte.url_id == UserUrlAgencySuggestion.url_id + inner_cte.user_id == UserURLAgencySuggestion.user_id, + inner_cte.url_id == UserURLAgencySuggestion.url_id ) ) .where( exists() .where( - LinkURLAgency.url_id == UserUrlAgencySuggestion.url_id, - LinkURLAgency.agency_id == UserUrlAgencySuggestion.agency_id + LinkURLAgency.url_id == UserURLAgencySuggestion.url_id, + LinkURLAgency.agency_id == UserURLAgencySuggestion.agency_id ) ) .group_by( diff --git a/src/api/endpoints/data_source/get/query.py b/src/api/endpoints/data_source/get/query.py index e9d0598b..e15ce6b1 100644 --- a/src/api/endpoints/data_source/get/query.py +++ b/src/api/endpoints/data_source/get/query.py @@ -98,7 +98,7 @@ async def run(self, session: AsyncSession) -> DataSourceGetOuterResponse: url_agency_ids: list[int] = [] for agency in url.confirmed_agencies: - url_agency_ids.append(agency.agency_id) + url_agency_ids.append(agency.id) url_description: str | None = mapping[URL.description] link_batch_url_batch_id: int | None = mapping[LinkBatchURL.batch_id] diff --git a/src/api/endpoints/meta_url/by_id/agencies/put/__init__.py b/src/api/endpoints/meta_url/by_id/put/__init__.py similarity index 100% rename from src/api/endpoints/meta_url/by_id/agencies/put/__init__.py rename to src/api/endpoints/meta_url/by_id/put/__init__.py diff --git a/src/api/endpoints/meta_url/by_id/agencies/put/query.py b/src/api/endpoints/meta_url/by_id/put/query.py similarity index 88% rename from src/api/endpoints/meta_url/by_id/agencies/put/query.py rename to 
src/api/endpoints/meta_url/by_id/put/query.py index a3be8cf8..7392375c 100644 --- a/src/api/endpoints/meta_url/by_id/agencies/put/query.py +++ b/src/api/endpoints/meta_url/by_id/put/query.py @@ -1,8 +1,7 @@ from sqlalchemy.ext.asyncio import AsyncSession -from src.api.endpoints.meta_url.by_id.agencies.put.request import UpdateMetaURLRequest +from src.api.endpoints.meta_url.by_id.put.request import UpdateMetaURLRequest from src.api.shared.batch.url.link import UpdateBatchURLLinkQueryBuilder -from src.api.shared.record_type.put.query import UpdateRecordTypeQueryBuilder from src.api.shared.url.put.query import UpdateURLQueryBuilder from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/api/endpoints/meta_url/by_id/agencies/put/request.py b/src/api/endpoints/meta_url/by_id/put/request.py similarity index 100% rename from src/api/endpoints/meta_url/by_id/agencies/put/request.py rename to src/api/endpoints/meta_url/by_id/put/request.py diff --git a/src/api/endpoints/meta_url/get/query.py b/src/api/endpoints/meta_url/get/query.py index 202626d8..740dfd69 100644 --- a/src/api/endpoints/meta_url/get/query.py +++ b/src/api/endpoints/meta_url/get/query.py @@ -64,7 +64,7 @@ async def run(self, session: AsyncSession) -> MetaURLGetOuterResponse: url_name: str = mapping[URL.name] url_agency_ids: list[int] = [] for agency in url.confirmed_agencies: - url_agency_ids.append(agency.agency_id) + url_agency_ids.append(agency.id) url_description: str | None = mapping[URL.description] link_batch_url_batch_id: int | None = mapping[LinkBatchURL.batch_id] responses.append( diff --git a/src/api/endpoints/meta_url/routes.py b/src/api/endpoints/meta_url/routes.py index 0f14805c..79a5ab03 100644 --- a/src/api/endpoints/meta_url/routes.py +++ b/src/api/endpoints/meta_url/routes.py @@ -1,15 +1,15 @@ from fastapi import APIRouter, Depends, Query from src.api.dependencies import get_async_core -from src.api.endpoints.agencies.root.get.response import AgencyGetResponse, AgencyGetOuterResponse +from src.api.endpoints.agencies.root.get.response import AgencyGetOuterResponse from src.api.endpoints.meta_url.by_id.agencies.delete.wrapper import delete_meta_url_agency_link from src.api.endpoints.meta_url.by_id.agencies.get.wrapper import get_meta_url_agencies_wrapper from src.api.endpoints.meta_url.by_id.agencies.shared.check import check_is_meta_url from src.api.endpoints.meta_url.by_id.post.wrapper import add_meta_url_agency_link from src.api.endpoints.meta_url.get.query import GetMetaURLQueryBuilder -from src.api.endpoints.meta_url.get.response import MetaURLGetResponse, MetaURLGetOuterResponse -from src.api.endpoints.meta_url.by_id.agencies.put.query import UpdateMetaURLQueryBuilder -from src.api.endpoints.meta_url.by_id.agencies.put.request import UpdateMetaURLRequest +from src.api.endpoints.meta_url.get.response import MetaURLGetOuterResponse +from src.api.endpoints.meta_url.by_id.put.query import UpdateMetaURLQueryBuilder +from src.api.endpoints.meta_url.by_id.put.request import UpdateMetaURLRequest from src.api.shared.models.message_response import MessageResponse from src.core.core import AsyncCore diff --git a/src/api/endpoints/metrics/batches/aggregated/query/core.py b/src/api/endpoints/metrics/batches/aggregated/query/core.py index c17f0f6d..cc6259de 100644 --- a/src/api/endpoints/metrics/batches/aggregated/query/core.py +++ b/src/api/endpoints/metrics/batches/aggregated/query/core.py @@ -21,7 +21,7 @@ from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from 
src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.db.queries.base.builder import QueryBuilderBase from src.db.statement_composer import StatementComposer diff --git a/src/api/endpoints/metrics/batches/aggregated/query/submitted_/query.py b/src/api/endpoints/metrics/batches/aggregated/query/submitted_/query.py index ee8f8065..e3fa9d14 100644 --- a/src/api/endpoints/metrics/batches/aggregated/query/submitted_/query.py +++ b/src/api/endpoints/metrics/batches/aggregated/query/submitted_/query.py @@ -8,7 +8,7 @@ from src.db.helpers.session import session_helper as sh from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.db.queries.base.builder import QueryBuilderBase @@ -20,15 +20,15 @@ async def run(self, session: AsyncSession) -> list[ query = ( select( Batch.strategy, - func.count(URLDataSource.id).label("count") + func.count(DSAppLinkDataSource.id).label("count") ) .join( LinkBatchURL, LinkBatchURL.batch_id == Batch.id ) .join( - URLDataSource, - URLDataSource.url_id == LinkBatchURL.url_id + DSAppLinkDataSource, + DSAppLinkDataSource.url_id == LinkBatchURL.url_id ) .group_by(Batch.strategy) ) diff --git a/src/api/endpoints/metrics/batches/breakdown/submitted/cte_.py b/src/api/endpoints/metrics/batches/breakdown/submitted/cte_.py index face1891..1fd616a6 100644 --- a/src/api/endpoints/metrics/batches/breakdown/submitted/cte_.py +++ b/src/api/endpoints/metrics/batches/breakdown/submitted/cte_.py @@ -3,20 +3,20 @@ from src.api.endpoints.metrics.batches.breakdown.templates.cte_ import BatchesBreakdownURLCTE from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL -from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource SUBMITTED_CTE = BatchesBreakdownURLCTE( select( Batch.id, - func.count(URLDataSource.id).label("count_submitted") + func.count(DSAppLinkDataSource.id).label("count_submitted") ) .join( LinkBatchURL, LinkBatchURL.batch_id == Batch.id ) .join( - URLDataSource, - URLDataSource.url_id == LinkBatchURL.url_id + DSAppLinkDataSource, + DSAppLinkDataSource.url_id == LinkBatchURL.url_id ) .group_by(Batch.id) .cte("submitted") diff --git a/src/api/endpoints/metrics/urls/breakdown/query/core.py b/src/api/endpoints/metrics/urls/breakdown/query/core.py index e585554c..2606a079 100644 --- a/src/api/endpoints/metrics/urls/breakdown/query/core.py +++ b/src/api/endpoints/metrics/urls/breakdown/query/core.py @@ -8,7 +8,7 @@ from src.collectors.enums import URLStatus from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion from src.db.queries.base.builder import QueryBuilderBase @@ 
-27,13 +27,13 @@ async def run(self, session: AsyncSession) -> GetMetricsURLsBreakdownPendingResp case((UserURLTypeSuggestion.url_id != None, literal(True)), else_=literal(False)).label( "has_user_relevant_annotation" ), - case((UserUrlAgencySuggestion.url_id != None, literal(True)), else_=literal(False)).label( + case((UserURLAgencySuggestion.url_id != None, literal(True)), else_=literal(False)).label( "has_user_agency_annotation" ), ) .outerjoin(UserRecordTypeSuggestion, URL.id == UserRecordTypeSuggestion.url_id) .outerjoin(UserURLTypeSuggestion, URL.id == UserURLTypeSuggestion.url_id) - .outerjoin(UserUrlAgencySuggestion, URL.id == UserUrlAgencySuggestion.url_id) + .outerjoin(UserURLAgencySuggestion, URL.id == UserURLAgencySuggestion.url_id) ).cte("flags") month = func.date_trunc('month', URL.created_at) diff --git a/src/api/endpoints/review/approve/query_/core.py b/src/api/endpoints/review/approve/query_/core.py index b7abec5a..ff7a1c1f 100644 --- a/src/api/endpoints/review/approve/query_/core.py +++ b/src/api/endpoints/review/approve/query_/core.py @@ -37,7 +37,7 @@ async def run(self, session: AsyncSession) -> None: # Get existing agency ids existing_agencies = url.confirmed_agencies or [] - existing_agency_ids = [agency.agency_id for agency in existing_agencies] + existing_agency_ids = [agency.id for agency in existing_agencies] new_agency_ids = self.approval_info.agency_ids or [] await self._check_for_unspecified_agency_ids(existing_agency_ids, new_agency_ids) @@ -141,7 +141,7 @@ async def _add_new_agencies(self, existing_agency_ids, new_agency_ids, session): # Check if the new agency exists in the database query = ( select(Agency) - .where(Agency.agency_id == new_agency_id) + .where(Agency.id == new_agency_id) ) existing_agency = await session.execute(query) existing_agency = existing_agency.scalars().first() diff --git a/src/api/endpoints/search/agency/query.py b/src/api/endpoints/search/agency/query.py index 9476e039..254d90f5 100644 --- a/src/api/endpoints/search/agency/query.py +++ b/src/api/endpoints/search/agency/query.py @@ -30,7 +30,7 @@ async def run(self, session: AsyncSession) -> list[AgencySearchResponse]: query = ( select( - Agency.agency_id, + Agency.id.label("agency_id"), Agency.name.label("agency_name"), Agency.jurisdiction_type, Agency.agency_type, @@ -40,7 +40,7 @@ async def run(self, session: AsyncSession) -> list[AgencySearchResponse]: if self.location_id is None: query = query.join( LinkAgencyLocation, - LinkAgencyLocation.agency_id == Agency.agency_id + LinkAgencyLocation.agency_id == Agency.id ).join( LocationExpandedView, LocationExpandedView.id == LinkAgencyLocation.location_id @@ -49,7 +49,7 @@ async def run(self, session: AsyncSession) -> list[AgencySearchResponse]: with_location_id_cte_container = WithLocationIdCTEContainer(self.location_id) query = query.join( with_location_id_cte_container.cte, - with_location_id_cte_container.agency_id == Agency.agency_id + with_location_id_cte_container.agency_id == Agency.id ).join( LocationExpandedView, LocationExpandedView.id == with_location_id_cte_container.location_id diff --git a/src/api/endpoints/submit/url/queries/core.py b/src/api/endpoints/submit/url/queries/core.py index f65f81d0..9f3e7117 100644 --- a/src/api/endpoints/submit/url/queries/core.py +++ b/src/api/endpoints/submit/url/queries/core.py @@ -12,7 +12,7 @@ from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL from src.db.models.impl.url.core.enums import URLSource from 
src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion @@ -112,7 +112,7 @@ async def run(self, session: AsyncSession) -> URLSubmissionResponse: # Add agency ID as suggestion if exists if self.request.agency_id is not None: - agen_sugg = UserUrlAgencySuggestion( + agen_sugg = UserURLAgencySuggestion( user_id=self.user_id, url_id=url_insert.id, agency_id=self.request.agency_id diff --git a/src/core/tasks/url/operators/submit_approved/__init__.py b/src/api/endpoints/url/by_id/delete/__init__.py similarity index 100% rename from src/core/tasks/url/operators/submit_approved/__init__.py rename to src/api/endpoints/url/by_id/delete/__init__.py diff --git a/src/api/endpoints/url/by_id/delete/query.py b/src/api/endpoints/url/by_id/delete/query.py new file mode 100644 index 00000000..f8eba43d --- /dev/null +++ b/src/api/endpoints/url/by_id/delete/query.py @@ -0,0 +1,79 @@ +from typing import Any + +from sqlalchemy import select, delete +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.flag.ds_delete.data_source import FlagDSDeleteDataSource +from src.db.models.impl.flag.ds_delete.meta_url import FlagDSDeleteMetaURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource +from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL +from src.db.queries.base.builder import QueryBuilderBase + + +class DeleteURLQueryBuilder(QueryBuilderBase): + + def __init__( + self, + url_id: int + ): + super().__init__() + self.url_id = url_id + + async def run(self, session: AsyncSession) -> Any: + + await self._check_for_ds_app_link_data_source(session) + await self._check_for_ds_app_link_meta_url(session) + statement = ( + delete( + URL + ).where( + URL.id == self.url_id + ) + ) + await session.execute(statement) + + async def _check_for_ds_app_link_data_source( + self, + session: AsyncSession + ) -> Any: + """ + Check if a DS App Link Data Source exists for the URL + If so, add a deletion flag + """ + query = ( + select(DSAppLinkDataSource) + .where(DSAppLinkDataSource.url_id == self.url_id) + ) + ds_app_link_data_source: DSAppLinkDataSource | None = await self.sh.one_or_none( + session=session, + query=query + ) + if ds_app_link_data_source is not None: + delete_flag = FlagDSDeleteDataSource( + ds_data_source_id=ds_app_link_data_source.ds_data_source_id + ) + session.add(delete_flag) + + async def _check_for_ds_app_link_meta_url( + self, + session: AsyncSession + ) -> Any: + """ + Check if a DS App Link Meta URL exists for the URL + If so, add a deletion flag + """ + query = ( + select(DSAppLinkMetaURL) + .where(DSAppLinkMetaURL.url_id == self.url_id) + ) + ds_app_link_meta_url: DSAppLinkMetaURL | None = await self.sh.one_or_none( + session=session, + query=query + ) + if ds_app_link_meta_url is not None: + delete_flag = FlagDSDeleteMetaURL( + ds_meta_url_id=ds_app_link_meta_url.ds_meta_url_id + ) + session.add(delete_flag) + diff --git a/src/api/endpoints/url/routes.py b/src/api/endpoints/url/routes.py index c7bb59b0..7d184e6e 100644 --- a/src/api/endpoints/url/routes.py +++ 
b/src/api/endpoints/url/routes.py @@ -1,8 +1,10 @@ from fastapi import APIRouter, Query, Depends, Response from src.api.dependencies import get_async_core +from src.api.endpoints.url.by_id.delete.query import DeleteURLQueryBuilder from src.api.endpoints.url.by_id.screenshot.wrapper import get_url_screenshot_wrapper from src.api.endpoints.url.get.dto import GetURLsResponseInfo +from src.api.shared.models.message_response import MessageResponse from src.core.core import AsyncCore from src.security.manager import get_access_info from src.security.dtos.access_info import AccessInfo @@ -43,3 +45,13 @@ async def get_url_screenshot( content=raw_result, media_type="image/webp" ) + +@url_router.delete("/{url_id}") +async def delete_url( + url_id: int, + async_core: AsyncCore = Depends(get_async_core), +) -> MessageResponse: + await async_core.adb_client.run_query_builder( + DeleteURLQueryBuilder(url_id=url_id) + ) + return MessageResponse(message="URL deleted.") diff --git a/src/api/main.py b/src/api/main.py index 2dd7fa24..27abcb62 100644 --- a/src/api/main.py +++ b/src/api/main.py @@ -5,6 +5,7 @@ from discord_poster import DiscordPoster from fastapi import FastAPI from pdap_access_manager import AccessManager +from sqlalchemy.ext.asyncio import create_async_engine from starlette.responses import RedirectResponse from src.api.endpoints.agencies.routes import agencies_router @@ -52,12 +53,9 @@ async def lifespan(app: FastAPI): env.read_env() # Initialize shared dependencies - db_client = DatabaseClient( - db_url=env_var_manager.get_postgres_connection_string() - ) - adb_client = AsyncDatabaseClient( - db_url=env_var_manager.get_postgres_connection_string(is_async=True) - ) + + db_client = DatabaseClient() + adb_client = AsyncDatabaseClient() await setup_database(db_client) core_logger = AsyncCoreLogger(adb_client=adb_client) diff --git a/src/api/shared/agency/get/query.py b/src/api/shared/agency/get/query.py index b49e47ee..eccb3581 100644 --- a/src/api/shared/agency/get/query.py +++ b/src/api/shared/agency/get/query.py @@ -30,7 +30,7 @@ async def run(self, session: AsyncSession) -> AgencyGetOuterResponse: ) .join( LinkURLAgency, - LinkURLAgency.agency_id == Agency.agency_id + LinkURLAgency.agency_id == Agency.id ) .where( LinkURLAgency.url_id == self.url_id @@ -52,7 +52,7 @@ async def run(self, session: AsyncSession) -> AgencyGetOuterResponse: for location in agency.locations ] responses.append(AgencyGetResponse( - id=agency.agency_id, + id=agency.id, name=agency.name, type=agency.agency_type, jurisdiction_type=agency.jurisdiction_type, diff --git a/src/core/tasks/base/operator.py b/src/core/tasks/base/operator.py index 51f07a47..719abdf5 100644 --- a/src/core/tasks/base/operator.py +++ b/src/core/tasks/base/operator.py @@ -1,5 +1,6 @@ import traceback from abc import ABC, abstractmethod +from typing import Any from src.core.enums import BatchStatus from src.core.tasks.base.run_info import TaskOperatorRunInfo @@ -9,6 +10,7 @@ from src.db.models.impl.task.enums import TaskStatus from src.db.models.impl.url.task_error.pydantic_.insert import URLTaskErrorPydantic from src.db.models.impl.url.task_error.pydantic_.small import URLTaskErrorSmall +from src.db.queries.base.builder import QueryBuilderBase class TaskOperatorBase(ABC): @@ -53,9 +55,17 @@ async def run_task(self) -> TaskOperatorRunInfo: message=str(e) + "\n" + stack_trace ) - @abstractmethod - async def run_info(self, outcome: TaskOperatorOutcome, message: str) -> TaskOperatorRunInfo: - raise NotImplementedError + async def run_info( + self, + 
outcome: TaskOperatorOutcome, + message: str + ) -> TaskOperatorRunInfo: + return TaskOperatorRunInfo( + task_id=self.task_id, + task_type=self.task_type, + outcome=outcome, + message=message + ) @abstractmethod @@ -82,4 +92,8 @@ async def add_task_errors( ) for error in errors ] - await self.adb_client.bulk_insert(inserts) \ No newline at end of file + await self.adb_client.bulk_insert(inserts) + + # Convenience forwarder functions + async def run_query_builder(self, query_builder: QueryBuilderBase) -> Any: + return await self.adb_client.run_query_builder(query_builder) \ No newline at end of file diff --git a/src/core/tasks/url/operators/submit_approved/queries/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/__init__.py similarity index 100% rename from src/core/tasks/url/operators/submit_approved/queries/__init__.py rename to src/core/tasks/scheduled/impl/sync_to_ds/__init__.py diff --git a/src/core/tasks/url/operators/submit_meta_urls/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/__init__.py similarity index 100% rename from src/core/tasks/url/operators/submit_meta_urls/__init__.py rename to src/core/tasks/scheduled/impl/sync_to_ds/impl/__init__.py diff --git a/src/core/tasks/url/operators/submit_meta_urls/queries/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/__init__.py similarity index 100% rename from src/core/tasks/url/operators/submit_meta_urls/queries/__init__.py rename to src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/__init__.py diff --git a/src/external/pdap/impl/meta_urls/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/__init__.py similarity index 100% rename from src/external/pdap/impl/meta_urls/__init__.py rename to src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/__init__.py diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/core.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/core.py new file mode 100644 index 00000000..e46deed5 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/core.py @@ -0,0 +1,50 @@ +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.add.queries.add_links import \ + DSAppSyncAgenciesAddInsertLinksQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.add.queries.get import DSAppSyncAgenciesAddGetQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.add.queries.prereq import \ + DSAppSyncAgenciesAddPrerequisitesQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.templates.operator import DSSyncTaskOperatorBase +from src.db.enums import TaskType +from src.external.pdap.impl.sync.agencies.add.core import AddAgenciesRequestBuilder +from src.external.pdap.impl.sync.agencies.add.request import AddAgenciesOuterRequest +from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseInnerModel + + +class DSAppSyncAgenciesAddTaskOperator( + DSSyncTaskOperatorBase +): + + @property + def task_type(self) -> TaskType: + return TaskType.SYNC_AGENCIES_ADD + + async def meets_task_prerequisites(self) -> bool: + return await self.run_query_builder( + DSAppSyncAgenciesAddPrerequisitesQueryBuilder() + ) + + async def inner_task_logic(self) -> None: + request: AddAgenciesOuterRequest = await self.get_request_input() + responses: list[DSAppSyncAddResponseInnerModel] = await self.make_request(request) + await self.insert_ds_app_links(responses) + + async def get_request_input(self) -> AddAgenciesOuterRequest: + return await 
self.run_query_builder( + DSAppSyncAgenciesAddGetQueryBuilder() + ) + + async def make_request( + self, + request: AddAgenciesOuterRequest + ) -> list[DSAppSyncAddResponseInnerModel]: + return await self.pdap_client.run_request_builder( + AddAgenciesRequestBuilder(request) + ) + + async def insert_ds_app_links( + self, + responses: list[DSAppSyncAddResponseInnerModel] + ) -> None: + await self.run_query_builder( + DSAppSyncAgenciesAddInsertLinksQueryBuilder(responses) + ) diff --git a/tests/automated/integration/tasks/url/impl/submit_approved/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/queries/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/impl/submit_approved/__init__.py rename to src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/queries/__init__.py diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/queries/add_links.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/queries/add_links.py new file mode 100644 index 00000000..36a3ebc0 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/queries/add_links.py @@ -0,0 +1,26 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency +from src.db.queries.base.builder import QueryBuilderBase +from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseInnerModel + + +class DSAppSyncAgenciesAddInsertLinksQueryBuilder(QueryBuilderBase): + + def __init__( + self, + mappings: list[DSAppSyncAddResponseInnerModel] + ): + super().__init__() + self._mappings = mappings + + async def run(self, session: AsyncSession) -> None: + inserts: list[DSAppLinkAgency] = [] + for mapping in self._mappings: + inserts.append( + DSAppLinkAgency( + ds_agency_id=mapping.app_id, + agency_id=mapping.request_id, + ) + ) + session.add_all(inserts) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/queries/cte.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/queries/cte.py new file mode 100644 index 00000000..b91feb11 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/queries/cte.py @@ -0,0 +1,32 @@ +""" +Agencies to be added to the DS database must not have a +ds app link entry +""" +from sqlalchemy import Column, select, exists, CTE + +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency +from src.db.models.impl.agency.sqlalchemy import Agency + + +class DSAppLinkSyncAgencyAddPrerequisitesCTEContainer: + + def __init__(self): + self._cte = ( + select( + Agency.id + ) + .where( + ~exists( + select(DSAppLinkAgency.agency_id) + .where(DSAppLinkAgency.agency_id == Agency.id) + ) + ).cte("ds_app_link_sync_agency_add_prerequisites") + ) + + @property + def agency_id(self) -> Column[int]: + return self._cte.columns.id + + @property + def cte(self) -> CTE: + return self._cte \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/queries/get.py new file mode 100644 index 00000000..1ae9a13c --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/queries/get.py @@ -0,0 +1,70 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping, func +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.add.queries.cte import \ + 
DSAppLinkSyncAgencyAddPrerequisitesCTEContainer +from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.db.queries.base.builder import QueryBuilderBase +from src.external.pdap.impl.sync.agencies._shared.models.content import AgencySyncContentModel +from src.external.pdap.impl.sync.agencies.add.request import AddAgenciesOuterRequest, AddAgenciesInnerRequest + + +class DSAppSyncAgenciesAddGetQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> AddAgenciesOuterRequest: + cte = DSAppLinkSyncAgencyAddPrerequisitesCTEContainer() + + location_id_cte = ( + select( + LinkAgencyLocation.agency_id, + func.array_agg(LinkAgencyLocation.location_id).label("location_ids"), + ) + .group_by( + LinkAgencyLocation.agency_id, + ) + .cte("location_id_cte") + ) + + query = ( + select( + cte.agency_id, + Agency.name, + Agency.jurisdiction_type, + Agency.agency_type, + location_id_cte.c.location_ids, + ) + .join( + Agency, + Agency.id == cte.agency_id, + ) + .join( + location_id_cte, + location_id_cte.c.agency_id == cte.agency_id, + ) + ) + + mappings: Sequence[RowMapping] = await self.sh.mappings( + session=session, + query=query, + ) + + inner_requests: list[AddAgenciesInnerRequest] = [] + for mapping in mappings: + inner_requests.append( + AddAgenciesInnerRequest( + request_id=mapping[cte.agency_id], + content=AgencySyncContentModel( + name=mapping[Agency.name], + jurisdiction_type=mapping[Agency.jurisdiction_type], + agency_type=mapping[Agency.agency_type], + location_ids=mapping["location_ids"] + ) + ) + ) + + return AddAgenciesOuterRequest( + agencies=inner_requests, + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/queries/prereq.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/queries/prereq.py new file mode 100644 index 00000000..61097fc6 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/add/queries/prereq.py @@ -0,0 +1,17 @@ +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.add.queries.cte import \ + DSAppLinkSyncAgencyAddPrerequisitesCTEContainer +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncAgenciesAddPrerequisitesQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> bool: + return await self.sh.results_exist( + session=session, + query=select( + DSAppLinkSyncAgencyAddPrerequisitesCTEContainer().agency_id + ) + ) \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/impl/submit_meta_urls/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/__init__.py similarity index 100% rename from tests/automated/integration/tasks/url/impl/submit_meta_urls/__init__.py rename to src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/__init__.py diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/core.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/core.py new file mode 100644 index 00000000..e84d3b2b --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/core.py @@ -0,0 +1,64 @@ +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.delete.queries.delete_flags import \ + DSAppSyncAgenciesDeleteRemoveFlagsQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.delete.queries.delete_links import \ + 
DSAppSyncAgenciesDeleteRemoveLinksQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.delete.queries.get import \ + DSAppSyncAgenciesDeleteGetQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.delete.queries.prereq import \ + DSAppSyncAgenciesDeletePrerequisitesQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.templates.operator import DSSyncTaskOperatorBase +from src.db.enums import TaskType +from src.external.pdap.impl.sync.agencies.delete.core import DeleteAgenciesRequestBuilder + + +class DSAppSyncAgenciesDeleteTaskOperator( + DSSyncTaskOperatorBase +): + + @property + def task_type(self) -> TaskType: + return TaskType.SYNC_AGENCIES_DELETE + + async def meets_task_prerequisites(self) -> bool: + return await self.adb_client.run_query_builder( + DSAppSyncAgenciesDeletePrerequisitesQueryBuilder() + ) + + async def inner_task_logic(self) -> None: + ds_app_ids: list[int] = await self.get_inputs() + await self.make_request(ds_app_ids) + await self.delete_flags(ds_app_ids) + await self.delete_links(ds_app_ids) + + async def get_inputs(self) -> list[int]: + return await self.adb_client.run_query_builder( + DSAppSyncAgenciesDeleteGetQueryBuilder() + ) + + async def make_request( + self, + ds_app_ids: list[int] + ) -> None: + await self.pdap_client.run_request_builder( + DeleteAgenciesRequestBuilder(ds_app_ids) + ) + + async def delete_flags( + self, + ds_app_ids: list[int] + ) -> None: + await self.run_query_builder( + DSAppSyncAgenciesDeleteRemoveFlagsQueryBuilder( + ds_agency_ids=ds_app_ids + ) + ) + + async def delete_links( + self, + ds_app_ids: list[int] + ) -> None: + await self.run_query_builder( + DSAppSyncAgenciesDeleteRemoveLinksQueryBuilder( + ds_agency_ids=ds_app_ids + ) + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/cte.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/cte.py new file mode 100644 index 00000000..d93f6a1d --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/cte.py @@ -0,0 +1,29 @@ +""" +Agencies to be deleted from the DS database must be flagged for deletion +""" +from sqlalchemy import select, Column, CTE + +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency +from src.db.models.impl.flag.ds_delete.agency import FlagDSDeleteAgency + + +class DSAppLinkSyncAgencyDeletePrerequisitesCTEContainer: + + def __init__(self): + self._cte = ( + select( + DSAppLinkAgency.ds_agency_id + ) + .join( + FlagDSDeleteAgency, + FlagDSDeleteAgency.ds_agency_id == DSAppLinkAgency.ds_agency_id + ).cte("ds_app_link_sync_agency_delete_prerequisites") + ) + + @property + def ds_agency_id(self) -> Column[int]: + return self._cte.columns.ds_agency_id + + @property + def cte(self) -> CTE: + return self._cte \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/delete_flags.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/delete_flags.py new file mode 100644 index 00000000..f1633337 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/delete_flags.py @@ -0,0 +1,22 @@ +from sqlalchemy import delete +from sqlalchemy.ext.asyncio import AsyncSession + +from 
src.db.models.impl.flag.ds_delete.agency import FlagDSDeleteAgency +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncAgenciesDeleteRemoveFlagsQueryBuilder(QueryBuilderBase): + + def __init__( + self, + ds_agency_ids: list[int] + ): + super().__init__() + self._ds_agency_ids = ds_agency_ids + + async def run(self, session: AsyncSession) -> None: + statement = ( + delete(FlagDSDeleteAgency) + .where(FlagDSDeleteAgency.ds_agency_id.in_(self._ds_agency_ids)) + ) + await session.execute(statement) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/delete_links.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/delete_links.py new file mode 100644 index 00000000..0ad20ee0 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/delete_links.py @@ -0,0 +1,22 @@ +from sqlalchemy import delete +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncAgenciesDeleteRemoveLinksQueryBuilder(QueryBuilderBase): + + def __init__( + self, + ds_agency_ids: list[int] + ): + super().__init__() + self._ds_agency_ids = ds_agency_ids + + async def run(self, session: AsyncSession) -> None: + statement = ( + delete(DSAppLinkAgency) + .where(DSAppLinkAgency.ds_agency_id.in_(self._ds_agency_ids)) + ) + await session.execute(statement) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/get.py new file mode 100644 index 00000000..36dddee4 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/get.py @@ -0,0 +1,28 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.delete.queries.cte import \ + DSAppLinkSyncAgencyDeletePrerequisitesCTEContainer +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncAgenciesDeleteGetQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> list[int]: + """Get DS App links to delete.""" + cte = DSAppLinkSyncAgencyDeletePrerequisitesCTEContainer() + + query = ( + select( + cte.ds_agency_id, + ) + ) + + mappings: Sequence[RowMapping] = await self.sh.mappings( + session=session, + query=query, + ) + + return [mapping[cte.ds_agency_id] for mapping in mappings] \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/prereq.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/prereq.py new file mode 100644 index 00000000..fdafab72 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/delete/queries/prereq.py @@ -0,0 +1,17 @@ +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.delete.queries.cte import \ + DSAppLinkSyncAgencyDeletePrerequisitesCTEContainer +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncAgenciesDeletePrerequisitesQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> bool: + return await self.sh.results_exist( + session=session, + query=select( + 
DSAppLinkSyncAgencyDeletePrerequisitesCTEContainer().ds_agency_id + ) + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/core.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/core.py new file mode 100644 index 00000000..24481e8d --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/core.py @@ -0,0 +1,56 @@ +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.update.queries.get import \ + DSAppSyncAgenciesUpdateGetQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.update.queries.prereq import \ + DSAppSyncAgenciesUpdatePrerequisitesQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.update.queries.update_links import \ + DSAppSyncAgenciesUpdateAlterLinksQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.templates.operator import DSSyncTaskOperatorBase +from src.db.enums import TaskType +from src.external.pdap.impl.sync.agencies.update.core import UpdateAgenciesRequestBuilder +from src.external.pdap.impl.sync.agencies.update.request import UpdateAgenciesOuterRequest + + +class DSAppSyncAgenciesUpdateTaskOperator( + DSSyncTaskOperatorBase +): + + @property + def task_type(self) -> TaskType: + return TaskType.SYNC_AGENCIES_UPDATE + + async def meets_task_prerequisites(self) -> bool: + return await self.adb_client.run_query_builder( + DSAppSyncAgenciesUpdatePrerequisitesQueryBuilder() + ) + + async def inner_task_logic(self) -> None: + request: UpdateAgenciesOuterRequest = await self.get_inputs() + await self.make_request(request) + ds_app_ids: list[int] = [ + agency.app_id + for agency in request.agencies + ] + await self.update_links(ds_app_ids) + + async def get_inputs(self) -> UpdateAgenciesOuterRequest: + return await self.adb_client.run_query_builder( + DSAppSyncAgenciesUpdateGetQueryBuilder() + ) + + async def make_request( + self, + request: UpdateAgenciesOuterRequest + ): + await self.pdap_client.run_request_builder( + UpdateAgenciesRequestBuilder(request) + ) + + async def update_links( + self, + ds_app_ids: list[int] + ) -> None: + await self.adb_client.run_query_builder( + DSAppSyncAgenciesUpdateAlterLinksQueryBuilder( + ds_agency_ids=ds_app_ids + ) + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/queries/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/queries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/queries/cte.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/queries/cte.py new file mode 100644 index 00000000..57a9957c --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/queries/cte.py @@ -0,0 +1,34 @@ +from sqlalchemy import select, Column, CTE + +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency +from src.db.models.impl.agency.sqlalchemy import Agency + + +class DSAppLinkSyncAgencyUpdatePrerequisitesCTEContainer: + + def __init__(self): + self._cte = ( + select( + DSAppLinkAgency.agency_id, + DSAppLinkAgency.ds_agency_id, + ) + .join( + Agency, + Agency.id == DSAppLinkAgency.agency_id, + ) + .where( + Agency.updated_at > DSAppLinkAgency.last_synced_at + 
).cte("ds_app_link_sync_agency_update_prerequisites") + ) + + @property + def ds_agency_id(self) -> Column[int]: + return self._cte.columns.ds_agency_id + + @property + def agency_id(self) -> Column[int]: + return self._cte.columns.agency_id + + @property + def cte(self) -> CTE: + return self._cte \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/queries/get.py new file mode 100644 index 00000000..81572a24 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/queries/get.py @@ -0,0 +1,75 @@ +from typing import Sequence + +from sqlalchemy import select, func, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.update.queries.cte import \ + DSAppLinkSyncAgencyUpdatePrerequisitesCTEContainer +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency +from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.db.queries.base.builder import QueryBuilderBase +from src.external.pdap.impl.sync.agencies._shared.models.content import AgencySyncContentModel +from src.external.pdap.impl.sync.agencies.update.request import UpdateAgenciesOuterRequest, UpdateAgenciesInnerRequest + + +class DSAppSyncAgenciesUpdateGetQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> UpdateAgenciesOuterRequest: + cte = DSAppLinkSyncAgencyUpdatePrerequisitesCTEContainer() + + location_id_cte = ( + select( + LinkAgencyLocation.agency_id, + func.array_agg(LinkAgencyLocation.location_id).label("location_ids"), + ) + .join( + Agency, + Agency.id == LinkAgencyLocation.agency_id, + ) + .group_by( + LinkAgencyLocation.agency_id, + ) + .cte() + ) + + query = ( + select( + cte.ds_agency_id, + Agency.name, + Agency.jurisdiction_type, + Agency.agency_type, + location_id_cte.c.location_ids, + ) + .join( + Agency, + Agency.id == cte.agency_id, + ) + .join( + location_id_cte, + location_id_cte.c.agency_id == cte.agency_id, + ) + ) + + mappings: Sequence[RowMapping] = await self.sh.mappings( + session=session, + query=query, + ) + + inner_requests: list[UpdateAgenciesInnerRequest] = [] + for mapping in mappings: + inner_requests.append( + UpdateAgenciesInnerRequest( + app_id=mapping[cte.ds_agency_id], + content=AgencySyncContentModel( + name=mapping[Agency.name], + jurisdiction_type=mapping[Agency.jurisdiction_type], + agency_type=mapping[Agency.agency_type], + location_ids=mapping["location_ids"] + ) + ) + ) + + return UpdateAgenciesOuterRequest( + agencies=inner_requests, + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/queries/prereq.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/queries/prereq.py new file mode 100644 index 00000000..5327f4a8 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/queries/prereq.py @@ -0,0 +1,17 @@ +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.update.queries.cte import \ + DSAppLinkSyncAgencyUpdatePrerequisitesCTEContainer +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncAgenciesUpdatePrerequisitesQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> bool: + return await 
self.sh.results_exist( + session=session, + query=select( + DSAppLinkSyncAgencyUpdatePrerequisitesCTEContainer().agency_id + ) + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/queries/update_links.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/queries/update_links.py new file mode 100644 index 00000000..8950ccd6 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/agencies/update/queries/update_links.py @@ -0,0 +1,25 @@ +from sqlalchemy import update, func +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncAgenciesUpdateAlterLinksQueryBuilder(QueryBuilderBase): + + def __init__( + self, + ds_agency_ids: list[int] + ): + super().__init__() + self._ds_agency_ids = ds_agency_ids + + async def run(self, session: AsyncSession) -> None: + statement = ( + update(DSAppLinkAgency) + .where(DSAppLinkAgency.ds_agency_id.in_(self._ds_agency_ids)) + .values({ + DSAppLinkAgency.last_synced_at: func.now(), + }) + ) + await session.execute(statement) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/core.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/core.py new file mode 100644 index 00000000..760583fd --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/core.py @@ -0,0 +1,53 @@ +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.add.queries.add_links import \ + DSAppSyncDataSourcesAddInsertLinksQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.add.queries.get import \ + DSAppSyncDataSourcesAddGetQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.add.queries.prereq import \ + DSAppSyncDataSourcesAddPrerequisitesQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.templates.operator import DSSyncTaskOperatorBase +from src.db.enums import TaskType +from src.external.pdap.impl.sync.data_sources.add.core import AddDataSourcesRequestBuilder +from src.external.pdap.impl.sync.data_sources.add.request import AddDataSourcesOuterRequest +from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseInnerModel + + +class DSAppSyncDataSourcesAddTaskOperator( + DSSyncTaskOperatorBase +): + + @property + def task_type(self) -> TaskType: + return TaskType.SYNC_DATA_SOURCES_ADD + + async def meets_task_prerequisites(self) -> bool: + return await self.run_query_builder( + DSAppSyncDataSourcesAddPrerequisitesQueryBuilder() + ) + + + async def inner_task_logic(self) -> None: + request: AddDataSourcesOuterRequest = await self.get_request_input() + responses: list[DSAppSyncAddResponseInnerModel] = await self.make_request(request) + await self.insert_ds_app_links(responses) + + + async def get_request_input(self) -> AddDataSourcesOuterRequest: + return await self.run_query_builder( + DSAppSyncDataSourcesAddGetQueryBuilder() + ) + + async def make_request( + self, + request: 
AddDataSourcesOuterRequest + ) -> list[DSAppSyncAddResponseInnerModel]: + return await self.pdap_client.run_request_builder( + AddDataSourcesRequestBuilder(request) + ) + + async def insert_ds_app_links( + self, + responses: list[DSAppSyncAddResponseInnerModel] + ) -> None: + await self.run_query_builder( + DSAppSyncDataSourcesAddInsertLinksQueryBuilder(responses) + ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/add_links.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/add_links.py new file mode 100644 index 00000000..88c88d4b --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/add_links.py @@ -0,0 +1,26 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource +from src.db.queries.base.builder import QueryBuilderBase +from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseInnerModel + + +class DSAppSyncDataSourcesAddInsertLinksQueryBuilder(QueryBuilderBase): + + def __init__( + self, + mappings: list[DSAppSyncAddResponseInnerModel] + ): + super().__init__() + self._mappings = mappings + + async def run(self, session: AsyncSession) -> None: + inserts: list[DSAppLinkDataSource] = [] + for mapping in self._mappings: + inserts.append( + DSAppLinkDataSource( + ds_data_source_id=mapping.app_id, + url_id=mapping.request_id, + ) + ) + session.add_all(inserts) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/cte.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/cte.py new file mode 100644 index 00000000..8c8bc945 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/cte.py @@ -0,0 +1,39 @@ +""" +Data sources to be added to the DS database must not have a +ds app link entry +""" +from sqlalchemy import select, exists, CTE, Column + +from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource + + +class DSAppLinkSyncDataSourceAddPrerequisitesCTEContainer: + + def __init__(self): + self._cte = ( + select( + URL.id + ) + .join( + FlagURLValidated, + FlagURLValidated.url_id == URL.id, + ) + .where( + FlagURLValidated.type == URLType.DATA_SOURCE, + ~exists( + select(DSAppLinkDataSource.url_id) + .where(DSAppLinkDataSource.url_id == URL.id) + ) + ).cte("ds_app_link_sync_data_source_add_prerequisites") + ) + + @property + def url_id(self) -> Column[int]: + return self._cte.columns.id + + @property + def cte(self) -> CTE: + return self._cte \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py new file mode 100644 index 00000000..47beb2a3 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/get.py @@ -0,0 +1,117 @@ +from typing import Sequence + +from sqlalchemy import RowMapping, func, select +from sqlalchemy.ext.asyncio import AsyncSession + +from 
src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.add.queries.cte import \ + DSAppLinkSyncDataSourceAddPrerequisitesCTEContainer +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata +from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType +from src.db.queries.base.builder import QueryBuilderBase +from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel +from src.external.pdap.impl.sync.data_sources.add.request import AddDataSourcesOuterRequest, AddDataSourcesInnerRequest + + +class DSAppSyncDataSourcesAddGetQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> AddDataSourcesOuterRequest: + cte = DSAppLinkSyncDataSourceAddPrerequisitesCTEContainer() + + agency_id_cte = ( + select( + LinkURLAgency.url_id, + func.array_agg(LinkURLAgency.agency_id).label("agency_ids") + ) + .group_by( + LinkURLAgency.url_id + ) + .cte() + ) + + query = ( + select( + cte.url_id, + # Required + URL.full_url, + URL.name, + URLRecordType.record_type, + agency_id_cte.c.agency_ids, + # Optional + URL.description, + URLOptionalDataSourceMetadata.record_formats, + URLOptionalDataSourceMetadata.data_portal_type, + URLOptionalDataSourceMetadata.supplying_entity, + URLOptionalDataSourceMetadata.coverage_start, + URLOptionalDataSourceMetadata.coverage_end, + URLOptionalDataSourceMetadata.agency_supplied, + URLOptionalDataSourceMetadata.agency_originated, + URLOptionalDataSourceMetadata.update_method, + URLOptionalDataSourceMetadata.readme_url, + URLOptionalDataSourceMetadata.originating_entity, + URLOptionalDataSourceMetadata.retention_schedule, + URLOptionalDataSourceMetadata.scraper_url, + URLOptionalDataSourceMetadata.access_notes, + URLOptionalDataSourceMetadata.access_types, + ) + .select_from( + cte.cte + ) + .join( + URL, + URL.id == cte.url_id, + ) + .outerjoin( + URLOptionalDataSourceMetadata, + URL.id == URLOptionalDataSourceMetadata.url_id, + ) + .join( + URLRecordType, + URLRecordType.url_id == URL.id, + ) + .join( + agency_id_cte, + cte.url_id == agency_id_cte.c.url_id + ) + ) + + mappings: Sequence[RowMapping] = await self.sh.mappings( + session=session, + query=query, + ) + + inner_requests: list[AddDataSourcesInnerRequest] = [] + for mapping in mappings: + inner_requests.append( + AddDataSourcesInnerRequest( + request_id=mapping[cte.url_id], + content=DataSourceSyncContentModel( + # Required + source_url=mapping["full_url"], + name=mapping[URL.name], + record_type=mapping[URLRecordType.record_type], + agency_ids=mapping["agency_ids"], + # Optional + description=mapping[URL.description], + record_formats=mapping[URLOptionalDataSourceMetadata.record_formats], + data_portal_type=mapping[URLOptionalDataSourceMetadata.data_portal_type], + supplying_entity=mapping[URLOptionalDataSourceMetadata.supplying_entity], + coverage_start=mapping[URLOptionalDataSourceMetadata.coverage_start], + coverage_end=mapping[URLOptionalDataSourceMetadata.coverage_end], + agency_supplied=mapping[URLOptionalDataSourceMetadata.agency_supplied], + agency_originated=mapping[URLOptionalDataSourceMetadata.agency_originated], + update_method=mapping[URLOptionalDataSourceMetadata.update_method], + readme_url=mapping[URLOptionalDataSourceMetadata.readme_url], + originating_entity=mapping[URLOptionalDataSourceMetadata.originating_entity], + 
retention_schedule=mapping[URLOptionalDataSourceMetadata.retention_schedule], + scraper_url=mapping[URLOptionalDataSourceMetadata.scraper_url], + access_notes=mapping[URLOptionalDataSourceMetadata.access_notes], + access_types=mapping[URLOptionalDataSourceMetadata.access_types], + ) + ) + ) + + return AddDataSourcesOuterRequest( + data_sources=inner_requests, + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/prereq.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/prereq.py new file mode 100644 index 00000000..d375f524 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/add/queries/prereq.py @@ -0,0 +1,17 @@ +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.add.queries.cte import \ + DSAppLinkSyncDataSourceAddPrerequisitesCTEContainer +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncDataSourcesAddPrerequisitesQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> bool: + return await self.sh.results_exist( + session=session, + query=select( + DSAppLinkSyncDataSourceAddPrerequisitesCTEContainer().url_id + ) + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/core.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/core.py new file mode 100644 index 00000000..14450a51 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/core.py @@ -0,0 +1,64 @@ +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.delete.queries.delete_flags import \ + DSAppSyncDataSourcesDeleteRemoveFlagsQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.delete.queries.delete_links import \ + DSAppSyncDataSourcesDeleteRemoveLinksQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.delete.queries.get import \ + DSAppSyncDataSourcesDeleteGetQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.delete.queries.prereq import \ + DSAppSyncDataSourcesDeletePrerequisitesQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.templates.operator import DSSyncTaskOperatorBase +from src.db.enums import TaskType +from src.external.pdap.impl.sync.data_sources.delete.core import DeleteDataSourcesRequestBuilder + + +class DSAppSyncDataSourcesDeleteTaskOperator( + DSSyncTaskOperatorBase +): + + @property + def task_type(self) -> TaskType: + return TaskType.SYNC_DATA_SOURCES_DELETE + + async def meets_task_prerequisites(self) -> bool: + return await self.run_query_builder( + DSAppSyncDataSourcesDeletePrerequisitesQueryBuilder() + ) + + async def inner_task_logic(self) -> None: + ds_app_ids: list[int] = await self.get_inputs() + await self.make_request(ds_app_ids) + await self.delete_flags(ds_app_ids) + await self.delete_links(ds_app_ids) + + async def get_inputs(self) -> list[int]: + return await self.run_query_builder( + DSAppSyncDataSourcesDeleteGetQueryBuilder() + ) + + async def make_request( + self, + ds_app_ids: list[int] + ) -> None: + await self.pdap_client.run_request_builder( + DeleteDataSourcesRequestBuilder(ds_app_ids) + ) + + async def delete_flags( + 
self, + ds_app_ids: list[int] + ) -> None: + await self.run_query_builder( + DSAppSyncDataSourcesDeleteRemoveFlagsQueryBuilder( + ds_data_source_ids=ds_app_ids + ) + ) + + async def delete_links( + self, + ds_app_ids: list[int] + ) -> None: + await self.run_query_builder( + DSAppSyncDataSourcesDeleteRemoveLinksQueryBuilder( + ds_data_source_ids=ds_app_ids + ) + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/cte.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/cte.py new file mode 100644 index 00000000..4e14dbf8 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/cte.py @@ -0,0 +1,29 @@ +""" +Data sources to be deleted from the DS database must be flagged for deletion +""" +from sqlalchemy import select, Column, CTE + +from src.db.models.impl.flag.ds_delete.data_source import FlagDSDeleteDataSource +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource + + +class DSAppLinkSyncDataSourceDeletePrerequisitesCTEContainer: + + def __init__(self): + self._cte = ( + select( + DSAppLinkDataSource.ds_data_source_id + ) + .join( + FlagDSDeleteDataSource, + FlagDSDeleteDataSource.ds_data_source_id == DSAppLinkDataSource.ds_data_source_id + ).cte("ds_app_link_sync_data_source_delete_prerequisites") + ) + + @property + def ds_data_source_id(self) -> Column[int]: + return self._cte.columns.ds_data_source_id + + @property + def cte(self) -> CTE: + return self._cte \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/delete_flags.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/delete_flags.py new file mode 100644 index 00000000..ef869a9c --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/delete_flags.py @@ -0,0 +1,22 @@ +from sqlalchemy import delete +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.flag.ds_delete.data_source import FlagDSDeleteDataSource +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncDataSourcesDeleteRemoveFlagsQueryBuilder(QueryBuilderBase): + + def __init__( + self, + ds_data_source_ids: list[int] + ): + super().__init__() + self._ds_data_source_ids = ds_data_source_ids + + async def run(self, session: AsyncSession) -> None: + statement = ( + delete(FlagDSDeleteDataSource) + .where(FlagDSDeleteDataSource.ds_data_source_id.in_(self._ds_data_source_ids)) + ) + await session.execute(statement) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/delete_links.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/delete_links.py new file mode 100644 index 00000000..9b417ce8 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/delete_links.py @@ -0,0 +1,22 @@ +from sqlalchemy import delete +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncDataSourcesDeleteRemoveLinksQueryBuilder(QueryBuilderBase): + + def __init__( + self, 
ds_data_source_ids: list[int] + ): + super().__init__() + self._ds_data_source_ids = ds_data_source_ids + + async def run(self, session: AsyncSession) -> None: + statement = ( + delete(DSAppLinkDataSource) + .where(DSAppLinkDataSource.ds_data_source_id.in_(self._ds_data_source_ids)) + ) + await session.execute(statement) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/get.py new file mode 100644 index 00000000..7077beac --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/get.py @@ -0,0 +1,28 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.delete.queries.cte import \ + DSAppLinkSyncDataSourceDeletePrerequisitesCTEContainer +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncDataSourcesDeleteGetQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> list[int]: + """Get DS App links to delete.""" + cte = DSAppLinkSyncDataSourceDeletePrerequisitesCTEContainer() + + query = ( + select( + cte.ds_data_source_id, + ) + ) + + mappings: Sequence[RowMapping] = await self.sh.mappings( + session=session, + query=query, + ) + + return [mapping[cte.ds_data_source_id] for mapping in mappings] \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/prereq.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/prereq.py new file mode 100644 index 00000000..1f3e797a --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/delete/queries/prereq.py @@ -0,0 +1,17 @@ +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.delete.queries.cte import \ + DSAppLinkSyncDataSourceDeletePrerequisitesCTEContainer +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncDataSourcesDeletePrerequisitesQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> bool: + return await self.sh.results_exist( + session=session, + query=select( + DSAppLinkSyncDataSourceDeletePrerequisitesCTEContainer().ds_data_source_id + ) + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/core.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/core.py new file mode 100644 index 00000000..fd925146 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/core.py @@ -0,0 +1,56 @@ +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.queries.get import \ + DSAppSyncDataSourcesUpdateGetQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.queries.prereq import \ + DSAppSyncDataSourcesUpdatePrerequisitesQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.queries.update_links import \ + DSAppSyncDataSourcesUpdateAlterLinksQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.templates.operator import DSSyncTaskOperatorBase +from 
src.db.enums import TaskType +from src.external.pdap.impl.sync.data_sources.update.core import UpdateDataSourcesRequestBuilder +from src.external.pdap.impl.sync.data_sources.update.request import UpdateDataSourcesOuterRequest + + +class DSAppSyncDataSourcesUpdateTaskOperator( + DSSyncTaskOperatorBase +): + + @property + def task_type(self) -> TaskType: + return TaskType.SYNC_DATA_SOURCES_UPDATE + + async def meets_task_prerequisites(self) -> bool: + return await self.adb_client.run_query_builder( + DSAppSyncDataSourcesUpdatePrerequisitesQueryBuilder() + ) + + async def inner_task_logic(self) -> None: + request: UpdateDataSourcesOuterRequest = await self.get_inputs() + await self.make_request(request) + ds_app_ids: list[int] = [ + ds.app_id + for ds in request.data_sources + ] + await self.update_links(ds_app_ids) + + async def get_inputs(self) -> UpdateDataSourcesOuterRequest: + return await self.adb_client.run_query_builder( + DSAppSyncDataSourcesUpdateGetQueryBuilder() + ) + + async def make_request( + self, + request: UpdateDataSourcesOuterRequest + ): + await self.pdap_client.run_request_builder( + UpdateDataSourcesRequestBuilder(request) + ) + + async def update_links( + self, + ds_app_ids: list[int] + ) -> None: + await self.adb_client.run_query_builder( + DSAppSyncDataSourcesUpdateAlterLinksQueryBuilder( + ds_data_source_ids=ds_app_ids + ) + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py new file mode 100644 index 00000000..8f0ff65e --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/cte.py @@ -0,0 +1,49 @@ +from sqlalchemy import select, or_, Column, CTE + +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata +from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType + + +class DSAppLinkSyncDataSourceUpdatePrerequisitesCTEContainer: + + def __init__(self): + self._cte = ( + select( + DSAppLinkDataSource.url_id, + DSAppLinkDataSource.ds_data_source_id, + ) + .join( + URL, + URL.id == DSAppLinkDataSource.url_id, + ) + .outerjoin( + URLRecordType, + URL.id == URLRecordType.url_id, + ) + .outerjoin( + URLOptionalDataSourceMetadata, + URL.id == URLOptionalDataSourceMetadata.url_id, + ) + .where( + or_( + URL.updated_at > DSAppLinkDataSource.last_synced_at, + URLOptionalDataSourceMetadata.updated_at > DSAppLinkDataSource.last_synced_at, + URLRecordType.created_at > DSAppLinkDataSource.last_synced_at, + URLRecordType.updated_at > DSAppLinkDataSource.last_synced_at, + ) + ).cte("ds_app_link_sync_data_source_update_prerequisites") + ) + + @property + def url_id(self) -> Column[int]: + return self._cte.columns.url_id + + @property + def ds_data_source_id(self) -> Column[int]: + return self._cte.columns.ds_data_source_id + + @property + def cte(self) -> CTE: + return self._cte \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py 
b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py new file mode 100644 index 00000000..855075e3 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/get.py @@ -0,0 +1,120 @@ +from typing import Sequence + +from sqlalchemy import select, func, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.queries.cte import \ + DSAppLinkSyncDataSourceUpdatePrerequisitesCTEContainer +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata +from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType +from src.db.queries.base.builder import QueryBuilderBase +from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel +from src.external.pdap.impl.sync.data_sources.update.request import UpdateDataSourcesOuterRequest, \ + UpdateDataSourcesInnerRequest + + +class DSAppSyncDataSourcesUpdateGetQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> UpdateDataSourcesOuterRequest: + cte = DSAppLinkSyncDataSourceUpdatePrerequisitesCTEContainer() + + agency_id_cte = ( + select( + LinkURLAgency.url_id, + func.array_agg(LinkURLAgency.agency_id).label("agency_ids") + ) + .group_by( + LinkURLAgency.url_id + ) + .cte() + ) + + query = ( + select( + cte.ds_data_source_id, + # Required + URL.full_url, + URL.name, + URLRecordType.record_type, + agency_id_cte.c.agency_ids, + # Optional + URL.description, + URLOptionalDataSourceMetadata.record_formats, + URLOptionalDataSourceMetadata.data_portal_type, + URLOptionalDataSourceMetadata.supplying_entity, + URLOptionalDataSourceMetadata.coverage_start, + URLOptionalDataSourceMetadata.coverage_end, + URLOptionalDataSourceMetadata.agency_supplied, + URLOptionalDataSourceMetadata.agency_originated, + URLOptionalDataSourceMetadata.update_method, + URLOptionalDataSourceMetadata.readme_url, + URLOptionalDataSourceMetadata.originating_entity, + URLOptionalDataSourceMetadata.retention_schedule, + URLOptionalDataSourceMetadata.scraper_url, + URLOptionalDataSourceMetadata.access_notes, + URLOptionalDataSourceMetadata.access_types, + URLOptionalDataSourceMetadata.data_portal_type_other + ) + .select_from( + cte.cte + ) + .join( + URL, + URL.id == cte.url_id, + ) + .outerjoin( + URLOptionalDataSourceMetadata, + URL.id == URLOptionalDataSourceMetadata.url_id, + ) + .join( + URLRecordType, + URLRecordType.url_id == URL.id, + ) + .outerjoin( + agency_id_cte, + cte.url_id == agency_id_cte.c.url_id + ) + ) + + mappings: Sequence[RowMapping] = await self.sh.mappings( + session=session, + query=query, + ) + + inner_requests: list[UpdateDataSourcesInnerRequest] = [] + for mapping in mappings: + inner_requests.append( + UpdateDataSourcesInnerRequest( + app_id=mapping[cte.ds_data_source_id], + content=DataSourceSyncContentModel( + # Required + source_url=mapping["full_url"], + name=mapping[URL.name], + record_type=mapping[URLRecordType.record_type], + agency_ids=mapping["agency_ids"] or [], + # Optional + description=mapping[URL.description], + record_formats=mapping[URLOptionalDataSourceMetadata.record_formats], + data_portal_type=mapping[URLOptionalDataSourceMetadata.data_portal_type], + supplying_entity=mapping[URLOptionalDataSourceMetadata.supplying_entity], + 
coverage_start=mapping[URLOptionalDataSourceMetadata.coverage_start], + coverage_end=mapping[URLOptionalDataSourceMetadata.coverage_end], + agency_supplied=mapping[URLOptionalDataSourceMetadata.agency_supplied], + agency_originated=mapping[URLOptionalDataSourceMetadata.agency_originated], + update_method=mapping[URLOptionalDataSourceMetadata.update_method], + readme_url=mapping[URLOptionalDataSourceMetadata.readme_url], + originating_entity=mapping[URLOptionalDataSourceMetadata.originating_entity], + retention_schedule=mapping[URLOptionalDataSourceMetadata.retention_schedule], + scraper_url=mapping[URLOptionalDataSourceMetadata.scraper_url], + access_notes=mapping[URLOptionalDataSourceMetadata.access_notes], + access_types=mapping[URLOptionalDataSourceMetadata.access_types], + data_portal_type_other=mapping[URLOptionalDataSourceMetadata.data_portal_type_other], + ) + ) + ) + + return UpdateDataSourcesOuterRequest( + data_sources=inner_requests, + ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/prereq.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/prereq.py new file mode 100644 index 00000000..e31ff1d7 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/prereq.py @@ -0,0 +1,17 @@ +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.queries.cte import \ + DSAppLinkSyncDataSourceUpdatePrerequisitesCTEContainer +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncDataSourcesUpdatePrerequisitesQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> bool: + return await self.sh.results_exist( + session=session, + query=select( + DSAppLinkSyncDataSourceUpdatePrerequisitesCTEContainer().ds_data_source_id + ) + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/update_links.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/update_links.py new file mode 100644 index 00000000..ffba7ec8 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/data_sources/update/queries/update_links.py @@ -0,0 +1,25 @@ +from sqlalchemy import update, func +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncDataSourcesUpdateAlterLinksQueryBuilder(QueryBuilderBase): + + def __init__( + self, + ds_data_source_ids: list[int] + ): + super().__init__() + self._ds_data_source_ids = ds_data_source_ids + + async def run(self, session: AsyncSession) -> None: + statement = ( + update(DSAppLinkDataSource) + .where(DSAppLinkDataSource.ds_data_source_id.in_(self._ds_data_source_ids)) + .values({ + DSAppLinkDataSource.last_synced_at: func.now(), + }) + ) + await session.execute(statement) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/core.py 
b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/core.py new file mode 100644 index 00000000..6823c205 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/core.py @@ -0,0 +1,50 @@ +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.add.queries.add_links import \ + DSAppSyncMetaURLsAddInsertLinksQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.add.queries.get import DSAppSyncMetaURLsAddGetQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.add.queries.prereq import \ + DSAppSyncMetaURLsAddPrerequisitesQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.templates.operator import DSSyncTaskOperatorBase +from src.db.enums import TaskType +from src.external.pdap.impl.sync.meta_urls.add.core import AddMetaURLsRequestBuilder +from src.external.pdap.impl.sync.meta_urls.add.request import AddMetaURLsOuterRequest +from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseInnerModel + + +class DSAppSyncMetaURLsAddTaskOperator( + DSSyncTaskOperatorBase +): + + @property + def task_type(self) -> TaskType: + return TaskType.SYNC_META_URLS_ADD + + async def meets_task_prerequisites(self) -> bool: + return await self.run_query_builder( + DSAppSyncMetaURLsAddPrerequisitesQueryBuilder() + ) + + async def inner_task_logic(self) -> None: + request: AddMetaURLsOuterRequest = await self.get_request_input() + responses: list[DSAppSyncAddResponseInnerModel] = await self.make_request(request) + await self.insert_ds_app_links(responses) + + async def get_request_input(self) -> AddMetaURLsOuterRequest: + return await self.run_query_builder( + DSAppSyncMetaURLsAddGetQueryBuilder() + ) + + async def make_request( + self, + request: AddMetaURLsOuterRequest + ) -> list[DSAppSyncAddResponseInnerModel]: + return await self.pdap_client.run_request_builder( + AddMetaURLsRequestBuilder(request) + ) + + async def insert_ds_app_links( + self, + responses: list[DSAppSyncAddResponseInnerModel] + ) -> None: + await self.run_query_builder( + DSAppSyncMetaURLsAddInsertLinksQueryBuilder(responses) + ) diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/add_links.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/add_links.py new file mode 100644 index 00000000..52a288f3 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/add_links.py @@ -0,0 +1,26 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL +from src.db.queries.base.builder import QueryBuilderBase +from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseInnerModel + + +class DSAppSyncMetaURLsAddInsertLinksQueryBuilder(QueryBuilderBase): + + def __init__( + self, + mappings: list[DSAppSyncAddResponseInnerModel] + ): + super().__init__() + self._mappings = mappings + + async def run(self, session: AsyncSession) -> None: + inserts: list[DSAppLinkMetaURL] = [] + for mapping in self._mappings: + inserts.append( + DSAppLinkMetaURL( + ds_meta_url_id=mapping.app_id, + url_id=mapping.request_id, + ) + ) + session.add_all(inserts) \ No newline at end of file diff --git 
a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/cte.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/cte.py new file mode 100644 index 00000000..178e19e8 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/cte.py @@ -0,0 +1,32 @@ +""" +Meta URLs to be added to the DS database must not have a +ds app link entry +""" +from sqlalchemy import select, exists, Column, CTE + +from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL +from src.db.models.views.meta_url import MetaURL + + +class DSAppLinkSyncMetaURLAddPrerequisitesCTEContainer: + + def __init__(self): + self._cte = ( + select( + MetaURL.url_id + ) + .where( + ~exists( + select(DSAppLinkMetaURL.url_id) + .where(DSAppLinkMetaURL.url_id == MetaURL.url_id) + ) + ).cte("ds_app_link_sync_meta_url_add_prerequisites") + ) + + @property + def url_id(self) -> Column[int]: + return self._cte.columns.url_id + + @property + def cte(self) -> CTE: + return self._cte \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py new file mode 100644 index 00000000..42a9149b --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/get.py @@ -0,0 +1,69 @@ +from typing import Sequence + +from sqlalchemy import select, func, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.add.queries.cte import \ + DSAppLinkSyncMetaURLAddPrerequisitesCTEContainer +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.base.builder import QueryBuilderBase +from src.external.pdap.impl.sync.meta_urls._shared.content import MetaURLSyncContentModel +from src.external.pdap.impl.sync.meta_urls.add.request import AddMetaURLsOuterRequest, AddMetaURLsInnerRequest + + +class DSAppSyncMetaURLsAddGetQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> AddMetaURLsOuterRequest: + cte = DSAppLinkSyncMetaURLAddPrerequisitesCTEContainer() + + agency_id_cte = ( + select( + LinkURLAgency.url_id, + func.array_agg(LinkURLAgency.agency_id).label("agency_ids") + ) + .group_by( + LinkURLAgency.url_id + ) + .cte() + ) + + query = ( + select( + cte.url_id, + URL.full_url, + agency_id_cte.c.agency_ids + ) + .select_from( + cte.cte + ) + .join( + URL, + URL.id == cte.url_id, + ) + .join( + agency_id_cte, + cte.url_id == agency_id_cte.c.url_id + ) + ) + + mappings: Sequence[RowMapping] = await self.sh.mappings( + session=session, + query=query, + ) + + inner_requests: list[AddMetaURLsInnerRequest] = [] + for mapping in mappings: + inner_requests.append( + AddMetaURLsInnerRequest( + request_id=mapping[cte.url_id], + content=MetaURLSyncContentModel( + url=mapping["full_url"], + agency_ids=mapping["agency_ids"] + ) + ) + ) + + return AddMetaURLsOuterRequest( + meta_urls=inner_requests, + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/prereq.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/prereq.py new file mode 100644 index 00000000..9439b6d0 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/add/queries/prereq.py @@ -0,0 +1,17 @@ +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from 
src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.add.queries.cte import \ + DSAppLinkSyncMetaURLAddPrerequisitesCTEContainer +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncMetaURLsAddPrerequisitesQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> bool: + return await self.sh.results_exist( + session=session, + query=select( + DSAppLinkSyncMetaURLAddPrerequisitesCTEContainer().url_id + ) + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/core.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/core.py new file mode 100644 index 00000000..32f5ef85 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/core.py @@ -0,0 +1,64 @@ +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.delete.queries.delete_flags import \ + DSAppSyncMetaURLsDeleteRemoveFlagsQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.delete.queries.delete_links import \ + DSAppSyncMetaURLsDeleteRemoveLinksQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.delete.queries.get import \ + DSAppSyncMetaURLsDeleteGetQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.delete.queries.prereq import \ + DSAppSyncMetaURLsDeletePrerequisitesQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.templates.operator import DSSyncTaskOperatorBase +from src.db.enums import TaskType +from src.external.pdap.impl.sync.meta_urls.delete.core import DeleteMetaURLsRequestBuilder + + +class DSAppSyncMetaURLsDeleteTaskOperator( + DSSyncTaskOperatorBase +): + + @property + def task_type(self) -> TaskType: + return TaskType.SYNC_META_URLS_DELETE + + async def meets_task_prerequisites(self) -> bool: + return await self.run_query_builder( + DSAppSyncMetaURLsDeletePrerequisitesQueryBuilder() + ) + + async def inner_task_logic(self) -> None: + ds_app_ids: list[int] = await self.get_inputs() + await self.make_request(ds_app_ids) + await self.delete_flags(ds_app_ids) + await self.delete_links(ds_app_ids) + + async def get_inputs(self) -> list[int]: + return await self.run_query_builder( + DSAppSyncMetaURLsDeleteGetQueryBuilder() + ) + + async def make_request( + self, + ds_app_ids: list[int] + ) -> None: + await self.pdap_client.run_request_builder( + DeleteMetaURLsRequestBuilder(ds_app_ids) + ) + + async def delete_flags( + self, + ds_app_ids: list[int] + ) -> None: + await self.run_query_builder( + DSAppSyncMetaURLsDeleteRemoveFlagsQueryBuilder( + ds_meta_url_ids=ds_app_ids + ) + ) + + async def delete_links( + self, + ds_app_ids: list[int] + ) -> None: + await self.run_query_builder( + DSAppSyncMetaURLsDeleteRemoveLinksQueryBuilder( + ds_meta_url_ids=ds_app_ids + ) + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/cte.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/cte.py new file mode 100644 index 00000000..91887e48 --- /dev/null +++ 
b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/cte.py @@ -0,0 +1,29 @@ +""" +Meta URLs to be deleted from the DS database must be flagged for deletion +""" +from sqlalchemy import Column, CTE, select + +from src.db.models.impl.flag.ds_delete.meta_url import FlagDSDeleteMetaURL +from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL + + +class DSAppLinkSyncMetaURLDeletePrerequisitesCTEContainer: + + def __init__(self): + self._cte = ( + select( + DSAppLinkMetaURL.ds_meta_url_id + ) + .join( + FlagDSDeleteMetaURL, + FlagDSDeleteMetaURL.ds_meta_url_id == DSAppLinkMetaURL.ds_meta_url_id + ).cte("ds_app_link_sync_meta_url_delete_prerequisites") + ) + + @property + def ds_meta_url_id(self) -> Column[int]: + return self._cte.columns.ds_meta_url_id + + @property + def cte(self) -> CTE: + return self._cte \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/delete_flags.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/delete_flags.py new file mode 100644 index 00000000..4bee4ccc --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/delete_flags.py @@ -0,0 +1,22 @@ +from sqlalchemy import delete +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.flag.ds_delete.meta_url import FlagDSDeleteMetaURL +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncMetaURLsDeleteRemoveFlagsQueryBuilder(QueryBuilderBase): + + def __init__( + self, + ds_meta_url_ids: list[int] + ): + super().__init__() + self._ds_meta_url_ids = ds_meta_url_ids + + async def run(self, session: AsyncSession) -> None: + statement = ( + delete(FlagDSDeleteMetaURL) + .where(FlagDSDeleteMetaURL.ds_meta_url_id.in_(self._ds_meta_url_ids)) + ) + await session.execute(statement) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/delete_links.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/delete_links.py new file mode 100644 index 00000000..0fb66bb5 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/delete_links.py @@ -0,0 +1,22 @@ +from sqlalchemy import delete +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncMetaURLsDeleteRemoveLinksQueryBuilder(QueryBuilderBase): + + def __init__( + self, + ds_meta_url_ids: list[int] + ): + super().__init__() + self._ds_meta_url_ids = ds_meta_url_ids + + async def run(self, session: AsyncSession) -> None: + statement = ( + delete(DSAppLinkMetaURL) + .where(DSAppLinkMetaURL.ds_meta_url_id.in_(self._ds_meta_url_ids)) + ) + await session.execute(statement) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/get.py new file mode 100644 index 00000000..f1d232f7 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/get.py @@ -0,0 +1,28 @@ +from typing import Sequence + +from sqlalchemy import select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.delete.queries.cte import \ + DSAppLinkSyncMetaURLDeletePrerequisitesCTEContainer +from src.db.queries.base.builder import 
QueryBuilderBase + + +class DSAppSyncMetaURLsDeleteGetQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> list[int]: + """Get DS App links to delete.""" + cte = DSAppLinkSyncMetaURLDeletePrerequisitesCTEContainer() + + query = ( + select( + cte.ds_meta_url_id, + ) + ) + + mappings: Sequence[RowMapping] = await self.sh.mappings( + session=session, + query=query, + ) + + return [mapping[cte.ds_meta_url_id] for mapping in mappings] \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/prereq.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/prereq.py new file mode 100644 index 00000000..8bc7dbd8 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/delete/queries/prereq.py @@ -0,0 +1,17 @@ +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.delete.queries.cte import \ + DSAppLinkSyncMetaURLDeletePrerequisitesCTEContainer +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncMetaURLsDeletePrerequisitesQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> bool: + return await self.sh.results_exist( + session=session, + query=select( + DSAppLinkSyncMetaURLDeletePrerequisitesCTEContainer().ds_meta_url_id + ) + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/core.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/core.py new file mode 100644 index 00000000..3ef8dc28 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/core.py @@ -0,0 +1,56 @@ +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.queries.get import \ + DSAppSyncMetaURLsUpdateGetQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.queries.prereq import \ + DSAppSyncMetaURLsUpdatePrerequisitesQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.queries.update_links import \ + DSAppSyncMetaURLsUpdateAlterLinksQueryBuilder +from src.core.tasks.scheduled.impl.sync_to_ds.templates.operator import DSSyncTaskOperatorBase +from src.db.enums import TaskType +from src.external.pdap.impl.sync.meta_urls.update.core import UpdateMetaURLsRequestBuilder +from src.external.pdap.impl.sync.meta_urls.update.request import UpdateMetaURLsOuterRequest + + +class DSAppSyncMetaURLsUpdateTaskOperator( + DSSyncTaskOperatorBase +): + + @property + def task_type(self) -> TaskType: + return TaskType.SYNC_META_URLS_UPDATE + + async def meets_task_prerequisites(self) -> bool: + return await self.adb_client.run_query_builder( + DSAppSyncMetaURLsUpdatePrerequisitesQueryBuilder() + ) + + async def inner_task_logic(self) -> None: + request: UpdateMetaURLsOuterRequest = await self.get_inputs() + await self.make_request(request) + ds_app_ids: list[int] = [ + meta_url.app_id + for meta_url in request.meta_urls + ] + await self.update_links(ds_app_ids) + + async def get_inputs(self) -> UpdateMetaURLsOuterRequest: + return await self.adb_client.run_query_builder( + DSAppSyncMetaURLsUpdateGetQueryBuilder() + ) + + async def make_request( + self, + request: UpdateMetaURLsOuterRequest + ): + await 
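The two prerequisite builders above select a single column from the shared CTE and hand it to `sh.results_exist`. Assuming that helper reduces to an EXISTS check (an assumption; its internals are not shown here), the generated query looks roughly like this sketch with a placeholder table:

```python
from sqlalchemy import Column, Integer, MetaData, Table, select

metadata = MetaData()
flagged = Table("flagged", metadata, Column("ds_meta_url_id", Integer, primary_key=True))

# The shared CTE the builders select from.
cte = select(flagged.c.ds_meta_url_id).cte("prereq")

# Prerequisite check: does at least one flagged row exist?
prereq_query = select(select(cte.c.ds_meta_url_id).exists())
print(prereq_query)  # SELECT EXISTS (SELECT ... FROM prereq) AS anon_1
```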
self.pdap_client.run_request_builder( + UpdateMetaURLsRequestBuilder(request) + ) + + async def update_links( + self, + ds_app_ids: list[int] + ) -> None: + await self.adb_client.run_query_builder( + DSAppSyncMetaURLsUpdateAlterLinksQueryBuilder( + ds_meta_url_ids=ds_app_ids + ) + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/cte.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/cte.py new file mode 100644 index 00000000..20123566 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/cte.py @@ -0,0 +1,33 @@ +from sqlalchemy import select, Column, CTE + +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL + +class DSAppLinkSyncMetaURLUpdatePrerequisitesCTEContainer: + + def __init__(self): + self._cte = ( + select( + DSAppLinkMetaURL.url_id, + DSAppLinkMetaURL.ds_meta_url_id, + ) + .join( + URL, + URL.id == DSAppLinkMetaURL.url_id, + ) + .where( + URL.updated_at > DSAppLinkMetaURL.last_synced_at, + ).cte("ds_app_link_sync_meta_url_update_prerequisites") + ) + + @property + def url_id(self) -> Column[int]: + return self._cte.columns.url_id + + @property + def ds_meta_url_id(self) -> Column[int]: + return self._cte.columns.ds_meta_url_id + + @property + def cte(self) -> CTE: + return self._cte \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py new file mode 100644 index 00000000..210909f9 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/get.py @@ -0,0 +1,69 @@ +from typing import Sequence + +from sqlalchemy import select, func, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.queries.cte import \ + DSAppLinkSyncMetaURLUpdatePrerequisitesCTEContainer +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.queries.base.builder import QueryBuilderBase +from src.external.pdap.impl.sync.meta_urls._shared.content import MetaURLSyncContentModel +from src.external.pdap.impl.sync.meta_urls.update.request import UpdateMetaURLsOuterRequest, UpdateMetaURLsInnerRequest + + +class DSAppSyncMetaURLsUpdateGetQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> UpdateMetaURLsOuterRequest: + cte = DSAppLinkSyncMetaURLUpdatePrerequisitesCTEContainer() + + agency_id_cte = ( + select( + LinkURLAgency.url_id, + func.array_agg(LinkURLAgency.agency_id).label("agency_ids") + ) + .group_by( + LinkURLAgency.url_id + ) + .cte() + ) + + query = ( + select( + cte.ds_meta_url_id, + URL.full_url, + agency_id_cte.c.agency_ids + ) + .select_from( + cte.cte + ) + .join( + URL, + URL.id == cte.url_id, + ) + .outerjoin( + agency_id_cte, + cte.url_id == agency_id_cte.c.url_id + ) + ) + + mappings: Sequence[RowMapping] = await self.sh.mappings( + session=session, + query=query, + ) + + inner_requests: list[UpdateMetaURLsInnerRequest] = [] + for mapping in mappings: + inner_requests.append( + UpdateMetaURLsInnerRequest( + 
app_id=mapping[cte.ds_meta_url_id], + content=MetaURLSyncContentModel( + url=mapping['full_url'], + agency_ids=mapping["agency_ids"] or [] + ) + ) + ) + + return UpdateMetaURLsOuterRequest( + meta_urls=inner_requests, + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/prereq.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/prereq.py new file mode 100644 index 00000000..761bb2c5 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/prereq.py @@ -0,0 +1,17 @@ +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.queries.cte import \ + DSAppLinkSyncMetaURLUpdatePrerequisitesCTEContainer +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncMetaURLsUpdatePrerequisitesQueryBuilder(QueryBuilderBase): + + async def run(self, session: AsyncSession) -> bool: + return await self.sh.results_exist( + session=session, + query=select( + DSAppLinkSyncMetaURLUpdatePrerequisitesCTEContainer().ds_meta_url_id + ) + ) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/update_links.py b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/update_links.py new file mode 100644 index 00000000..baafcaa8 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/impl/meta_urls/update/queries/update_links.py @@ -0,0 +1,25 @@ +from sqlalchemy import update, func +from sqlalchemy.ext.asyncio import AsyncSession + +from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL +from src.db.queries.base.builder import QueryBuilderBase + + +class DSAppSyncMetaURLsUpdateAlterLinksQueryBuilder(QueryBuilderBase): + + def __init__( + self, + ds_meta_url_ids: list[int] + ): + super().__init__() + self._ds_meta_url_ids = ds_meta_url_ids + + async def run(self, session: AsyncSession) -> None: + statement = ( + update(DSAppLinkMetaURL) + .where(DSAppLinkMetaURL.ds_meta_url_id.in_(self._ds_meta_url_ids)) + .values({ + DSAppLinkMetaURL.last_synced_at: func.now(), + }) + ) + await session.execute(statement) \ No newline at end of file diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/templates/__init__.py b/src/core/tasks/scheduled/impl/sync_to_ds/templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/core/tasks/scheduled/impl/sync_to_ds/templates/operator.py b/src/core/tasks/scheduled/impl/sync_to_ds/templates/operator.py new file mode 100644 index 00000000..62794711 --- /dev/null +++ b/src/core/tasks/scheduled/impl/sync_to_ds/templates/operator.py @@ -0,0 +1,28 @@ +from abc import ABC + +from src.core.tasks.base.operator import TaskOperatorBase +from src.core.tasks.mixins.prereq import HasPrerequisitesMixin +from src.core.tasks.url.enums import TaskOperatorOutcome +from src.db.client.async_ import AsyncDatabaseClient +from src.external.pdap.client import PDAPClient + + +class DSSyncTaskOperatorBase( + TaskOperatorBase, + HasPrerequisitesMixin, + ABC +): + + def __init__( + self, + adb_client: AsyncDatabaseClient, + pdap_client: PDAPClient + ): + super().__init__(adb_client) + self.pdap_client = pdap_client + + async def conclude_task(self): + return await self.run_info( + outcome=TaskOperatorOutcome.SUCCESS, + message="Task completed successfully" + ) diff --git a/src/core/tasks/url/loader.py b/src/core/tasks/url/loader.py index b5910f5e..70c3eebe 100644 --- 
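The update path keys off `updated_at > last_synced_at` to find links whose URL changed since the last push, and `DSAppSyncMetaURLsUpdateAlterLinksQueryBuilder` bumps `last_synced_at` once the push succeeds. A self-contained sketch of both statements using placeholder tables:

```python
from sqlalchemy import Column, DateTime, Integer, MetaData, Table, func, select, update

metadata = MetaData()
urls = Table(
    "urls", metadata,
    Column("id", Integer, primary_key=True),
    Column("updated_at", DateTime),
)
links = Table(
    "links", metadata,
    Column("ds_meta_url_id", Integer, primary_key=True),
    Column("url_id", Integer),
    Column("last_synced_at", DateTime),
)

# URLs edited after their link was last synced are due for an update push.
stale = (
    select(links.c.ds_meta_url_id)
    .join(urls, urls.c.id == links.c.url_id)
    .where(urls.c.updated_at > links.c.last_synced_at)
)

# After the remote update succeeds, touch last_synced_at for the pushed ids.
touch = (
    update(links)
    .where(links.c.ds_meta_url_id.in_([1, 2, 3]))
    .values(last_synced_at=func.now())
)

print(stale)
print(touch)
```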
a/src/core/tasks/url/loader.py +++ b/src/core/tasks/url/loader.py @@ -5,6 +5,17 @@ from environs import Env from src.collectors.impl.muckrock.api_interface.core import MuckrockAPIInterface +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.add.core import DSAppSyncAgenciesAddTaskOperator +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.delete.core import DSAppSyncAgenciesDeleteTaskOperator +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.update.core import DSAppSyncAgenciesUpdateTaskOperator +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.add.core import DSAppSyncDataSourcesAddTaskOperator +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.delete.core import \ + DSAppSyncDataSourcesDeleteTaskOperator +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.core import \ + DSAppSyncDataSourcesUpdateTaskOperator +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.add.core import DSAppSyncMetaURLsAddTaskOperator +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.delete.core import DSAppSyncMetaURLsDeleteTaskOperator +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.core import DSAppSyncMetaURLsUpdateTaskOperator from src.core.tasks.url.models.entry import URLTaskEntry from src.core.tasks.url.operators.agency_identification.core import AgencyIdentificationTaskOperator from src.core.tasks.url.operators.agency_identification.subtasks.loader import AgencyIdentificationSubtaskLoader @@ -21,8 +32,6 @@ from src.core.tasks.url.operators.record_type.llm_api.record_classifier.openai import OpenAIRecordClassifier from src.core.tasks.url.operators.root_url.core import URLRootURLTaskOperator from src.core.tasks.url.operators.screenshot.core import URLScreenshotTaskOperator -from src.core.tasks.url.operators.submit_approved.core import SubmitApprovedURLTaskOperator -from src.core.tasks.url.operators.submit_meta_urls.core import SubmitMetaURLsTaskOperator from src.core.tasks.url.operators.suspend.core import SuspendURLTaskOperator from src.core.tasks.url.operators.validate.core import AutoValidateURLTaskOperator from src.db.client.async_ import AsyncDatabaseClient @@ -96,26 +105,6 @@ def _get_agency_identification_task_operator(self) -> URLTaskEntry: enabled=self.setup_flag("URL_AGENCY_IDENTIFICATION_TASK_FLAG") ) - def _get_submit_approved_url_task_operator(self) -> URLTaskEntry: - operator = SubmitApprovedURLTaskOperator( - adb_client=self.adb_client, - pdap_client=self.pdap_client - ) - return URLTaskEntry( - operator=operator, - enabled=self.setup_flag("URL_SUBMIT_APPROVED_TASK_FLAG") - ) - - def _get_submit_meta_urls_task_operator(self) -> URLTaskEntry: - operator = SubmitMetaURLsTaskOperator( - adb_client=self.adb_client, - pdap_client=self.pdap_client - ) - return URLTaskEntry( - operator=operator, - enabled=self.setup_flag("URL_SUBMIT_META_URLS_TASK_FLAG") - ) - def _get_url_miscellaneous_metadata_task_operator(self) -> URLTaskEntry: operator = URLMiscellaneousMetadataTaskOperator( adb_client=self.adb_client @@ -204,6 +193,109 @@ def _get_suspend_url_task_operator(self) -> URLTaskEntry: enabled=self.setup_flag("URL_SUSPEND_TASK_FLAG") ) + # DS App Sync + ## Agency + ### Add + def _get_ds_app_sync_agency_add_task_operator(self) -> URLTaskEntry: + operator = DSAppSyncAgenciesAddTaskOperator( + adb_client=self.adb_client, + pdap_client=self.pdap_client + ) + return URLTaskEntry( + operator=operator, + enabled=self.setup_flag("DS_APP_SYNC_AGENCY_ADD_TASK_FLAG") + ) + + ### Update + def 
_get_ds_app_sync_agency_update_task_operator(self) -> URLTaskEntry: + operator = DSAppSyncAgenciesUpdateTaskOperator( + adb_client=self.adb_client, + pdap_client=self.pdap_client + ) + return URLTaskEntry( + operator=operator, + enabled=self.setup_flag("DS_APP_SYNC_AGENCY_UPDATE_TASK_FLAG") + ) + + ### Delete + def _get_ds_app_sync_agency_delete_task_operator(self) -> URLTaskEntry: + operator = DSAppSyncAgenciesDeleteTaskOperator( + adb_client=self.adb_client, + pdap_client=self.pdap_client + ) + return URLTaskEntry( + operator=operator, + enabled=self.setup_flag("DS_APP_SYNC_AGENCY_DELETE_TASK_FLAG") + ) + + ## Data Source + ### Add + def _get_ds_app_sync_data_source_add_task_operator(self) -> URLTaskEntry: + operator = DSAppSyncDataSourcesAddTaskOperator( + adb_client=self.adb_client, + pdap_client=self.pdap_client + ) + return URLTaskEntry( + operator=operator, + enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_ADD_TASK_FLAG") + ) + + ### Update + def _get_ds_app_sync_data_source_update_task_operator(self) -> URLTaskEntry: + operator = DSAppSyncDataSourcesUpdateTaskOperator( + adb_client=self.adb_client, + pdap_client=self.pdap_client + ) + return URLTaskEntry( + operator=operator, + enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_UPDATE_TASK_FLAG") + ) + + ### Delete + def _get_ds_app_sync_data_source_delete_task_operator(self) -> URLTaskEntry: + operator = DSAppSyncDataSourcesDeleteTaskOperator( + adb_client=self.adb_client, + pdap_client=self.pdap_client + ) + return URLTaskEntry( + operator=operator, + enabled=self.setup_flag("DS_APP_SYNC_DATA_SOURCE_DELETE_TASK_FLAG") + ) + + ## Meta URL + ### Add + def _get_ds_app_sync_meta_url_add_task_operator(self) -> URLTaskEntry: + operator = DSAppSyncMetaURLsAddTaskOperator( + adb_client=self.adb_client, + pdap_client=self.pdap_client + ) + return URLTaskEntry( + operator=operator, + enabled=self.setup_flag("DS_APP_SYNC_META_URL_ADD_TASK_FLAG") + ) + + ### Update + def _get_ds_app_sync_meta_url_update_task_operator(self) -> URLTaskEntry: + operator = DSAppSyncMetaURLsUpdateTaskOperator( + adb_client=self.adb_client, + pdap_client=self.pdap_client + ) + return URLTaskEntry( + operator=operator, + enabled=self.setup_flag("DS_APP_SYNC_META_URL_UPDATE_TASK_FLAG") + ) + + ### Delete + def _get_ds_app_sync_meta_url_delete_task_operator(self) -> URLTaskEntry: + operator = DSAppSyncMetaURLsDeleteTaskOperator( + adb_client=self.adb_client, + pdap_client=self.pdap_client + ) + return URLTaskEntry( + operator=operator, + enabled=self.setup_flag("DS_APP_SYNC_META_URL_DELETE_TASK_FLAG") + ) + async def load_entries(self) -> list[URLTaskEntry]: return [ @@ -213,12 +305,23 @@ async def load_entries(self) -> list[URLTaskEntry]: self._get_url_record_type_task_operator(), self._get_agency_identification_task_operator(), self._get_url_miscellaneous_metadata_task_operator(), - self._get_submit_approved_url_task_operator(), - self._get_submit_meta_urls_task_operator(), self._get_url_auto_relevance_task_operator(), self._get_url_screenshot_task_operator(), self._get_location_id_task_operator(), self._get_auto_validate_task_operator(), self._get_auto_name_task_operator(), self._get_suspend_url_task_operator(), + # DS App Sync + ## Agency + self._get_ds_app_sync_agency_add_task_operator(), + self._get_ds_app_sync_agency_update_task_operator(), + self._get_ds_app_sync_agency_delete_task_operator(), + ## Data Source + self._get_ds_app_sync_data_source_add_task_operator(), + self._get_ds_app_sync_data_source_update_task_operator(), + 
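Each getter above wraps an operator in a `URLTaskEntry` whose `enabled` bit comes from an environment flag via `setup_flag`. A simplified, hypothetical stand-in for that wiring (the real loader reads flags through `environs`, not `os.environ` directly, and its defaults may differ):

```python
import os
from dataclasses import dataclass


@dataclass
class Entry:
    operator: object
    enabled: bool


def setup_flag(name: str, default: bool = True) -> bool:
    # Simplified flag parsing for illustration only.
    return os.environ.get(name, str(default)).lower() in ("1", "true", "yes")


entry = Entry(operator=object(), enabled=setup_flag("DS_APP_SYNC_META_URL_ADD_TASK_FLAG"))
print(entry.enabled)
```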
self._get_ds_app_sync_data_source_delete_task_operator(), + ## Meta URL + self._get_ds_app_sync_meta_url_add_task_operator(), + self._get_ds_app_sync_meta_url_update_task_operator(), + self._get_ds_app_sync_meta_url_delete_task_operator(), ] diff --git a/src/core/tasks/url/models/entry.py b/src/core/tasks/url/models/entry.py index eeb09047..69269c1e 100644 --- a/src/core/tasks/url/models/entry.py +++ b/src/core/tasks/url/models/entry.py @@ -1,5 +1,7 @@ from pydantic import BaseModel +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.add.core import DSAppSyncAgenciesAddTaskOperator +from src.core.tasks.scheduled.impl.sync_to_ds.templates.operator import DSSyncTaskOperatorBase from src.core.tasks.url.operators.base import URLTaskOperatorBase @@ -8,5 +10,5 @@ class URLTaskEntry(BaseModel): class Config: arbitrary_types_allowed = True - operator: URLTaskOperatorBase + operator: URLTaskOperatorBase | DSSyncTaskOperatorBase enabled: bool \ No newline at end of file diff --git a/src/core/tasks/url/operators/base.py b/src/core/tasks/url/operators/base.py index e1d70d5e..8fc0b422 100644 --- a/src/core/tasks/url/operators/base.py +++ b/src/core/tasks/url/operators/base.py @@ -22,15 +22,3 @@ async def conclude_task(self): outcome=TaskOperatorOutcome.SUCCESS, message="Task completed successfully" ) - - async def run_info( - self, - outcome: TaskOperatorOutcome, - message: str - ) -> TaskOperatorRunInfo: - return TaskOperatorRunInfo( - task_id=self.task_id, - task_type=self.task_type, - outcome=outcome, - message=message - ) diff --git a/src/core/tasks/url/operators/submit_approved/convert.py b/src/core/tasks/url/operators/submit_approved/convert.py deleted file mode 100644 index 1c4a8298..00000000 --- a/src/core/tasks/url/operators/submit_approved/convert.py +++ /dev/null @@ -1,19 +0,0 @@ -from src.core.tasks.url.operators.submit_approved.tdo import SubmittedURLInfo -from src.db.models.impl.url.task_error.pydantic_.small import URLTaskErrorSmall - - -async def convert_to_task_errors( - submitted_url_infos: list[SubmittedURLInfo] -) -> list[URLTaskErrorSmall]: - task_errors: list[URLTaskErrorSmall] = [] - error_response_objects = [ - response_object for response_object in submitted_url_infos - if response_object.request_error is not None - ] - for error_response_object in error_response_objects: - error_info = URLTaskErrorSmall( - url_id=error_response_object.url_id, - error=error_response_object.request_error, - ) - task_errors.append(error_info) - return task_errors diff --git a/src/core/tasks/url/operators/submit_approved/core.py b/src/core/tasks/url/operators/submit_approved/core.py deleted file mode 100644 index e16a1269..00000000 --- a/src/core/tasks/url/operators/submit_approved/core.py +++ /dev/null @@ -1,50 +0,0 @@ -from src.core.tasks.url.operators.base import URLTaskOperatorBase -from src.core.tasks.url.operators.submit_approved.convert import convert_to_task_errors -from src.core.tasks.url.operators.submit_approved.filter import filter_successes -from src.core.tasks.url.operators.submit_approved.queries.get import GetValidatedURLsQueryBuilder -from src.core.tasks.url.operators.submit_approved.queries.has_validated import HasValidatedURLsQueryBuilder -from src.core.tasks.url.operators.submit_approved.tdo import SubmitApprovedURLTDO, SubmittedURLInfo -from src.db.client.async_ import AsyncDatabaseClient -from src.db.enums import TaskType -from src.db.models.impl.url.task_error.pydantic_.small import URLTaskErrorSmall -from src.external.pdap.client import PDAPClient - - -class 
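`URLTaskEntry` now accepts either operator base class; since neither is a pydantic model, the field relies on `arbitrary_types_allowed`. A minimal sketch of that widening, with stand-in classes in place of the real operator bases:

```python
from pydantic import BaseModel


class URLTaskOperatorBase: ...
class DSSyncTaskOperatorBase: ...


class Entry(BaseModel):
    class Config:
        arbitrary_types_allowed = True

    operator: URLTaskOperatorBase | DSSyncTaskOperatorBase
    enabled: bool


print(Entry(operator=DSSyncTaskOperatorBase(), enabled=True).enabled)  # True
```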
SubmitApprovedURLTaskOperator(URLTaskOperatorBase): - - def __init__( - self, - adb_client: AsyncDatabaseClient, - pdap_client: PDAPClient - ): - super().__init__(adb_client) - self.pdap_client = pdap_client - - @property - def task_type(self): - return TaskType.SUBMIT_APPROVED - - async def meets_task_prerequisites(self): - return await self.adb_client.run_query_builder(HasValidatedURLsQueryBuilder()) - - async def inner_task_logic(self): - # Retrieve all URLs that are validated and not submitted - tdos: list[SubmitApprovedURLTDO] = await self.get_validated_urls() - - # Link URLs to this task - await self.link_urls_to_task(url_ids=[tdo.url_id for tdo in tdos]) - - # Submit each URL, recording errors if they exist - submitted_url_infos: list[SubmittedURLInfo] = await self.pdap_client.submit_data_source_urls(tdos) - - task_errors: list[URLTaskErrorSmall] = await convert_to_task_errors(submitted_url_infos) - success_infos = await filter_successes(submitted_url_infos) - - # Update the database for successful submissions - await self.adb_client.mark_urls_as_submitted(infos=success_infos) - - # Update the database for failed submissions - await self.add_task_errors(task_errors) - - async def get_validated_urls(self) -> list[SubmitApprovedURLTDO]: - return await self.adb_client.run_query_builder(GetValidatedURLsQueryBuilder()) diff --git a/src/core/tasks/url/operators/submit_approved/filter.py b/src/core/tasks/url/operators/submit_approved/filter.py deleted file mode 100644 index 4ba2fad8..00000000 --- a/src/core/tasks/url/operators/submit_approved/filter.py +++ /dev/null @@ -1,11 +0,0 @@ -from src.core.tasks.url.operators.submit_approved.tdo import SubmittedURLInfo - - -async def filter_successes( - submitted_url_infos: list[SubmittedURLInfo] -) -> list[SubmittedURLInfo]: - success_infos = [ - response_object for response_object in submitted_url_infos - if response_object.data_source_id is not None - ] - return success_infos diff --git a/src/core/tasks/url/operators/submit_approved/queries/cte.py b/src/core/tasks/url/operators/submit_approved/queries/cte.py deleted file mode 100644 index cf7ccb71..00000000 --- a/src/core/tasks/url/operators/submit_approved/queries/cte.py +++ /dev/null @@ -1,31 +0,0 @@ -from sqlalchemy import CTE, select, exists -from sqlalchemy.orm import aliased - -from src.collectors.enums import URLStatus -from src.db.enums import TaskType -from src.db.helpers.query import not_exists_url, no_url_task_error -from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource - -VALIDATED_URLS_WITHOUT_DS_SQ =( - select(URL) - .join( - FlagURLValidated, - FlagURLValidated.url_id == URL.id - ) - .where( - URL.status == URLStatus.OK, - URL.name.isnot(None), - FlagURLValidated.type == URLType.DATA_SOURCE, - not_exists_url(URLDataSource), - no_url_task_error(TaskType.SUBMIT_APPROVED) - ) - .subquery() -) - -VALIDATED_URLS_WITHOUT_DS_ALIAS = aliased( - URL, - VALIDATED_URLS_WITHOUT_DS_SQ -) \ No newline at end of file diff --git a/src/core/tasks/url/operators/submit_approved/queries/get.py b/src/core/tasks/url/operators/submit_approved/queries/get.py deleted file mode 100644 index fb43dd34..00000000 --- a/src/core/tasks/url/operators/submit_approved/queries/get.py +++ /dev/null @@ -1,68 +0,0 @@ -from sqlalchemy import select -from sqlalchemy.ext.asyncio import AsyncSession -from 
sqlalchemy.orm import selectinload - -from src.core.tasks.url.operators.submit_approved.queries.cte import VALIDATED_URLS_WITHOUT_DS_ALIAS -from src.core.tasks.url.operators.submit_approved.tdo import SubmitApprovedURLTDO -from src.db.helpers.session import session_helper as sh -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.queries.base.builder import QueryBuilderBase - - -class GetValidatedURLsQueryBuilder(QueryBuilderBase): - - async def run(self, session: AsyncSession) -> list[SubmitApprovedURLTDO]: - query = await self._build_query() - urls = await sh.scalars(session, query) - return await self._process_results(urls) - - async def _process_results(self, urls): - results: list[SubmitApprovedURLTDO] = [] - for url in urls: - try: - tdo = await self._process_result(url) - except Exception as e: - raise ValueError(f"Failed to process url {url.id}") from e - results.append(tdo) - return results - - @staticmethod - async def _build_query(): - query = ( - select(VALIDATED_URLS_WITHOUT_DS_ALIAS) - .options( - selectinload(VALIDATED_URLS_WITHOUT_DS_ALIAS.optional_data_source_metadata), - selectinload(VALIDATED_URLS_WITHOUT_DS_ALIAS.confirmed_agencies), - selectinload(VALIDATED_URLS_WITHOUT_DS_ALIAS.reviewing_user), - selectinload(VALIDATED_URLS_WITHOUT_DS_ALIAS.record_type), - ).limit(100) - ) - return query - - @staticmethod - async def _process_result(url: URL) -> SubmitApprovedURLTDO: - agency_ids = [] - for agency in url.confirmed_agencies: - agency_ids.append(agency.agency_id) - optional_metadata = url.optional_data_source_metadata - if optional_metadata is None: - record_formats = None - data_portal_type = None - supplying_entity = None - else: - record_formats = optional_metadata.record_formats - data_portal_type = optional_metadata.data_portal_type - supplying_entity = optional_metadata.supplying_entity - tdo = SubmitApprovedURLTDO( - url_id=url.id, - url=url.full_url, - name=url.name, - agency_ids=agency_ids, - description=url.description, - record_type=url.record_type.record_type, - record_formats=record_formats, - data_portal_type=data_portal_type, - supplying_entity=supplying_entity, - approving_user_id=url.reviewing_user.user_id - ) - return tdo \ No newline at end of file diff --git a/src/core/tasks/url/operators/submit_approved/queries/has_validated.py b/src/core/tasks/url/operators/submit_approved/queries/has_validated.py deleted file mode 100644 index 2cbee486..00000000 --- a/src/core/tasks/url/operators/submit_approved/queries/has_validated.py +++ /dev/null @@ -1,18 +0,0 @@ -from sqlalchemy import select -from sqlalchemy.ext.asyncio import AsyncSession - -from src.core.tasks.url.operators.submit_approved.queries.cte import VALIDATED_URLS_WITHOUT_DS_ALIAS -from src.db.helpers.session import session_helper as sh -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.queries.base.builder import QueryBuilderBase - - -class HasValidatedURLsQueryBuilder(QueryBuilderBase): - - async def run(self, session: AsyncSession) -> bool: - query = ( - select(VALIDATED_URLS_WITHOUT_DS_ALIAS) - .limit(1) - ) - url: URL | None = await sh.one_or_none(session, query=query) - return url is not None \ No newline at end of file diff --git a/src/core/tasks/url/operators/submit_approved/queries/mark_submitted.py b/src/core/tasks/url/operators/submit_approved/queries/mark_submitted.py deleted file mode 100644 index 4ebfef56..00000000 --- a/src/core/tasks/url/operators/submit_approved/queries/mark_submitted.py +++ /dev/null @@ -1,29 +0,0 @@ -from sqlalchemy import 
update -from sqlalchemy.ext.asyncio import AsyncSession - -from src.collectors.enums import URLStatus -from src.core.tasks.url.operators.submit_approved.tdo import SubmittedURLInfo -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource -from src.db.queries.base.builder import QueryBuilderBase - - -class MarkURLsAsSubmittedQueryBuilder(QueryBuilderBase): - - def __init__(self, infos: list[SubmittedURLInfo]): - super().__init__() - self.infos = infos - - async def run(self, session: AsyncSession): - for info in self.infos: - url_id = info.url_id - data_source_id = info.data_source_id - - url_data_source_object = URLDataSource( - url_id=url_id, - data_source_id=data_source_id - ) - if info.submitted_at is not None: - url_data_source_object.created_at = info.submitted_at - session.add(url_data_source_object) - diff --git a/src/core/tasks/url/operators/submit_approved/tdo.py b/src/core/tasks/url/operators/submit_approved/tdo.py deleted file mode 100644 index 89d89d9e..00000000 --- a/src/core/tasks/url/operators/submit_approved/tdo.py +++ /dev/null @@ -1,26 +0,0 @@ -from datetime import datetime - -from pydantic import BaseModel - -from src.core.enums import RecordType - - -class SubmitApprovedURLTDO(BaseModel): - url_id: int - url: str - record_type: RecordType - agency_ids: list[int] - name: str - description: str | None = None - approving_user_id: int - record_formats: list[str] | None = None - data_portal_type: str | None = None - supplying_entity: str | None = None - data_source_id: int | None = None - request_error: str | None = None - -class SubmittedURLInfo(BaseModel): - url_id: int - data_source_id: int | None - request_error: str | None - submitted_at: datetime | None = None \ No newline at end of file diff --git a/src/core/tasks/url/operators/submit_meta_urls/core.py b/src/core/tasks/url/operators/submit_meta_urls/core.py deleted file mode 100644 index ae41d56b..00000000 --- a/src/core/tasks/url/operators/submit_meta_urls/core.py +++ /dev/null @@ -1,78 +0,0 @@ -from src.core.tasks.url.operators.base import URLTaskOperatorBase -from src.core.tasks.url.operators.submit_meta_urls.queries.get import GetMetaURLsForSubmissionQueryBuilder -from src.core.tasks.url.operators.submit_meta_urls.queries.prereq import \ - MeetsMetaURLSSubmissionPrerequisitesQueryBuilder -from src.db.client.async_ import AsyncDatabaseClient -from src.db.dtos.url.mapping_.simple import SimpleURLMapping -from src.db.enums import TaskType -from src.db.models.impl.url.ds_meta_url.pydantic import URLDSMetaURLPydantic -from src.db.models.impl.url.task_error.pydantic_.small import URLTaskErrorSmall -from src.external.pdap.client import PDAPClient -from src.external.pdap.impl.meta_urls.enums import SubmitMetaURLsStatus -from src.external.pdap.impl.meta_urls.request import SubmitMetaURLsRequest -from src.external.pdap.impl.meta_urls.response import SubmitMetaURLsResponse -from src.util.url_mapper_.simple import SimpleURLMapper - - -class SubmitMetaURLsTaskOperator(URLTaskOperatorBase): - - def __init__( - self, - adb_client: AsyncDatabaseClient, - pdap_client: PDAPClient - ): - super().__init__(adb_client) - self.pdap_client = pdap_client - - @property - def task_type(self) -> TaskType: - return TaskType.SUBMIT_META_URLS - - async def meets_task_prerequisites(self) -> bool: - return await self.adb_client.run_query_builder( - MeetsMetaURLSSubmissionPrerequisitesQueryBuilder() - ) - - async def inner_task_logic(self) -> None: - requests: 
list[SubmitMetaURLsRequest] = await self.adb_client.run_query_builder( - GetMetaURLsForSubmissionQueryBuilder() - ) - - url_mappings: list[SimpleURLMapping] = [ - SimpleURLMapping( - url=request.url, - url_id=request.url_id, - ) - for request in requests - ] - - mapper = SimpleURLMapper(url_mappings) - - await self.link_urls_to_task(mapper.get_all_ids()) - - responses: list[SubmitMetaURLsResponse] = \ - await self.pdap_client.submit_meta_urls(requests) - - errors: list[URLTaskErrorSmall] = [] - inserts: list[URLDSMetaURLPydantic] = [] - - for response in responses: - url_id: int = mapper.get_id(response.url) - if response.status == SubmitMetaURLsStatus.SUCCESS: - inserts.append( - URLDSMetaURLPydantic( - url_id=url_id, - agency_id=response.agency_id, - ds_meta_url_id=response.meta_url_id - ) - ) - else: - errors.append( - URLTaskErrorSmall( - url_id=url_id, - error=response.error, - ) - ) - - await self.add_task_errors(errors) - await self.adb_client.bulk_insert(inserts) diff --git a/src/core/tasks/url/operators/submit_meta_urls/queries/cte.py b/src/core/tasks/url/operators/submit_meta_urls/queries/cte.py deleted file mode 100644 index 54b1edf8..00000000 --- a/src/core/tasks/url/operators/submit_meta_urls/queries/cte.py +++ /dev/null @@ -1,61 +0,0 @@ -from sqlalchemy import select, exists, Column, CTE - -from src.db.enums import TaskType -from src.db.helpers.query import no_url_task_error -from src.db.models.impl.agency.sqlalchemy import Agency -from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.ds_meta_url.sqlalchemy import URLDSMetaURL -from src.db.models.views.meta_url import MetaURL - - -class SubmitMetaURLsPrerequisitesCTEContainer: - - def __init__(self): - - self._cte = ( - select( - URL.id.label("url_id"), - URL.full_url.label("url"), - LinkURLAgency.agency_id, - ) - # Validated as Meta URL - .join( - MetaURL, - MetaURL.url_id == URL.id - ) - .join( - LinkURLAgency, - LinkURLAgency.url_id == URL.id - ) - # Does not have a submission - .where( - ~exists( - select( - URLDSMetaURL.ds_meta_url_id - ) - .where( - URLDSMetaURL.url_id == URL.id, - URLDSMetaURL.agency_id == LinkURLAgency.agency_id - ) - ), - no_url_task_error(TaskType.SUBMIT_META_URLS) - ) - .cte("submit_meta_urls_prerequisites") - ) - - @property - def cte(self) -> CTE: - return self._cte - - @property - def url_id(self) -> Column[int]: - return self._cte.c.url_id - - @property - def agency_id(self) -> Column[int]: - return self._cte.c.agency_id - - @property - def url(self) -> Column[str]: - return self._cte.c.url \ No newline at end of file diff --git a/src/core/tasks/url/operators/submit_meta_urls/queries/get.py b/src/core/tasks/url/operators/submit_meta_urls/queries/get.py deleted file mode 100644 index 518393f6..00000000 --- a/src/core/tasks/url/operators/submit_meta_urls/queries/get.py +++ /dev/null @@ -1,34 +0,0 @@ -from typing import Any, Sequence - -from sqlalchemy import select, RowMapping -from sqlalchemy.ext.asyncio import AsyncSession - -from src.core.tasks.url.operators.submit_meta_urls.queries.cte import SubmitMetaURLsPrerequisitesCTEContainer -from src.db.queries.base.builder import QueryBuilderBase -from src.external.pdap.impl.meta_urls.request import SubmitMetaURLsRequest - -from src.db.helpers.session import session_helper as sh - -class GetMetaURLsForSubmissionQueryBuilder(QueryBuilderBase): - - - async def run(self, session: AsyncSession) -> list[SubmitMetaURLsRequest]: - cte = 
SubmitMetaURLsPrerequisitesCTEContainer() - query = ( - select( - cte.url_id, - cte.agency_id, - cte.url - ) - ) - - mappings: Sequence[RowMapping] = await sh.mappings(session, query=query) - - return [ - SubmitMetaURLsRequest( - url_id=mapping["url_id"], - agency_id=mapping["agency_id"], - url=mapping["url"], - ) - for mapping in mappings - ] diff --git a/src/core/tasks/url/operators/submit_meta_urls/queries/prereq.py b/src/core/tasks/url/operators/submit_meta_urls/queries/prereq.py deleted file mode 100644 index 3b5538be..00000000 --- a/src/core/tasks/url/operators/submit_meta_urls/queries/prereq.py +++ /dev/null @@ -1,20 +0,0 @@ -from sqlalchemy import select -from sqlalchemy.ext.asyncio import AsyncSession - -from src.core.tasks.url.operators.submit_meta_urls.queries.cte import SubmitMetaURLsPrerequisitesCTEContainer -from src.db.queries.base.builder import QueryBuilderBase -from src.db.helpers.session import session_helper as sh - - -class MeetsMetaURLSSubmissionPrerequisitesQueryBuilder(QueryBuilderBase): - - - async def run(self, session: AsyncSession) -> bool: - cte = SubmitMetaURLsPrerequisitesCTEContainer() - query = ( - select( - cte.url_id, - ) - ) - - return await sh.has_results(session, query=query) \ No newline at end of file diff --git a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/agency.py b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/agency.py index e9df9db4..141393bd 100644 --- a/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/agency.py +++ b/src/core/tasks/url/operators/validate/queries/ctes/counts/impl/agency.py @@ -1,23 +1,23 @@ from sqlalchemy import select, func from src.core.tasks.url.operators.validate.queries.ctes.counts.core import ValidatedCountsCTEContainer -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.models.views.unvalidated_url import UnvalidatedURL AGENCY_VALIDATION_COUNTS_CTE = ValidatedCountsCTEContainer( ( select( - UserUrlAgencySuggestion.url_id, - UserUrlAgencySuggestion.agency_id.label("entity"), + UserURLAgencySuggestion.url_id, + UserURLAgencySuggestion.agency_id.label("entity"), func.count().label("votes") ) .join( UnvalidatedURL, - UserUrlAgencySuggestion.url_id == UnvalidatedURL.url_id + UserURLAgencySuggestion.url_id == UnvalidatedURL.url_id ) .group_by( - UserUrlAgencySuggestion.url_id, - UserUrlAgencySuggestion.agency_id + UserURLAgencySuggestion.url_id, + UserURLAgencySuggestion.agency_id ) .cte("counts_agency") ) diff --git a/src/db/client/async_.py b/src/db/client/async_.py index 93af63f9..95bc7082 100644 --- a/src/db/client/async_.py +++ b/src/db/client/async_.py @@ -1,9 +1,9 @@ -from datetime import datetime, timedelta +from datetime import datetime from functools import wraps from typing import Optional, Type, Any, List, Sequence -from sqlalchemy import select, exists, func, Select, and_, update, delete, Row, text -from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker +from sqlalchemy import select, func, Select, and_, update, Row, text, Engine +from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker, AsyncEngine from sqlalchemy.orm import selectinload from src.api.endpoints.annotate.all.get.models.response import GetNextURLForAllAnnotationResponse @@ -46,8 +46,6 @@ from src.core.tasks.url.operators.html.queries.get import \ GetPendingURLsWithoutHTMLDataQueryBuilder from 
src.core.tasks.url.operators.misc_metadata.tdo import URLMiscellaneousMetadataTDO -from src.core.tasks.url.operators.submit_approved.queries.mark_submitted import MarkURLsAsSubmittedQueryBuilder -from src.core.tasks.url.operators.submit_approved.tdo import SubmittedURLInfo from src.db.client.helpers import add_standard_limit_and_offset from src.db.client.types import UserSuggestionModel from src.db.config_manager import ConfigManager @@ -58,7 +56,7 @@ from src.db.dtos.url.raw_html import RawHTMLInfo from src.db.enums import TaskType from src.db.helpers.session import session_helper as sh -from src.db.models.impl.agency.enums import AgencyType +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType from src.db.models.impl.agency.sqlalchemy import Agency from src.db.models.impl.backlog_snapshot import BacklogSnapshot from src.db.models.impl.batch.pydantic.info import BatchInfo @@ -74,14 +72,13 @@ from src.db.models.impl.task.core import Task from src.db.models.impl.task.enums import TaskStatus from src.db.models.impl.task.error import TaskError -from src.db.models.impl.url.checked_for_duplicate import URLCheckedForDuplicate from src.db.models.impl.url.core.pydantic.info import URLInfo from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.relevant.auto.pydantic.input import AutoRelevancyAnnotationInput @@ -92,7 +89,6 @@ from src.db.models.templates_.base import Base from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum from src.db.queries.base.builder import QueryBuilderBase -from src.db.queries.implementations.core.get.html_content_info import GetHTMLContentInfoQueryBuilder from src.db.queries.implementations.core.get.recent_batch_summaries.builder import GetRecentBatchSummariesQueryBuilder from src.db.queries.implementations.core.metrics.urls.aggregated.pending import \ GetMetricsURLSAggregatedPendingQueryBuilder @@ -107,15 +103,15 @@ class AsyncDatabaseClient: - def __init__(self, db_url: str | None = None): - if db_url is None: + def __init__(self, engine: AsyncEngine | None = None): + if engine is None: db_url = EnvVarManager.get().get_postgres_connection_string(is_async=True) - self.db_url = db_url - echo = ConfigManager.get_sqlalchemy_echo() - self.engine = create_async_engine( - url=db_url, - echo=echo, - ) + echo = ConfigManager.get_sqlalchemy_echo() + engine = create_async_engine( + url=db_url, + echo=echo, + ) + self.engine = engine self.session_maker = async_sessionmaker(bind=self.engine, expire_on_commit=False) self.statement_composer = StatementComposer() @@ -144,8 +140,8 @@ async def wrapper(self, *args, **kwargs): return wrapper @session_manager - async def execute(self, session: AsyncSession, statement): - await session.execute(statement) + async def execute(self, 
session: AsyncSession, statement) -> Any: + return await session.execute(statement) @session_manager async def add( @@ -455,6 +451,12 @@ async def get_all( """Get all records of a model. Used primarily in testing.""" return await sh.get_all(session=session, model=model, order_by_attribute=order_by_attribute) + + @session_manager + async def has_no_rows(self, session: AsyncSession, model: Base) -> bool: + results: list[Base] = await sh.get_all(session=session, model=model) + return len(results) == 0 + async def get_urls( self, page: int, @@ -507,9 +509,6 @@ async def get_task_info( ) -> TaskInfo: return await self.run_query_builder(GetTaskInfoQueryBuilder(task_id)) - async def get_html_content_info(self, url_id: int) -> list[URLHTMLContentInfo]: - return await self.run_query_builder(GetHTMLContentInfoQueryBuilder(url_id)) - @session_manager async def link_urls_to_task( self, @@ -589,11 +588,14 @@ async def upsert_new_agencies( Add or update agencies in the database """ for suggestion in suggestions: - query = select(Agency).where(Agency.agency_id == suggestion.pdap_agency_id) + query = select(Agency).where(Agency.id == suggestion.pdap_agency_id) result = await session.execute(query) agency = result.scalars().one_or_none() if agency is None: - agency = Agency(agency_id=suggestion.pdap_agency_id) + agency = Agency( + id=suggestion.pdap_agency_id, + jurisdiction_type=JurisdictionType.LOCAL + ) agency.name = suggestion.agency_name agency.agency_type = AgencyType.UNKNOWN session.add(agency) @@ -625,17 +627,18 @@ async def add_agency_manual_suggestion( # Check if agency exists in database -- if not, add with placeholder if agency_id is not None: - statement = select(Agency).where(Agency.agency_id == agency_id) + statement = select(Agency).where(Agency.id == agency_id) result = await session.execute(statement) if len(result.all()) == 0: agency = Agency( - agency_id=agency_id, + id=agency_id, name=PLACEHOLDER_AGENCY_NAME, agency_type=AgencyType.UNKNOWN, + jurisdiction_type=JurisdictionType.LOCAL ) await session.merge(agency) - url_agency_suggestion = UserUrlAgencySuggestion( + url_agency_suggestion = UserURLAgencySuggestion( url_id=url_id, agency_id=agency_id, user_id=user_id, @@ -643,12 +646,6 @@ async def add_agency_manual_suggestion( ) session.add(url_agency_suggestion) - @session_manager - async def get_urls_with_confirmed_agencies(self, session: AsyncSession) -> list[URL]: - statement = select(URL).where(exists().where(LinkURLAgency.url_id == URL.id)) - results = await session.execute(statement) - return list(results.scalars().all()) - async def approve_url( self, approval_info: FinalReviewApprovalInfo, @@ -761,9 +758,6 @@ async def update_batch_post_collection( batch.status = batch_status.value batch.compute_time = compute_time - async def mark_urls_as_submitted(self, infos: list[SubmittedURLInfo]): - await self.run_query_builder(MarkURLsAsSubmittedQueryBuilder(infos)) - async def get_duplicates_by_batch_id(self, batch_id: int, page: int) -> list[DuplicateInfo]: return await self.run_query_builder( GetDuplicatesByBatchIDQueryBuilder( @@ -798,15 +792,6 @@ async def get_logs_by_batch_id(self, session, batch_id: int) -> List[LogOutputIn logs = raw_results.scalars().all() return ([LogOutputInfo(**log.__dict__) for log in logs]) - async def delete_old_logs(self): - """ - Delete logs older than a day - """ - statement = delete(Log).where( - Log.created_at < datetime.now() - timedelta(days=7) - ) - await self.execute(statement) - async def get_next_url_for_all_annotations( self, user_id: int, @@ 
-869,11 +854,11 @@ async def get_urls_breakdown_submitted_metrics( ) -> GetMetricsURLsBreakdownSubmittedResponseDTO: # Build the query - month = func.date_trunc('month', URLDataSource.created_at) + month = func.date_trunc('month', DSAppLinkDataSource.created_at) query = ( select( month.label('month'), - func.count(URLDataSource.id).label('count_submitted'), + func.count(DSAppLinkDataSource.id).label('count_submitted'), ) .group_by(month) .order_by(month.asc()) @@ -939,12 +924,6 @@ async def mark_all_as_404(self, url_ids: List[int]): query = update(URLWebMetadata).where(URLWebMetadata.url_id.in_(url_ids)).values(status_code=404) await self.execute(query) - @session_manager - async def mark_as_checked_for_duplicates(self, session: AsyncSession, url_ids: list[int]): - for url_id in url_ids: - url_checked_for_duplicate = URLCheckedForDuplicate(url_id=url_id) - session.add(url_checked_for_duplicate) - async def get_urls_aggregated_pending_metrics(self): return await self.run_query_builder(GetMetricsURLSAggregatedPendingQueryBuilder()) diff --git a/src/db/client/sync.py b/src/db/client/sync.py index 966d4bbd..2e9e6f9b 100644 --- a/src/db/client/sync.py +++ b/src/db/client/sync.py @@ -1,7 +1,7 @@ from functools import wraps from typing import List -from sqlalchemy import create_engine, Select +from sqlalchemy import create_engine, Select, Engine from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import sessionmaker, scoped_session, Session @@ -16,10 +16,10 @@ from src.db.models.templates_.base import Base from src.db.models.impl.duplicate.sqlalchemy import Duplicate from src.db.models.impl.log.sqlalchemy import Log -from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.batch.sqlalchemy import Batch -from src.core.tasks.url.operators.submit_approved.tdo import SubmittedURLInfo +from tests.helpers.data_creator.commands.impl.urls_.tdo import SubmittedURLInfo from src.core.env_var_manager import EnvVarManager from src.core.enums import BatchStatus from src.util.models.url_and_scheme import URLAndScheme @@ -28,15 +28,19 @@ # Database Client class DatabaseClient: - def __init__(self, db_url: str | None = None): + def __init__( + self, + engine: Engine | None = None + ): """Initialize the DatabaseClient.""" - if db_url is None: + if engine is None: db_url = EnvVarManager.get().get_postgres_connection_string(is_async=True) + engine = create_engine( + url=db_url, + echo=ConfigManager.get_sqlalchemy_echo(), + ) - self.engine = create_engine( - url=db_url, - echo=ConfigManager.get_sqlalchemy_echo(), - ) + self.engine = engine self.session_maker = scoped_session(sessionmaker(bind=self.engine)) self.session = None @@ -141,7 +145,7 @@ def insert_url(self, session, url_info: URLInfo) -> int: return url_entry.id def insert_urls(self, url_infos: List[URLInfo], batch_id: int) -> InsertURLsInfo: - url_mappings = [] + url_mappings: list[SimpleURLMapping] = [] duplicates = [] for url_info in url_infos: url_info.batch_id = batch_id @@ -225,9 +229,9 @@ def mark_urls_as_submitted( url_id = info.url_id data_source_id = info.data_source_id - url_data_source_object = URLDataSource( + url_data_source_object = DSAppLinkDataSource( url_id=url_id, - data_source_id=data_source_id + ds_data_source_id=data_source_id ) if info.submitted_at is not None: url_data_source_object.created_at = info.submitted_at diff --git a/src/db/client/types.py 
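Both database clients now take an optional engine instead of a connection string, building the default Postgres engine only when none is injected. A minimal illustration of the pattern, using an in-memory SQLite engine as a placeholder for the real Postgres configuration:

```python
from sqlalchemy import Engine, create_engine


class Client:
    def __init__(self, engine: Engine | None = None):
        if engine is None:
            # The real clients build a Postgres engine from environment variables here.
            engine = create_engine("sqlite://")
        self.engine = engine


print(Client().engine.url)                             # default engine
print(Client(create_engine("sqlite://")).engine.url)   # injected engine (e.g. from a test fixture)
```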
b/src/db/client/types.py index ffce5621..18b32b88 100644 --- a/src/db/client/types.py +++ b/src/db/client/types.py @@ -1,5 +1,5 @@ -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion -UserSuggestionModel = UserURLTypeSuggestion or UserRecordTypeSuggestion or UserUrlAgencySuggestion +UserSuggestionModel = UserURLTypeSuggestion or UserRecordTypeSuggestion or UserURLAgencySuggestion diff --git a/src/db/constants.py b/src/db/constants.py index a3574a96..67ff66a5 100644 --- a/src/db/constants.py +++ b/src/db/constants.py @@ -1,4 +1,4 @@ -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion @@ -9,5 +9,5 @@ USER_ANNOTATION_MODELS = [ UserURLTypeSuggestion, UserRecordTypeSuggestion, - UserUrlAgencySuggestion + UserURLAgencySuggestion ] \ No newline at end of file diff --git a/src/db/dto_converter.py b/src/db/dto_converter.py index f0c9b097..dab6b496 100644 --- a/src/db/dto_converter.py +++ b/src/db/dto_converter.py @@ -13,7 +13,7 @@ from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.url.html.content.enums import HTMLContentType from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.relevant.auto.sqlalchemy import AutoRelevantSuggestion diff --git a/src/db/enums.py b/src/db/enums.py index b232c188..053fdace 100644 --- a/src/db/enums.py +++ b/src/db/enums.py @@ -55,8 +55,6 @@ class TaskType(PyEnum): # Scheduled Tasks PUSH_TO_HUGGINGFACE = "Push to Hugging Face" - SYNC_AGENCIES = "Sync Agencies" - SYNC_DATA_SOURCES = "Sync Data Sources" POPULATE_BACKLOG_SNAPSHOT = "Populate Backlog Snapshot" DELETE_OLD_LOGS = "Delete Old Logs" DELETE_STALE_SCREENSHOTS = "Delete Stale Screenshots" @@ -65,6 +63,16 @@ class TaskType(PyEnum): TASK_CLEANUP = "Task Cleanup" REFRESH_MATERIALIZED_VIEWS = "Refresh Materialized Views" + SYNC_AGENCIES_ADD = "Sync Agencies Add" + SYNC_AGENCIES_UPDATE = "Sync Agencies Update" + SYNC_AGENCIES_DELETE = "Sync Agencies Delete" + SYNC_DATA_SOURCES_ADD = "Sync Data Sources Add" + SYNC_DATA_SOURCES_UPDATE = "Sync Data Sources Update" + SYNC_DATA_SOURCES_DELETE = "Sync Data Sources Delete" + SYNC_META_URLS_ADD = "Sync Meta URLs Add" + SYNC_META_URLS_UPDATE = "Sync Meta URLs Update" + SYNC_META_URLS_DELETE = "Sync Meta URLs Delete" + class ChangeLogOperationType(PyEnum): INSERT = "INSERT" UPDATE = "UPDATE" diff --git a/src/db/models/helpers.py b/src/db/models/helpers.py index f547e8d4..592973a6 100644 --- a/src/db/models/helpers.py +++ b/src/db/models/helpers.py @@ -11,7 +11,7 @@ def get_agency_id_foreign_column( return Column( 'agency_id', Integer(), - ForeignKey('agencies.agency_id', ondelete='CASCADE'), + 
ForeignKey('agencies.id', ondelete='CASCADE'), nullable=nullable ) diff --git a/src/db/models/impl/agency/ds_link/__init__.py b/src/db/models/impl/agency/ds_link/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/agency/ds_link/sqlalchemy.py b/src/db/models/impl/agency/ds_link/sqlalchemy.py new file mode 100644 index 00000000..32911882 --- /dev/null +++ b/src/db/models/impl/agency/ds_link/sqlalchemy.py @@ -0,0 +1,19 @@ +from sqlalchemy import Integer, Column + +from src.db.models.mixins import CreatedAtMixin, AgencyDependentMixin, LastSyncedAtMixin +from src.db.models.templates_.base import Base + + +class DSAppLinkAgency( + Base, + CreatedAtMixin, + AgencyDependentMixin, + LastSyncedAtMixin +): + __tablename__ = "ds_app_link_agency" + + ds_agency_id = Column( + Integer, + primary_key=True, + nullable=False + ) \ No newline at end of file diff --git a/src/db/models/impl/agency/sqlalchemy.py b/src/db/models/impl/agency/sqlalchemy.py index 28717bfd..e72e1038 100644 --- a/src/db/models/impl/agency/sqlalchemy.py +++ b/src/db/models/impl/agency/sqlalchemy.py @@ -18,28 +18,22 @@ class Agency( ): __tablename__ = "agencies" - # TODO: Rename agency_id to ds_agency_id - - agency_id = Column( - Integer, - Sequence("agencies_agency_id"), - primary_key=True) name = Column(String, nullable=False) agency_type: Mapped[AgencyType] = enum_column(AgencyType, name="agency_type_enum") jurisdiction_type: Mapped[JurisdictionType] = enum_column( JurisdictionType, name="jurisdiction_type_enum", - nullable=True, + nullable=False, ) # Relationships automated_suggestions = relationship("AgencyIDSubtaskSuggestion") - user_suggestions = relationship("UserUrlAgencySuggestion", back_populates="agency") + user_suggestions = relationship("UserURLAgencySuggestion", back_populates="agency") confirmed_urls = relationship("LinkURLAgency", back_populates="agency") locations = relationship( "LocationExpandedView", - primaryjoin="Agency.agency_id == LinkAgencyLocation.agency_id", + primaryjoin="Agency.id == LinkAgencyLocation.agency_id", secondaryjoin="LocationExpandedView.id == LinkAgencyLocation.location_id", secondary="link_agencies_locations", ) diff --git a/src/db/models/impl/flag/ds_delete/__init__.py b/src/db/models/impl/flag/ds_delete/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/flag/ds_delete/agency.py b/src/db/models/impl/flag/ds_delete/agency.py new file mode 100644 index 00000000..2559376d --- /dev/null +++ b/src/db/models/impl/flag/ds_delete/agency.py @@ -0,0 +1,20 @@ +from sqlalchemy import ForeignKey, Integer, Column + +from src.db.models.mixins import CreatedAtMixin +from src.db.models.templates_.base import Base + + +class FlagDSDeleteAgency( + Base, + CreatedAtMixin +): + __tablename__ = "flag_ds_delete_agency" + + ds_agency_id = Column( + Integer, + ForeignKey( + "ds_app_link_agency.ds_agency_id", + ondelete="CASCADE" + ), + primary_key=True, + ) \ No newline at end of file diff --git a/src/db/models/impl/flag/ds_delete/data_source.py b/src/db/models/impl/flag/ds_delete/data_source.py new file mode 100644 index 00000000..38d3cba8 --- /dev/null +++ b/src/db/models/impl/flag/ds_delete/data_source.py @@ -0,0 +1,20 @@ +from sqlalchemy import ForeignKey, Integer, Column + +from src.db.models.mixins import CreatedAtMixin +from src.db.models.templates_.base import Base + + +class FlagDSDeleteDataSource( + Base, + CreatedAtMixin +): + __tablename__ = "flag_ds_delete_data_source" + + ds_data_source_id = Column( + Integer, + ForeignKey( + 
"ds_app_link_data_source.ds_data_source_id", + ondelete="CASCADE" + ), + primary_key=True, + ) \ No newline at end of file diff --git a/src/db/models/impl/flag/ds_delete/meta_url.py b/src/db/models/impl/flag/ds_delete/meta_url.py new file mode 100644 index 00000000..1fc90d06 --- /dev/null +++ b/src/db/models/impl/flag/ds_delete/meta_url.py @@ -0,0 +1,20 @@ +from sqlalchemy import Column, Integer, ForeignKey + +from src.db.models.mixins import CreatedAtMixin +from src.db.models.templates_.base import Base + + +class FlagDSDeleteMetaURL( + Base, + CreatedAtMixin +): + __tablename__ = "flag_ds_delete_meta_url" + + ds_meta_url_id = Column( + Integer, + ForeignKey( + 'ds_app_link_meta_url.ds_meta_url_id', + ondelete='CASCADE' + ), + primary_key=True, + ) \ No newline at end of file diff --git a/src/db/models/impl/link/url_agency/sqlalchemy.py b/src/db/models/impl/link/url_agency/sqlalchemy.py index 875fa25f..92d1c37b 100644 --- a/src/db/models/impl/link/url_agency/sqlalchemy.py +++ b/src/db/models/impl/link/url_agency/sqlalchemy.py @@ -11,8 +11,8 @@ class LinkURLAgency(URLDependentMixin, WithIDBase): agency_id: Mapped[int] = get_agency_id_foreign_column() - url = relationship("URL", back_populates="confirmed_agencies") - agency = relationship("Agency", back_populates="confirmed_urls") + url = relationship("URL") + agency = relationship("Agency") __table_args__ = ( UniqueConstraint("url_id", "agency_id", name="uq_confirmed_url_agency"), diff --git a/src/db/models/impl/link/url_redirect_url/sqlalchemy.py b/src/db/models/impl/link/url_redirect_url/sqlalchemy.py index 312cbb57..534c7213 100644 --- a/src/db/models/impl/link/url_redirect_url/sqlalchemy.py +++ b/src/db/models/impl/link/url_redirect_url/sqlalchemy.py @@ -1,3 +1,5 @@ +from sqlalchemy.orm import Mapped + from src.db.models.helpers import url_id_column from src.db.models.templates_.standard import StandardBase @@ -5,6 +7,6 @@ class LinkURLRedirectURL(StandardBase): __tablename__ = "link_urls_redirect_url" - source_url_id = url_id_column() - destination_url_id = url_id_column() + source_url_id: Mapped[int] = url_id_column() + destination_url_id: Mapped[int] = url_id_column() diff --git a/src/db/models/impl/link/urls_root_url/sqlalchemy.py b/src/db/models/impl/link/urls_root_url/sqlalchemy.py index a856dd31..8dcd7085 100644 --- a/src/db/models/impl/link/urls_root_url/sqlalchemy.py +++ b/src/db/models/impl/link/urls_root_url/sqlalchemy.py @@ -1,3 +1,5 @@ +from sqlalchemy.orm import Mapped + from src.db.models.helpers import url_id_column from src.db.models.mixins import URLDependentMixin, CreatedAtMixin, UpdatedAtMixin from src.db.models.templates_.with_id import WithIDBase @@ -11,4 +13,4 @@ class LinkURLRootURL( ): __tablename__ = "link_urls_root_url" - root_url_id = url_id_column() \ No newline at end of file + root_url_id: Mapped[int] = url_id_column() \ No newline at end of file diff --git a/src/db/models/impl/sync_log/__init__.py b/src/db/models/impl/sync_log/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/db/models/impl/sync_log/enums.py b/src/db/models/impl/sync_log/enums.py new file mode 100644 index 00000000..e1fe483a --- /dev/null +++ b/src/db/models/impl/sync_log/enums.py @@ -0,0 +1,12 @@ +from enum import Enum + + +class ResourceType(Enum): + AGENCY = 'agency' + DATA_SOURCE = 'data_source' + META_URL = 'meta_url' + +class SyncType(Enum): + ADD = 'add' + UPDATE = 'update' + DELETE = 'delete' \ No newline at end of file diff --git a/src/db/models/impl/sync_log/sqlalchemy.py 
b/src/db/models/impl/sync_log/sqlalchemy.py new file mode 100644 index 00000000..b545940f --- /dev/null +++ b/src/db/models/impl/sync_log/sqlalchemy.py @@ -0,0 +1,17 @@ +from sqlalchemy import PrimaryKeyConstraint, Column, Integer, DateTime + +from src.db.models.helpers import enum_column +from src.db.models.impl.sync_log.enums import ResourceType, SyncType +from src.db.models.templates_.base import Base + + +class SyncLog(Base): + __tablename__ = 'sync_log' + __table_args__ = ( + PrimaryKeyConstraint('resource_type', 'sync_type', 'created_at'), + ) + + resource_type = enum_column(ResourceType, name='resource_type_enum') + sync_type = enum_column(SyncType, name='sync_type_enum') + count = Column(Integer, nullable=False) + created_at = Column(DateTime, nullable=False) \ No newline at end of file diff --git a/src/db/models/impl/url/core/sqlalchemy.py b/src/db/models/impl/url/core/sqlalchemy.py index 02d4fbf2..5bdcdadb 100644 --- a/src/db/models/impl/url/core/sqlalchemy.py +++ b/src/db/models/impl/url/core/sqlalchemy.py @@ -95,7 +95,7 @@ def full_url(cls): URLNameSuggestion ) user_agency_suggestions = relationship( - "UserUrlAgencySuggestion", back_populates="url") + "UserURLAgencySuggestion", back_populates="url") auto_record_type_suggestion = relationship( "AutoRecordTypeSuggestion", uselist=False, back_populates="url") user_record_type_suggestions = relationship( @@ -109,10 +109,12 @@ def full_url(cls): optional_data_source_metadata = relationship( "URLOptionalDataSourceMetadata", uselist=False, back_populates="url") confirmed_agencies = relationship( - "LinkURLAgency", + "Agency", + secondary="link_urls_agency" + ) data_source = relationship( - "URLDataSource", + "DSAppLinkDataSource", back_populates="url", uselist=False ) diff --git a/src/db/models/impl/url/data_source/pydantic.py b/src/db/models/impl/url/data_source/pydantic.py index 7d02c5df..49a83ac8 100644 --- a/src/db/models/impl/url/data_source/pydantic.py +++ b/src/db/models/impl/url/data_source/pydantic.py @@ -1,11 +1,11 @@ -from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.db.templates.markers.bulk.insert import BulkInsertableModel class URLDataSourcePydantic(BulkInsertableModel): - data_source_id: int + ds_data_source_id: int url_id: int @classmethod - def sa_model(cls) -> type[URLDataSource]: - return URLDataSource \ No newline at end of file + def sa_model(cls) -> type[DSAppLinkDataSource]: + return DSAppLinkDataSource \ No newline at end of file diff --git a/src/db/models/impl/url/data_source/sqlalchemy.py b/src/db/models/impl/url/data_source/sqlalchemy.py index be7bf047..74c9bdf0 100644 --- a/src/db/models/impl/url/data_source/sqlalchemy.py +++ b/src/db/models/impl/url/data_source/sqlalchemy.py @@ -1,14 +1,27 @@ -from sqlalchemy import Column, Integer +from sqlalchemy import Column, Integer, ForeignKey from sqlalchemy.orm import relationship -from src.db.models.mixins import CreatedAtMixin, URLDependentMixin +from src.db.models.mixins import CreatedAtMixin, URLDependentMixin, LastSyncedAtMixin from src.db.models.templates_.with_id import WithIDBase -class URLDataSource(CreatedAtMixin, URLDependentMixin, WithIDBase): - __tablename__ = "url_data_source" +class DSAppLinkDataSource( + CreatedAtMixin, + URLDependentMixin, + WithIDBase, + LastSyncedAtMixin +): + __tablename__ = "ds_app_link_data_source" - data_source_id = Column(Integer, nullable=False) + url_id = Column( + Integer, + ForeignKey( + 'urls.id', + 
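For orientation, here is a minimal sketch of how a completed sync run might be recorded in the new `sync_log` table. Only the `SyncLog` model and its enums come from this change; the `AsyncDatabaseClient` helper and the surrounding wiring are assumed for illustration.

```python
from datetime import datetime, timezone

from src.db.client.async_ import AsyncDatabaseClient
from src.db.models.impl.sync_log.enums import ResourceType, SyncType
from src.db.models.impl.sync_log.sqlalchemy import SyncLog


async def record_sync_run(adb_client: AsyncDatabaseClient, added_count: int) -> None:
    # One row per (resource_type, sync_type, created_at) -- the table's composite primary key.
    await adb_client.add(
        SyncLog(
            resource_type=ResourceType.AGENCY,
            sync_type=SyncType.ADD,
            count=added_count,
            created_at=datetime.now(timezone.utc),
        )
    )
```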
ondelete="SET NULL", + ), + nullable=True + ) + ds_data_source_id = Column(Integer, nullable=False, primary_key=True) # Relationships url = relationship( diff --git a/src/db/models/impl/url/ds_meta_url/pydantic.py b/src/db/models/impl/url/ds_meta_url/pydantic.py index 8f7674e9..60a83e3b 100644 --- a/src/db/models/impl/url/ds_meta_url/pydantic.py +++ b/src/db/models/impl/url/ds_meta_url/pydantic.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from src.db.models.impl.url.ds_meta_url.sqlalchemy import URLDSMetaURL +from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL class URLDSMetaURLPydantic(BaseModel): @@ -10,5 +10,5 @@ class URLDSMetaURLPydantic(BaseModel): agency_id: int @classmethod - def sa_model(cls) -> type[URLDSMetaURL]: - return URLDSMetaURL \ No newline at end of file + def sa_model(cls) -> type[DSAppLinkMetaURL]: + return DSAppLinkMetaURL \ No newline at end of file diff --git a/src/db/models/impl/url/ds_meta_url/sqlalchemy.py b/src/db/models/impl/url/ds_meta_url/sqlalchemy.py index e642a694..1d74c12d 100644 --- a/src/db/models/impl/url/ds_meta_url/sqlalchemy.py +++ b/src/db/models/impl/url/ds_meta_url/sqlalchemy.py @@ -1,20 +1,26 @@ -from sqlalchemy import Column, Integer, PrimaryKeyConstraint, UniqueConstraint +from sqlalchemy import Column, Integer, PrimaryKeyConstraint, UniqueConstraint, ForeignKey -from src.db.models.mixins import URLDependentMixin, CreatedAtMixin, AgencyDependentMixin +from src.db.models.mixins import URLDependentMixin, CreatedAtMixin, AgencyDependentMixin, LastSyncedAtMixin from src.db.models.templates_.base import Base -class URLDSMetaURL( +class DSAppLinkMetaURL( Base, - URLDependentMixin, - AgencyDependentMixin, - CreatedAtMixin + CreatedAtMixin, + LastSyncedAtMixin ): - __tablename__ = "url_ds_meta_url" + __tablename__ = "ds_app_link_meta_url" - ds_meta_url_id = Column(Integer) + url_id = Column( + Integer, + ForeignKey( + 'urls.id', + ondelete="SET NULL", + ), + nullable=True + ) + ds_meta_url_id = Column(Integer, primary_key=True) __table_args__ = ( - PrimaryKeyConstraint("url_id", "agency_id"), - UniqueConstraint("ds_meta_url_id"), + UniqueConstraint("url_id"), ) \ No newline at end of file diff --git a/src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py b/src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py index 3f6e239b..4661be7a 100644 --- a/src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py +++ b/src/db/models/impl/url/optional_ds_metadata/sqlalchemy.py @@ -4,11 +4,15 @@ from src.db.models.helpers import enum_column from src.db.models.impl.url.optional_ds_metadata.enums import AgencyAggregationEnum, AccessTypeEnum, \ RetentionScheduleEnum, UpdateMethodEnum -from src.db.models.mixins import URLDependentMixin +from src.db.models.mixins import URLDependentMixin, UpdatedAtMixin from src.db.models.templates_.with_id import WithIDBase -class URLOptionalDataSourceMetadata(URLDependentMixin, WithIDBase): +class URLOptionalDataSourceMetadata( + URLDependentMixin, + WithIDBase, + UpdatedAtMixin +): __tablename__ = 'url_optional_data_source_metadata' record_formats = Column(ARRAY(String), nullable=True) @@ -35,6 +39,7 @@ class URLOptionalDataSourceMetadata(URLDependentMixin, WithIDBase): values_callable=lambda AccessTypeEnum: [e.value for e in AccessTypeEnum] ) ), nullable=True) + data_portal_type_other = Column(String, nullable=True) # Relationships url = relationship("URL", uselist=False, back_populates="optional_data_source_metadata") diff --git a/src/db/models/impl/url/record_type/sqlalchemy.py 
b/src/db/models/impl/url/record_type/sqlalchemy.py index 7e8f2fac..23137fae 100644 --- a/src/db/models/impl/url/record_type/sqlalchemy.py +++ b/src/db/models/impl/url/record_type/sqlalchemy.py @@ -2,13 +2,14 @@ from src.core.enums import RecordType from src.db.models.helpers import url_id_primary_key_constraint, enum_column -from src.db.models.mixins import URLDependentMixin, CreatedAtMixin +from src.db.models.mixins import URLDependentMixin, CreatedAtMixin, UpdatedAtMixin from src.db.models.templates_.base import Base class URLRecordType( Base, CreatedAtMixin, + UpdatedAtMixin, URLDependentMixin ): __tablename__ = "url_record_type" diff --git a/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py b/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py index 89371498..7a297ef1 100644 --- a/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py +++ b/src/db/models/impl/url/suggestion/agency/subtask/sqlalchemy.py @@ -1,4 +1,4 @@ -from sqlalchemy.orm import relationship +from sqlalchemy.orm import relationship, Mapped from src.db.models.helpers import enum_column from src.db.models.impl.url.suggestion.agency.subtask.enum import AutoAgencyIDSubtaskType, SubtaskDetailCode @@ -16,7 +16,7 @@ class URLAutoAgencyIDSubtask( __tablename__ = "url_auto_agency_id_subtasks" - type = enum_column( + type: Mapped[AutoAgencyIDSubtaskType] = enum_column( AutoAgencyIDSubtaskType, name="agency_auto_suggestion_method" ) @@ -24,7 +24,7 @@ class URLAutoAgencyIDSubtask( sa.Boolean(), nullable=False ) - detail = enum_column( + detail: Mapped[SubtaskDetailCode] = enum_column( SubtaskDetailCode, name="agency_id_subtask_detail_code", ) diff --git a/src/db/models/impl/url/suggestion/agency/user.py b/src/db/models/impl/url/suggestion/agency/user.py index f7c43aad..2cd18851 100644 --- a/src/db/models/impl/url/suggestion/agency/user.py +++ b/src/db/models/impl/url/suggestion/agency/user.py @@ -6,15 +6,15 @@ from src.db.models.templates_.with_id import WithIDBase -class UserUrlAgencySuggestion(URLDependentMixin, WithIDBase): +class UserURLAgencySuggestion(URLDependentMixin, WithIDBase): __tablename__ = "user_url_agency_suggestions" agency_id: Mapped[int] = get_agency_id_foreign_column(nullable=True) user_id = Column(Integer, nullable=False) is_new = Column(Boolean, nullable=True) - agency = relationship("Agency", back_populates="user_suggestions") - url = relationship("URL", back_populates="user_agency_suggestions") + agency = relationship("Agency") + url = relationship("URL") __table_args__ = ( UniqueConstraint("agency_id", "url_id", "user_id", name="uq_user_url_agency_suggestions"), diff --git a/src/db/models/impl/url/suggestion/relevant/auto/sqlalchemy.py b/src/db/models/impl/url/suggestion/relevant/auto/sqlalchemy.py index 49dc7457..dd109269 100644 --- a/src/db/models/impl/url/suggestion/relevant/auto/sqlalchemy.py +++ b/src/db/models/impl/url/suggestion/relevant/auto/sqlalchemy.py @@ -5,7 +5,12 @@ from src.db.models.templates_.with_id import WithIDBase -class AutoRelevantSuggestion(UpdatedAtMixin, CreatedAtMixin, URLDependentMixin, WithIDBase): +class AutoRelevantSuggestion( + UpdatedAtMixin, + CreatedAtMixin, + URLDependentMixin, + WithIDBase +): __tablename__ = "auto_relevant_suggestions" relevant = Column(Boolean, nullable=True) diff --git a/src/db/models/mixins.py b/src/db/models/mixins.py index 12a0b2a1..417eae40 100644 --- a/src/db/models/mixins.py +++ b/src/db/models/mixins.py @@ -58,10 +58,16 @@ class AgencyDependentMixin: nullable=False ) - class CreatedAtMixin: created_at = 
get_created_at_column() +class LastSyncedAtMixin: + last_synced_at = Column( + TIMESTAMP, + nullable=False, + server_default=CURRENT_TIME_SERVER_DEFAULT + ) + class UpdatedAtMixin: updated_at = Column( diff --git a/src/db/queries/implementations/core/common/annotation_exists_/constants.py b/src/db/queries/implementations/core/common/annotation_exists_/constants.py index 1237634e..b5adfad9 100644 --- a/src/db/queries/implementations/core/common/annotation_exists_/constants.py +++ b/src/db/queries/implementations/core/common/annotation_exists_/constants.py @@ -1,5 +1,5 @@ from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.relevant.auto.sqlalchemy import AutoRelevantSuggestion @@ -11,5 +11,5 @@ URLAutoAgencyIDSubtask, UserURLTypeSuggestion, UserRecordTypeSuggestion, - UserUrlAgencySuggestion + UserURLAgencySuggestion ] diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py index 4921337f..27240b7d 100644 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/builder.py @@ -8,7 +8,7 @@ from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL from src.db.models.impl.batch.sqlalchemy import Batch -from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.db.models.views.batch_url_status.core import BatchURLStatusMatView from src.db.models.views.batch_url_status.enums import BatchURLStatusEnum from src.db.queries.base.builder import QueryBuilderBase diff --git a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/submitted.py b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/submitted.py index 5ab305cc..3b9e0c55 100644 --- a/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/submitted.py +++ b/src/db/queries/implementations/core/get/recent_batch_summaries/url_counts/cte/submitted.py @@ -5,7 +5,7 @@ from src.db.models.impl.batch.sqlalchemy import Batch from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource from src.db.queries.implementations.core.get.recent_batch_summaries.url_counts.cte_container import \ URLCountsCTEContainer @@ -23,8 +23,8 @@ URL.id == LinkBatchURL.url_id, ) .join( - URLDataSource, - URLDataSource.url_id == URL.id, + DSAppLinkDataSource, + DSAppLinkDataSource.url_id == URL.id, ) .group_by( Batch.id diff --git a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py index 17136cce..395fe3f9 100644 --- 
a/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py +++ b/src/db/queries/implementations/core/metrics/urls/aggregated/pending.py @@ -6,7 +6,7 @@ from src.api.endpoints.metrics.dtos.get.urls.aggregated.pending import GetMetricsURLsAggregatedPendingResponseDTO from src.collectors.enums import URLStatus from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion from src.db.models.mixins import URLDependentMixin @@ -25,7 +25,7 @@ def has_user_record_type_annotation(self): @property def has_user_agency_annotation(self): - return self.get_exists_for_model(UserUrlAgencySuggestion) + return self.get_exists_for_model(UserURLAgencySuggestion) def get_exists_for_model(self, model: Type[URLDependentMixin]): return self.query.c[ diff --git a/src/db/types.py b/src/db/types.py index dcee196f..073fec7c 100644 --- a/src/db/types.py +++ b/src/db/types.py @@ -1,10 +1,10 @@ from typing import TypeVar -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion from src.db.queries.base.labels import LabelsBase -UserSuggestionType = UserUrlAgencySuggestion | UserURLTypeSuggestion | UserRecordTypeSuggestion +UserSuggestionType = UserURLAgencySuggestion | UserURLTypeSuggestion | UserRecordTypeSuggestion LabelsType = TypeVar("LabelsType", bound=LabelsBase) \ No newline at end of file diff --git a/src/external/pdap/_templates/__init__.py b/src/external/pdap/_templates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/_templates/request_builder.py b/src/external/pdap/_templates/request_builder.py new file mode 100644 index 00000000..387421f4 --- /dev/null +++ b/src/external/pdap/_templates/request_builder.py @@ -0,0 +1,39 @@ +from abc import ABC, abstractmethod +from http import HTTPStatus +from typing import Any + +from pdap_access_manager import AccessManager, RequestType, RequestInfo, ResponseInfo +from pydantic import BaseModel + + +class PDAPRequestBuilderBase(ABC): + + def __init__(self): + self.access_manager: AccessManager | None = None + + async def run(self, access_manager: AccessManager) -> Any: + self.access_manager = access_manager + return await self.inner_logic() + + def build_url(self, path: str) -> str: + return f"{self.access_manager.data_sources_url}/{path}" + + async def post( + self, + url: str, + model: BaseModel + ) -> dict: + request_info = RequestInfo( + type_=RequestType.POST, + url=url, + json_=model.model_dump(mode='json'), + headers=await self.access_manager.jwt_header() + ) + response_info: ResponseInfo = await self.access_manager.make_request(request_info) + if response_info.status_code != HTTPStatus.OK: + raise Exception(f"Failed to make request to PDAP: {response_info.data}") + return response_info.data + + @abstractmethod + async def inner_logic(self) -> Any: + raise NotImplementedError diff --git a/src/external/pdap/client.py b/src/external/pdap/client.py index 1c950ad3..944f8a88 100644 --- a/src/external/pdap/client.py +++ 
b/src/external/pdap/client.py @@ -2,13 +2,11 @@ from pdap_access_manager import AccessManager, DataSourcesNamespaces, RequestInfo, RequestType, ResponseInfo -from src.core.tasks.url.operators.submit_approved.tdo import SubmitApprovedURLTDO, SubmittedURLInfo +from src.external.pdap._templates.request_builder import PDAPRequestBuilderBase from src.external.pdap.dtos.match_agency.post import MatchAgencyInfo from src.external.pdap.dtos.match_agency.response import MatchAgencyResponse from src.external.pdap.dtos.unique_url_duplicate import UniqueURLDuplicateInfo from src.external.pdap.enums import MatchAgencyResponseStatus -from src.external.pdap.impl.meta_urls.core import submit_meta_urls -from src.external.pdap.impl.meta_urls.request import SubmitMetaURLsRequest class PDAPClient: @@ -19,6 +17,12 @@ def __init__( ): self.access_manager = access_manager + async def run_request_builder( + self, + request_builder: PDAPRequestBuilderBase + ) -> Any: + return await request_builder.run(self.access_manager) + async def match_agency( self, name: str, @@ -90,70 +94,3 @@ async def is_url_duplicate( ] is_duplicate: bool = (len(duplicates) != 0) return is_duplicate - - async def submit_data_source_urls( - self, - tdos: list[SubmitApprovedURLTDO] - ) -> list[SubmittedURLInfo]: - """ - Submits URLs to Data Sources App, - modifying tdos in-place with data source id or error - """ - request_url = self.access_manager.build_url( - namespace=DataSourcesNamespaces.SOURCE_COLLECTOR, - subdomains=["data-sources"] - ) - - # Build url-id dictionary - url_id_dict: dict[str, int] = {} - for tdo in tdos: - url_id_dict[tdo.url] = tdo.url_id - - data_sources_json: list[dict[str, Any]] = [] - for tdo in tdos: - data_sources_json.append( - { - "name": tdo.name, - "description": tdo.description, - "source_url": tdo.url, - "record_type": tdo.record_type.value, - "record_formats": tdo.record_formats, - "data_portal_type": tdo.data_portal_type, - "last_approval_editor": tdo.approving_user_id, - "supplying_entity": tdo.supplying_entity, - "agency_ids": tdo.agency_ids - } - ) - - headers: dict[str, str] = await self.access_manager.jwt_header() - request_info = RequestInfo( - type_=RequestType.POST, - url=request_url, - headers=headers, - json_={ - "data_sources": data_sources_json - } - ) - response_info: ResponseInfo = await self.access_manager.make_request(request_info) - data_sources_response_json: list[dict[str, Any]] = response_info.data["data_sources"] - - results: list[SubmittedURLInfo] = [] - for data_source in data_sources_response_json: - url: str = data_source["url"] - response_object = SubmittedURLInfo( - url_id=url_id_dict[url], - data_source_id=data_source["data_source_id"], - request_error=data_source["error"] - ) - results.append(response_object) - - return results - - async def submit_meta_urls( - self, - requests: list[SubmitMetaURLsRequest] - ): - return await submit_meta_urls( - self.access_manager, - requests=requests - ) \ No newline at end of file diff --git a/src/external/pdap/impl/meta_urls/core.py b/src/external/pdap/impl/meta_urls/core.py deleted file mode 100644 index 4a34fbeb..00000000 --- a/src/external/pdap/impl/meta_urls/core.py +++ /dev/null @@ -1,58 +0,0 @@ -from typing import Any - -from pdap_access_manager import AccessManager, DataSourcesNamespaces, RequestInfo, RequestType, ResponseInfo - -from src.external.pdap.impl.meta_urls.enums import SubmitMetaURLsStatus -from src.external.pdap.impl.meta_urls.request import SubmitMetaURLsRequest -from src.external.pdap.impl.meta_urls.response import 
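To illustrate the request-builder pattern introduced by `PDAPRequestBuilderBase`, here is a small hypothetical subclass. The `PingRequestBuilder` name, endpoint path, and payload model are invented for illustration; only the base-class contract (`inner_logic`, `build_url`, `post`) is taken from the code above.

```python
from typing import Any

from pydantic import BaseModel

from src.external.pdap._templates.request_builder import PDAPRequestBuilderBase


class PingRequestModel(BaseModel):
    # Hypothetical payload, not part of this change.
    message: str


class PingRequestBuilder(PDAPRequestBuilderBase):
    """Hypothetical builder: POSTs a small payload and returns the raw response dict."""

    def __init__(self, message: str):
        super().__init__()
        self.message = message

    async def inner_logic(self) -> Any:
        # build_url() prefixes the Data Sources App base URL held by the access manager.
        url: str = self.build_url("v3/example/ping")
        # post() serializes the model, attaches the JWT header, and raises on non-OK responses.
        return await self.post(url=url, model=PingRequestModel(message=self.message))
```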
SubmitMetaURLsResponse - - -async def submit_meta_urls( - access_manager: AccessManager, - requests: list[SubmitMetaURLsRequest] -) -> list[SubmitMetaURLsResponse]: - - - # Build url-id dictionary - url_id_dict: dict[str, int] = {} - for request in requests: - url_id_dict[request.url] = request.url_id - - meta_urls_json: list[dict[str, Any]] = [] - for request in requests: - meta_urls_json.append( - { - "url": request.url, - "agency_id": request.agency_id - } - ) - - headers: dict[str, str] = await access_manager.jwt_header() - url: str = access_manager.build_url( - namespace=DataSourcesNamespaces.SOURCE_COLLECTOR, - subdomains=["meta-urls"] - ) - request_info = RequestInfo( - type_=RequestType.POST, - url=url, - headers=headers, - json_={ - "meta_urls": meta_urls_json - } - ) - - response_info: ResponseInfo = await access_manager.make_request(request_info) - meta_urls_response_json: list[dict[str, Any]] = response_info.data["meta_urls"] - - responses: list[SubmitMetaURLsResponse] = [] - for meta_url in meta_urls_response_json: - responses.append( - SubmitMetaURLsResponse( - url=meta_url["url"], - status=SubmitMetaURLsStatus(meta_url["status"]), - agency_id=meta_url["agency_id"], - meta_url_id=meta_url["meta_url_id"], - error=meta_url["error"] - ) - ) - return responses \ No newline at end of file diff --git a/src/external/pdap/impl/meta_urls/enums.py b/src/external/pdap/impl/meta_urls/enums.py deleted file mode 100644 index e49e71aa..00000000 --- a/src/external/pdap/impl/meta_urls/enums.py +++ /dev/null @@ -1,7 +0,0 @@ -from enum import Enum - - -class SubmitMetaURLsStatus(Enum): - SUCCESS = "success" - FAILURE = "failure" - ALREADY_EXISTS = "already_exists" \ No newline at end of file diff --git a/src/external/pdap/impl/meta_urls/request.py b/src/external/pdap/impl/meta_urls/request.py deleted file mode 100644 index ac222aca..00000000 --- a/src/external/pdap/impl/meta_urls/request.py +++ /dev/null @@ -1,7 +0,0 @@ -from pydantic import BaseModel - - -class SubmitMetaURLsRequest(BaseModel): - url_id: int - url: str - agency_id: int diff --git a/src/external/pdap/impl/meta_urls/response.py b/src/external/pdap/impl/meta_urls/response.py deleted file mode 100644 index 96d5ece7..00000000 --- a/src/external/pdap/impl/meta_urls/response.py +++ /dev/null @@ -1,11 +0,0 @@ -from pydantic import BaseModel - -from src.external.pdap.impl.meta_urls.enums import SubmitMetaURLsStatus - - -class SubmitMetaURLsResponse(BaseModel): - url: str - status: SubmitMetaURLsStatus - meta_url_id: int | None = None - agency_id: int | None = None - error: str | None = None \ No newline at end of file diff --git a/src/external/pdap/impl/sync/__init__.py b/src/external/pdap/impl/sync/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/agencies/__init__.py b/src/external/pdap/impl/sync/agencies/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/agencies/_shared/__init__.py b/src/external/pdap/impl/sync/agencies/_shared/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/agencies/_shared/models/__init__.py b/src/external/pdap/impl/sync/agencies/_shared/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/agencies/_shared/models/content.py b/src/external/pdap/impl/sync/agencies/_shared/models/content.py new file mode 100644 index 00000000..124072a7 --- /dev/null +++ b/src/external/pdap/impl/sync/agencies/_shared/models/content.py 
@@ -0,0 +1,15 @@ +from pydantic import Field, BaseModel + +from src.db.models.impl.agency.enums import JurisdictionType, AgencyType + + +class AgencySyncContentModel(BaseModel): + # Required + name: str + jurisdiction_type: JurisdictionType + agency_type: AgencyType + location_ids: list[int] = Field(min_length=1) + + # Optional + no_web_presence: bool = False + defunct_year: int | None = None diff --git a/src/external/pdap/impl/sync/agencies/add/__init__.py b/src/external/pdap/impl/sync/agencies/add/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/agencies/add/core.py b/src/external/pdap/impl/sync/agencies/add/core.py new file mode 100644 index 00000000..276ff39d --- /dev/null +++ b/src/external/pdap/impl/sync/agencies/add/core.py @@ -0,0 +1,27 @@ +from src.external.pdap._templates.request_builder import PDAPRequestBuilderBase +from src.external.pdap.impl.sync.agencies.add.request import AddAgenciesOuterRequest +from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseInnerModel, \ + DSAppSyncAddResponseModel + + +class AddAgenciesRequestBuilder(PDAPRequestBuilderBase): + + def __init__( + self, + request: AddAgenciesOuterRequest + ): + super().__init__() + self.request = request + + async def inner_logic(self) -> list[DSAppSyncAddResponseInnerModel]: + url: str = self.build_url("v3/source-manager/agencies/add") + raw_results = await self.post( + url=url, + model=self.request, + ) + response = DSAppSyncAddResponseModel(**raw_results) + return response.entities + + + + diff --git a/src/external/pdap/impl/sync/agencies/add/request.py b/src/external/pdap/impl/sync/agencies/add/request.py new file mode 100644 index 00000000..575b4c42 --- /dev/null +++ b/src/external/pdap/impl/sync/agencies/add/request.py @@ -0,0 +1,20 @@ +from pydantic import BaseModel, model_validator, Field + +from src.external.pdap.impl.sync.agencies._shared.models.content import AgencySyncContentModel + + +class AddAgenciesInnerRequest(BaseModel): + request_id: int + content: AgencySyncContentModel + + +class AddAgenciesOuterRequest(BaseModel): + agencies: list[AddAgenciesInnerRequest] = Field(max_length=1000) + + @model_validator(mode="after") + def all_request_ids_unique(self): + if len(self.agencies) != len( + set([agency.request_id for agency in self.agencies]) + ): + raise ValueError("All request_ids must be unique") + return self diff --git a/src/external/pdap/impl/sync/agencies/delete/__init__.py b/src/external/pdap/impl/sync/agencies/delete/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/agencies/delete/core.py b/src/external/pdap/impl/sync/agencies/delete/core.py new file mode 100644 index 00000000..41c0cfd0 --- /dev/null +++ b/src/external/pdap/impl/sync/agencies/delete/core.py @@ -0,0 +1,22 @@ +from src.external.pdap._templates.request_builder import PDAPRequestBuilderBase +from src.external.pdap.impl.sync.shared.models.delete.request import DSAppSyncDeleteRequestModel + + +class DeleteAgenciesRequestBuilder(PDAPRequestBuilderBase): + + def __init__( + self, + ds_app_ids: list[int] + ): + super().__init__() + self.ds_app_ids = ds_app_ids + + async def inner_logic(self) -> None: + url: str = self.build_url("v3/source-manager/agencies/delete") + await self.post( + url=url, + model=DSAppSyncDeleteRequestModel( + ids=self.ds_app_ids + ) + ) + diff --git a/src/external/pdap/impl/sync/agencies/update/__init__.py b/src/external/pdap/impl/sync/agencies/update/__init__.py new file mode 100644 
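As a usage sketch, the new add-agencies builder could be driven through `PDAPClient.run_request_builder` roughly as follows. The request models, enum members, and builder come from this change; the client wiring, the location id, and the agency details are assumptions.

```python
from src.db.models.impl.agency.enums import AgencyType, JurisdictionType
from src.external.pdap.client import PDAPClient
from src.external.pdap.impl.sync.agencies._shared.models.content import AgencySyncContentModel
from src.external.pdap.impl.sync.agencies.add.core import AddAgenciesRequestBuilder
from src.external.pdap.impl.sync.agencies.add.request import (
    AddAgenciesInnerRequest,
    AddAgenciesOuterRequest,
)


async def push_new_agency(client: PDAPClient, agency_id: int, location_id: int) -> int:
    request = AddAgenciesOuterRequest(
        agencies=[
            AddAgenciesInnerRequest(
                # request_id must be unique within the batch; the local agency id is a natural choice.
                request_id=agency_id,
                content=AgencySyncContentModel(
                    name="Example Police Department",
                    jurisdiction_type=JurisdictionType.STATE,
                    agency_type=AgencyType.LAW_ENFORCEMENT,
                    location_ids=[location_id],
                ),
            )
        ]
    )
    builder = AddAgenciesRequestBuilder(request=request)
    # Each returned entity pairs our request_id with the app_id assigned by the Data Sources App.
    entities = await client.run_request_builder(builder)
    return entities[0].app_id
```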
index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/agencies/update/core.py b/src/external/pdap/impl/sync/agencies/update/core.py new file mode 100644 index 00000000..4c5673ac --- /dev/null +++ b/src/external/pdap/impl/sync/agencies/update/core.py @@ -0,0 +1,19 @@ +from src.external.pdap._templates.request_builder import PDAPRequestBuilderBase +from src.external.pdap.impl.sync.agencies.update.request import UpdateAgenciesOuterRequest + + +class UpdateAgenciesRequestBuilder(PDAPRequestBuilderBase): + + def __init__( + self, + request: UpdateAgenciesOuterRequest + ): + super().__init__() + self.request = request + + async def inner_logic(self) -> None: + url: str = self.build_url("v3/source-manager/agencies/update") + await self.post( + url=url, + model=self.request + ) \ No newline at end of file diff --git a/src/external/pdap/impl/sync/agencies/update/request.py b/src/external/pdap/impl/sync/agencies/update/request.py new file mode 100644 index 00000000..df43578e --- /dev/null +++ b/src/external/pdap/impl/sync/agencies/update/request.py @@ -0,0 +1,12 @@ +from pydantic import BaseModel, Field + +from src.external.pdap.impl.sync.agencies._shared.models.content import AgencySyncContentModel + + +class UpdateAgenciesInnerRequest(BaseModel): + app_id: int + content: AgencySyncContentModel + + +class UpdateAgenciesOuterRequest(BaseModel): + agencies: list[UpdateAgenciesInnerRequest] = Field(max_length=1000) diff --git a/src/external/pdap/impl/sync/data_sources/__init__.py b/src/external/pdap/impl/sync/data_sources/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/data_sources/_shared/__init__.py b/src/external/pdap/impl/sync/data_sources/_shared/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/data_sources/_shared/content.py b/src/external/pdap/impl/sync/data_sources/_shared/content.py new file mode 100644 index 00000000..d9403c63 --- /dev/null +++ b/src/external/pdap/impl/sync/data_sources/_shared/content.py @@ -0,0 +1,42 @@ +from datetime import date + +from pydantic import BaseModel, Field + +from src.core.enums import RecordType +from src.db.models.impl.url.optional_ds_metadata.enums import AgencyAggregationEnum, UpdateMethodEnum, \ + RetentionScheduleEnum, AccessTypeEnum +from src.external.pdap.enums import DataSourcesURLStatus +from src.external.pdap.impl.sync.data_sources._shared.enums import DetailLevel + + +class DataSourceSyncContentModel(BaseModel): + # Required + source_url: str + name: str + record_type: RecordType + + # Optional + description: str | None = None + + # Optional data source metadata + record_formats: list[str] | None = None + data_portal_type: str | None = None + supplying_entity: str | None = None + coverage_start: date | None = None + coverage_end: date | None = None + detail_level: DetailLevel | None = None + agency_supplied: bool | None = None + agency_originated: bool | None = None + agency_aggregation: AgencyAggregationEnum | None = None + agency_described_not_in_database: str | None = None + update_method: UpdateMethodEnum | None = None + readme_url: str | None = None + originating_entity: str | None = None + retention_schedule: RetentionScheduleEnum | None = None + scraper_url: str | None = None + access_notes: str | None = None + access_types: list[AccessTypeEnum] | None = None + data_portal_type_other: str | None = None + url_status: DataSourcesURLStatus | None = None + + agency_ids: list[int] = [] diff --git 
a/src/external/pdap/impl/sync/data_sources/_shared/enums.py b/src/external/pdap/impl/sync/data_sources/_shared/enums.py new file mode 100644 index 00000000..bc7929a2 --- /dev/null +++ b/src/external/pdap/impl/sync/data_sources/_shared/enums.py @@ -0,0 +1,11 @@ +from enum import Enum + + +class DetailLevel(Enum): + """ + Correlates to the detail_level enum in the database + """ + + INDIVIDUAL = "Individual record" + AGGREGATED = "Aggregated records" + SUMMARIZED = "Summarized totals" diff --git a/src/external/pdap/impl/sync/data_sources/add/__init__.py b/src/external/pdap/impl/sync/data_sources/add/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/data_sources/add/core.py b/src/external/pdap/impl/sync/data_sources/add/core.py new file mode 100644 index 00000000..8eaa1b8b --- /dev/null +++ b/src/external/pdap/impl/sync/data_sources/add/core.py @@ -0,0 +1,24 @@ +from src.external.pdap._templates.request_builder import PDAPRequestBuilderBase +from src.external.pdap.impl.sync.data_sources.add.request import AddDataSourcesOuterRequest +from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseInnerModel, \ + DSAppSyncAddResponseModel + + +class AddDataSourcesRequestBuilder(PDAPRequestBuilderBase): + + def __init__( + self, + request: AddDataSourcesOuterRequest + ): + super().__init__() + self.request = request + + async def inner_logic(self) -> list[DSAppSyncAddResponseInnerModel]: + url: str = self.build_url("v3/source-manager/data-sources/add") + raw_results = await self.post( + url=url, + model=self.request, + ) + response = DSAppSyncAddResponseModel(**raw_results) + return response.entities + diff --git a/src/external/pdap/impl/sync/data_sources/add/request.py b/src/external/pdap/impl/sync/data_sources/add/request.py new file mode 100644 index 00000000..dfa7188f --- /dev/null +++ b/src/external/pdap/impl/sync/data_sources/add/request.py @@ -0,0 +1,20 @@ +from pydantic import BaseModel, Field, model_validator + +from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel + + +class AddDataSourcesInnerRequest(BaseModel): + request_id: int + content: DataSourceSyncContentModel + + +class AddDataSourcesOuterRequest(BaseModel): + data_sources: list[AddDataSourcesInnerRequest] = Field(max_length=1000) + + @model_validator(mode="after") + def all_request_ids_unique(self): + if len(self.data_sources) != len( + set([data_source.request_id for data_source in self.data_sources]) + ): + raise ValueError("All request_ids must be unique") + return self diff --git a/src/external/pdap/impl/sync/data_sources/delete/__init__.py b/src/external/pdap/impl/sync/data_sources/delete/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/data_sources/delete/core.py b/src/external/pdap/impl/sync/data_sources/delete/core.py new file mode 100644 index 00000000..7199c0ca --- /dev/null +++ b/src/external/pdap/impl/sync/data_sources/delete/core.py @@ -0,0 +1,22 @@ +from src.external.pdap._templates.request_builder import PDAPRequestBuilderBase +from src.external.pdap.impl.sync.shared.models.delete.request import DSAppSyncDeleteRequestModel + + +class DeleteDataSourcesRequestBuilder(PDAPRequestBuilderBase): + + def __init__( + self, + ds_app_ids: list[int] + ): + super().__init__() + self.ds_app_ids = ds_app_ids + + async def inner_logic(self) -> None: + url: str = self.build_url("v3/source-manager/data-sources/delete") + await self.post( + url=url, + 
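The data-source content model mirrors the optional data source metadata columns. Below is a brief, hedged sketch of assembling an add request with a few of those optional fields populated; the concrete values and ids are illustrative only.

```python
from datetime import date

from src.core.enums import RecordType
from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel
from src.external.pdap.impl.sync.data_sources._shared.enums import DetailLevel
from src.external.pdap.impl.sync.data_sources.add.request import (
    AddDataSourcesInnerRequest,
    AddDataSourcesOuterRequest,
)

request = AddDataSourcesOuterRequest(
    data_sources=[
        AddDataSourcesInnerRequest(
            request_id=42,  # must be unique per batch (enforced by the model validator)
            content=DataSourceSyncContentModel(
                # Required fields
                source_url="https://example.com/booking-reports",
                name="Example Booking Reports",
                record_type=RecordType.BOOKING_REPORTS,
                # A sampling of the optional metadata
                description="Daily booking reports.",
                detail_level=DetailLevel.INDIVIDUAL,
                coverage_start=date(2020, 1, 1),
                agency_ids=[7],
            ),
        )
    ]
)
# The resulting request would then be passed to AddDataSourcesRequestBuilder and run
# through PDAPClient.run_request_builder, as in the agencies sketch above.
```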
model=DSAppSyncDeleteRequestModel( + ids=self.ds_app_ids + ) + ) + diff --git a/src/external/pdap/impl/sync/data_sources/request.py b/src/external/pdap/impl/sync/data_sources/request.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/data_sources/update/__init__.py b/src/external/pdap/impl/sync/data_sources/update/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/data_sources/update/core.py b/src/external/pdap/impl/sync/data_sources/update/core.py new file mode 100644 index 00000000..8bcaf57e --- /dev/null +++ b/src/external/pdap/impl/sync/data_sources/update/core.py @@ -0,0 +1,19 @@ +from src.external.pdap._templates.request_builder import PDAPRequestBuilderBase +from src.external.pdap.impl.sync.data_sources.update.request import UpdateDataSourcesOuterRequest + + +class UpdateDataSourcesRequestBuilder(PDAPRequestBuilderBase): + + def __init__( + self, + request: UpdateDataSourcesOuterRequest + ): + super().__init__() + self.request = request + + async def inner_logic(self) -> None: + url: str = self.build_url("v3/source-manager/data-sources/update") + await self.post( + url=url, + model=self.request + ) \ No newline at end of file diff --git a/src/external/pdap/impl/sync/data_sources/update/request.py b/src/external/pdap/impl/sync/data_sources/update/request.py new file mode 100644 index 00000000..97d95818 --- /dev/null +++ b/src/external/pdap/impl/sync/data_sources/update/request.py @@ -0,0 +1,15 @@ +from pydantic import BaseModel, Field + +from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel + + +class UpdateDataSourcesInnerRequest(BaseModel): + class Config: + arbitrary_types_allowed = True + + app_id: int + content: DataSourceSyncContentModel + + +class UpdateDataSourcesOuterRequest(BaseModel): + data_sources: list[UpdateDataSourcesInnerRequest] = Field(max_length=1000) diff --git a/src/external/pdap/impl/sync/meta_urls/__init__.py b/src/external/pdap/impl/sync/meta_urls/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/meta_urls/_shared/__init__.py b/src/external/pdap/impl/sync/meta_urls/_shared/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/meta_urls/_shared/content.py b/src/external/pdap/impl/sync/meta_urls/_shared/content.py new file mode 100644 index 00000000..9d81b3d7 --- /dev/null +++ b/src/external/pdap/impl/sync/meta_urls/_shared/content.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +class MetaURLSyncContentModel(BaseModel): + url: str + agency_ids: list[int] = [] diff --git a/src/external/pdap/impl/sync/meta_urls/add/__init__.py b/src/external/pdap/impl/sync/meta_urls/add/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/meta_urls/add/core.py b/src/external/pdap/impl/sync/meta_urls/add/core.py new file mode 100644 index 00000000..98d6f016 --- /dev/null +++ b/src/external/pdap/impl/sync/meta_urls/add/core.py @@ -0,0 +1,25 @@ +from src.external.pdap._templates.request_builder import PDAPRequestBuilderBase +from src.external.pdap.impl.sync.meta_urls.add.request import AddMetaURLsOuterRequest +from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseInnerModel, \ + DSAppSyncAddResponseModel + + +class AddMetaURLsRequestBuilder(PDAPRequestBuilderBase): + + def __init__( + self, + request: AddMetaURLsOuterRequest + ): + super().__init__() + self.request = request + + 
async def inner_logic(self) -> list[DSAppSyncAddResponseInnerModel]: + url: str = self.build_url("v3/source-manager/meta-urls/add") + raw_results = await self.post( + url=url, + model=self.request, + ) + response = DSAppSyncAddResponseModel(**raw_results) + return response.entities + + diff --git a/src/external/pdap/impl/sync/meta_urls/add/request.py b/src/external/pdap/impl/sync/meta_urls/add/request.py new file mode 100644 index 00000000..109560a2 --- /dev/null +++ b/src/external/pdap/impl/sync/meta_urls/add/request.py @@ -0,0 +1,20 @@ +from pydantic import BaseModel, Field, model_validator + +from src.external.pdap.impl.sync.meta_urls._shared.content import MetaURLSyncContentModel + + +class AddMetaURLsInnerRequest(BaseModel): + request_id: int + content: MetaURLSyncContentModel + + +class AddMetaURLsOuterRequest(BaseModel): + meta_urls: list[AddMetaURLsInnerRequest] = Field(max_length=1000) + + @model_validator(mode="after") + def all_request_ids_unique(self): + if len(self.meta_urls) != len( + set([meta_url.request_id for meta_url in self.meta_urls]) + ): + raise ValueError("All request_ids must be unique") + return self diff --git a/src/external/pdap/impl/sync/meta_urls/delete/__init__.py b/src/external/pdap/impl/sync/meta_urls/delete/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/meta_urls/delete/core.py b/src/external/pdap/impl/sync/meta_urls/delete/core.py new file mode 100644 index 00000000..abdc3a6b --- /dev/null +++ b/src/external/pdap/impl/sync/meta_urls/delete/core.py @@ -0,0 +1,24 @@ +from pdap_access_manager import AccessManager + +from src.external.pdap._templates.request_builder import PDAPRequestBuilderBase +from src.external.pdap.impl.sync.shared.models.delete.request import DSAppSyncDeleteRequestModel + + +class DeleteMetaURLsRequestBuilder(PDAPRequestBuilderBase): + + def __init__( + self, + ds_app_ids: list[int] + ): + super().__init__() + self.ds_app_ids = ds_app_ids + + async def inner_logic(self) -> None: + url: str = self.build_url("v3/source-manager/meta-urls/delete") + await self.post( + url=url, + model=DSAppSyncDeleteRequestModel( + ids=self.ds_app_ids + ) + ) + diff --git a/src/external/pdap/impl/sync/meta_urls/update/__init__.py b/src/external/pdap/impl/sync/meta_urls/update/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/meta_urls/update/core.py b/src/external/pdap/impl/sync/meta_urls/update/core.py new file mode 100644 index 00000000..37e84da9 --- /dev/null +++ b/src/external/pdap/impl/sync/meta_urls/update/core.py @@ -0,0 +1,19 @@ +from src.external.pdap._templates.request_builder import PDAPRequestBuilderBase +from src.external.pdap.impl.sync.meta_urls.update.request import UpdateMetaURLsOuterRequest + + +class UpdateMetaURLsRequestBuilder(PDAPRequestBuilderBase): + + def __init__( + self, + request: UpdateMetaURLsOuterRequest + ): + super().__init__() + self.request = request + + async def inner_logic(self) -> None: + url: str = self.build_url("v3/source-manager/meta-urls/update") + await self.post( + url=url, + model=self.request + ) \ No newline at end of file diff --git a/src/external/pdap/impl/sync/meta_urls/update/request.py b/src/external/pdap/impl/sync/meta_urls/update/request.py new file mode 100644 index 00000000..c38ae09e --- /dev/null +++ b/src/external/pdap/impl/sync/meta_urls/update/request.py @@ -0,0 +1,12 @@ +from pydantic import Field, BaseModel + +from src.external.pdap.impl.sync.meta_urls._shared.content import 
MetaURLSyncContentModel + + +class UpdateMetaURLsInnerRequest(BaseModel): + app_id: int + content: MetaURLSyncContentModel + + +class UpdateMetaURLsOuterRequest(BaseModel): + meta_urls: list[UpdateMetaURLsInnerRequest] = Field(max_length=1000) diff --git a/src/external/pdap/impl/sync/shared/__init__.py b/src/external/pdap/impl/sync/shared/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/shared/models/__init__.py b/src/external/pdap/impl/sync/shared/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/shared/models/add/__init__.py b/src/external/pdap/impl/sync/shared/models/add/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/shared/models/add/response.py b/src/external/pdap/impl/sync/shared/models/add/response.py new file mode 100644 index 00000000..209139cf --- /dev/null +++ b/src/external/pdap/impl/sync/shared/models/add/response.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel + +class DSAppSyncAddResponseInnerModel(BaseModel): + request_id: int + app_id: int + +class DSAppSyncAddResponseModel(BaseModel): + entities: list[DSAppSyncAddResponseInnerModel] \ No newline at end of file diff --git a/src/external/pdap/impl/sync/shared/models/delete/__init__.py b/src/external/pdap/impl/sync/shared/models/delete/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/external/pdap/impl/sync/shared/models/delete/request.py b/src/external/pdap/impl/sync/shared/models/delete/request.py new file mode 100644 index 00000000..c4e3bb8d --- /dev/null +++ b/src/external/pdap/impl/sync/shared/models/delete/request.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel + + +class DSAppSyncDeleteRequestModel(BaseModel): + ids: list[int] \ No newline at end of file diff --git a/src/external/pdap/impl/sync/shared/models/mapping.py b/src/external/pdap/impl/sync/shared/models/mapping.py new file mode 100644 index 00000000..fd22bca2 --- /dev/null +++ b/src/external/pdap/impl/sync/shared/models/mapping.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +class DSSyncIDMapping(BaseModel): + ds_app_link_id: int + entity_id: int \ No newline at end of file diff --git a/tests/automated/integration/api/agencies/delete/__init__.py b/tests/automated/integration/api/agencies/delete/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/agencies/test_core.py b/tests/automated/integration/api/agencies/delete/test_core.py similarity index 87% rename from tests/automated/integration/api/agencies/test_core.py rename to tests/automated/integration/api/agencies/delete/test_core.py index a986cacc..be8fb9fa 100644 --- a/tests/automated/integration/api/agencies/test_core.py +++ b/tests/automated/integration/api/agencies/delete/test_core.py @@ -35,27 +35,27 @@ async def test_agencies( link: LinkAgencyLocation = await ath.adb_client().one_or_none_model(model=LinkAgencyLocation) assert link is not None - assert link.agency_id == agency.agency_id + assert link.agency_id == agency.id assert link.location_id == california.location_id rv.delete_v3( - url=f"/agencies/{agency.agency_id}/locations/{california.location_id}", + url=f"/agencies/{agency.id}/locations/{california.location_id}", ) link: LinkAgencyLocation | None = await ath.adb_client().one_or_none_model(model=LinkAgencyLocation) assert link is None rv.post_v3( - url=f"/agencies/{agency.agency_id}/locations/{pennsylvania.location_id}", + 
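The shared add-response model gives every add endpoint the same `request_id` to `app_id` echo. A small sketch of parsing such a payload, assuming the response shape implied by `DSAppSyncAddResponseModel`:

```python
from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseModel

# Example payload in the shape the add builders parse: request_id -> app_id pairs.
raw = {"entities": [{"request_id": 10, "app_id": 501}, {"request_id": 11, "app_id": 502}]}

response = DSAppSyncAddResponseModel(**raw)

# request_id echoes the id we sent; app_id is the id assigned by the Data Sources App.
local_to_app_id: dict[int, int] = {
    entity.request_id: entity.app_id for entity in response.entities
}
```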
url=f"/agencies/{agency.id}/locations/{pennsylvania.location_id}", ) link: LinkAgencyLocation = await ath.adb_client().one_or_none_model(model=LinkAgencyLocation) assert link is not None - assert link.agency_id == agency.agency_id + assert link.agency_id == agency.id assert link.location_id == pennsylvania.location_id rv.put_v3( - url=f"/agencies/{agency.agency_id}", + url=f"/agencies/{agency.id}", json=AgencyPutRequest( name="Test Agency Updated", ).model_dump(mode="json") @@ -68,7 +68,7 @@ async def test_agencies( rv.delete_v3( - url=f"/agencies/{agency.agency_id}", + url=f"/agencies/{agency.id}", ) agency: Agency | None = await ath.adb_client().one_or_none_model(model=Agency) diff --git a/tests/automated/integration/api/agencies/delete/test_ds_linked.py b/tests/automated/integration/api/agencies/delete/test_ds_linked.py new file mode 100644 index 00000000..0470c75e --- /dev/null +++ b/tests/automated/integration/api/agencies/delete/test_ds_linked.py @@ -0,0 +1,44 @@ +import pytest + +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType +from src.db.models.impl.agency.sqlalchemy import Agency +from src.db.models.impl.flag.ds_delete.agency import FlagDSDeleteAgency +from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.counter import next_int + + +@pytest.mark.asyncio +async def test_ds_linked( + api_test_helper: APITestHelper +): + """If an agency has been linked to the Data Sources App, + the deletion operation should include an agency flag for deletion. + """ + + agency = Agency( + name="Test Agency", + agency_type=AgencyType.LAW_ENFORCEMENT, + jurisdiction_type=JurisdictionType.STATE, + ) + agency_id: int = await api_test_helper.adb_client().add(agency, return_id=True) + + ds_agency_id: int = next_int() + # Add DS link + ds_link = DSAppLinkAgency( + agency_id=agency_id, + ds_agency_id=ds_agency_id, + ) + await api_test_helper.adb_client().add(ds_link) + + api_test_helper.request_validator.delete_v3( + url=f"/agencies/{agency.id}", + ) + + agency: Agency | None = await api_test_helper.adb_client().one_or_none_model(model=Agency) + assert agency is None + + flag: FlagDSDeleteAgency | None = await api_test_helper.adb_client().one_or_none_model(model=FlagDSDeleteAgency) + assert flag is not None + assert flag.ds_agency_id == ds_agency_id + diff --git a/tests/automated/integration/api/annotate/all/test_happy_path.py b/tests/automated/integration/api/annotate/all/test_happy_path.py index 48b60b8b..e9fae81e 100644 --- a/tests/automated/integration/api/annotate/all/test_happy_path.py +++ b/tests/automated/integration/api/annotate/all/test_happy_path.py @@ -10,7 +10,7 @@ from src.core.enums import RecordType from src.db.models.impl.flag.url_validated.enums import URLType from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion @@ -110,7 +110,7 @@ async def test_annotate_all( assert suggested_types == {URLType.DATA_SOURCE, URLType.NOT_RELEVANT} # Should be one agency - all_agency_suggestions = await 
adb_client.get_all(UserUrlAgencySuggestion) + all_agency_suggestions = await adb_client.get_all(UserURLAgencySuggestion) assert len(all_agency_suggestions) == 3 suggested_agency_ids: set[int] = {sugg.agency_id for sugg in all_agency_suggestions} assert agency_id in suggested_agency_ids diff --git a/tests/automated/integration/api/data_sources/agencies/test_add_remove.py b/tests/automated/integration/api/data_sources/agencies/test_add_remove.py index 7223c8ce..42a82e11 100644 --- a/tests/automated/integration/api/data_sources/agencies/test_add_remove.py +++ b/tests/automated/integration/api/data_sources/agencies/test_add_remove.py @@ -6,21 +6,22 @@ async def test_agencies_add_remove( api_test_helper: APITestHelper, test_url_data_source_id: int, + test_agency_id_2: int, test_agency_id: int ): api_test_helper.request_validator.post_v3( - url=f"/data-sources/{test_url_data_source_id}/agencies/{test_agency_id}", + url=f"/data-sources/{test_url_data_source_id}/agencies/{test_agency_id_2}", ) adb_client: AsyncDatabaseClient = api_test_helper.adb_client() links: list[LinkURLAgency] = await adb_client.get_all(LinkURLAgency) - assert len(links) == 1 - assert links[0].agency_id == test_agency_id - assert links[0].url_id == test_url_data_source_id + assert len(links) == 2 + assert {link.agency_id for link in links} == {test_agency_id_2, test_agency_id} + assert {link.url_id for link in links} == {test_url_data_source_id} api_test_helper.request_validator.delete_v3( - url=f"/data-sources/{test_url_data_source_id}/agencies/{test_agency_id}", + url=f"/data-sources/{test_url_data_source_id}/agencies/{test_agency_id_2}", ) links: list[LinkURLAgency] = await adb_client.get_all(LinkURLAgency) - assert len(links) == 0 \ No newline at end of file + assert len(links) == 1 \ No newline at end of file diff --git a/tests/automated/integration/api/meta_urls/agencies/test_add_remove.py b/tests/automated/integration/api/meta_urls/agencies/test_add_remove.py index 4f48ac5c..1bd90ea2 100644 --- a/tests/automated/integration/api/meta_urls/agencies/test_add_remove.py +++ b/tests/automated/integration/api/meta_urls/agencies/test_add_remove.py @@ -5,26 +5,27 @@ async def test_agencies_add_remove( api_test_helper: APITestHelper, test_url_meta_url_id: int, - test_agency_id: int + test_agency_id: int, + test_agency_id_2: int ): api_test_helper.request_validator.post_v3( - url=f"/meta-urls/{test_url_meta_url_id}/agencies/{test_agency_id}", + url=f"/meta-urls/{test_url_meta_url_id}/agencies/{test_agency_id_2}", ) raw_response: dict = api_test_helper.request_validator.get_v3( url=f"/meta-urls/{test_url_meta_url_id}/agencies", ) response = AgencyGetOuterResponse(**raw_response) - assert len(response.results) == 1 - assert response.results[0].id == test_agency_id + assert len(response.results) == 2 + assert {result.id for result in response.results} == {test_agency_id, test_agency_id_2} api_test_helper.request_validator.delete_v3( - url=f"/meta-urls/{test_url_meta_url_id}/agencies/{test_agency_id}", + url=f"/meta-urls/{test_url_meta_url_id}/agencies/{test_agency_id_2}", ) raw_response: dict = api_test_helper.request_validator.get_v3( url=f"/meta-urls/{test_url_meta_url_id}/agencies", ) response = AgencyGetOuterResponse(**raw_response) - assert len(response.results) == 0 + assert len(response.results) == 1 diff --git a/tests/automated/integration/api/meta_urls/test_invalid_type.py b/tests/automated/integration/api/meta_urls/test_invalid_type.py index 12073191..b3e98a3d 100644 --- 
a/tests/automated/integration/api/meta_urls/test_invalid_type.py +++ b/tests/automated/integration/api/meta_urls/test_invalid_type.py @@ -1,6 +1,6 @@ import pytest -from src.api.endpoints.meta_url.by_id.agencies.put.request import UpdateMetaURLRequest +from src.api.endpoints.meta_url.by_id.put.request import UpdateMetaURLRequest from tests.helpers.api_test_helper import APITestHelper from tests.helpers.check import check_forbidden_url_type diff --git a/tests/automated/integration/api/meta_urls/test_put.py b/tests/automated/integration/api/meta_urls/test_put.py index 28689a8b..1c493009 100644 --- a/tests/automated/integration/api/meta_urls/test_put.py +++ b/tests/automated/integration/api/meta_urls/test_put.py @@ -1,6 +1,6 @@ import pytest -from src.api.endpoints.meta_url.by_id.agencies.put.request import UpdateMetaURLRequest +from src.api.endpoints.meta_url.by_id.put.request import UpdateMetaURLRequest from src.db.client.async_ import AsyncDatabaseClient from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL from src.db.models.impl.url.core.sqlalchemy import URL diff --git a/tests/automated/integration/api/submit/test_url_maximal.py b/tests/automated/integration/api/submit/test_url_maximal.py index 8d1930f5..150b5409 100644 --- a/tests/automated/integration/api/submit/test_url_maximal.py +++ b/tests/automated/integration/api/submit/test_url_maximal.py @@ -8,7 +8,7 @@ from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.suggestion.agency.user import UserUrlAgencySuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion @@ -54,9 +54,9 @@ async def test_maximal( link: LinkUserSubmittedURL = links[0] assert link.url_id == url_id - agen_suggs: list[UserUrlAgencySuggestion] = await adb_client.get_all(UserUrlAgencySuggestion) + agen_suggs: list[UserURLAgencySuggestion] = await adb_client.get_all(UserURLAgencySuggestion) assert len(agen_suggs) == 1 - agen_sugg: UserUrlAgencySuggestion = agen_suggs[0] + agen_sugg: UserURLAgencySuggestion = agen_suggs[0] assert agen_sugg.url_id == url_id assert agen_sugg.agency_id == agency_id diff --git a/tests/automated/integration/api/url/by_id/delete/__init__.py b/tests/automated/integration/api/url/by_id/delete/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/url/by_id/delete/setup.py b/tests/automated/integration/api/url/by_id/delete/setup.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/api/url/by_id/delete/test_any_url.py b/tests/automated/integration/api/url/by_id/delete/test_any_url.py new file mode 100644 index 00000000..579da570 --- /dev/null +++ b/tests/automated/integration/api/url/by_id/delete/test_any_url.py @@ -0,0 +1,448 @@ +import pytest +from sqlalchemy import select + +from src.core.enums import RecordType +from src.db.client.async_ import AsyncDatabaseClient +from src.db.dtos.url.mapping_.simple import SimpleURLMapping +from src.db.enums import ChangeLogOperationType +from src.db.models.impl.change_log import ChangeLog 
+from src.db.models.impl.flag.checked_for_ia.sqlalchemy import FlagURLCheckedForInternetArchives +from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL +from src.db.models.impl.flag.url_suspended.sqlalchemy import FlagURLSuspended +from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.link.url_redirect_url.sqlalchemy import LinkURLRedirectURL +from src.db.models.impl.link.urls_root_url.sqlalchemy import LinkURLRootURL +from src.db.models.impl.link.user_name_suggestion.sqlalchemy import LinkUserNameSuggestion +from src.db.models.impl.link.user_suggestion_not_found.agency.sqlalchemy import LinkUserSuggestionAgencyNotFound +from src.db.models.impl.link.user_suggestion_not_found.location.sqlalchemy import LinkUserSuggestionLocationNotFound +from src.db.models.impl.link.user_suggestion_not_found.users_submitted_url.sqlalchemy import LinkUserSubmittedURL +from src.db.models.impl.url.checked_for_duplicate import URLCheckedForDuplicate +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.html.compressed.sqlalchemy import URLCompressedHTML +from src.db.models.impl.url.html.content.sqlalchemy import URLHTMLContent +from src.db.models.impl.url.internet_archives.probe.sqlalchemy import URLInternetArchivesProbeMetadata +from src.db.models.impl.url.internet_archives.save.sqlalchemy import URLInternetArchivesSaveMetadata +from src.db.models.impl.url.screenshot.sqlalchemy import URLScreenshot +from src.db.models.impl.url.suggestion.agency.subtask.enum import SubtaskDetailCode, AutoAgencyIDSubtaskType +from src.db.models.impl.url.suggestion.agency.subtask.sqlalchemy import URLAutoAgencyIDSubtask +from src.db.models.impl.url.suggestion.agency.suggestion.sqlalchemy import AgencyIDSubtaskSuggestion +from src.db.models.impl.url.suggestion.agency.user import UserURLAgencySuggestion +from src.db.models.impl.url.suggestion.anonymous.agency.sqlalchemy import AnonymousAnnotationAgency +from src.db.models.impl.url.suggestion.anonymous.location.sqlalchemy import AnonymousAnnotationLocation +from src.db.models.impl.url.suggestion.anonymous.record_type.sqlalchemy import AnonymousAnnotationRecordType +from src.db.models.impl.url.suggestion.anonymous.url_type.sqlalchemy import AnonymousAnnotationURLType +from src.db.models.impl.url.suggestion.location.auto.subtask.enums import LocationIDSubtaskType +from src.db.models.impl.url.suggestion.location.auto.subtask.sqlalchemy import AutoLocationIDSubtask +from src.db.models.impl.url.suggestion.location.auto.suggestion.sqlalchemy import LocationIDSubtaskSuggestion +from src.db.models.impl.url.suggestion.location.user.sqlalchemy import UserLocationSuggestion +from src.db.models.impl.url.suggestion.name.enums import NameSuggestionSource +from src.db.models.impl.url.suggestion.name.sqlalchemy import URLNameSuggestion +from src.db.models.impl.url.suggestion.record_type.auto import AutoRecordTypeSuggestion +from src.db.models.impl.url.suggestion.record_type.user import UserRecordTypeSuggestion +from src.db.models.impl.url.suggestion.relevant.auto.sqlalchemy import AutoRelevantSuggestion +from src.db.models.impl.url.suggestion.relevant.user import UserURLTypeSuggestion +from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError +from src.db.models.impl.url.web_metadata.sqlalchemy import URLWebMetadata +from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.data_creator.core import DBDataCreator +from 
tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo + + +@pytest.mark.asyncio +async def test_any_url( + pittsburgh_locality: LocalityCreationInfo, + db_data_creator: DBDataCreator, + test_agency_id: int, + api_test_helper: APITestHelper +): + """ + Test that deletion works properly for a URL that has all possible attributes + that any URL could have + """ + + url_id: int = await _setup( + ddc=db_data_creator, + pittsburgh_id=pittsburgh_locality.location_id, + agency_id=test_agency_id + ) + api_test_helper.request_validator.delete_v3( + f"url/{url_id}" + ) + await _check_results(url_id, dbc=db_data_creator.adb_client) + + + +async def _check_results( + url_id: int, + dbc: AsyncDatabaseClient +) -> None: + # There should be only two urls present in the database, neither matching URL id + urls: list[URL] = await dbc.get_all(URL) + assert len(urls) == 2 + assert url_id not in (url.id for url in urls) + + # For the following models, there should no longer be any entries in the database. + models = [ + # Batch Link + LinkBatchURL, + # MISCELLANEOUS + ## Flag Root URL + FlagRootURL, + ## URL Task Error + URLTaskError, + ## URL Checked for Duplicate + URLCheckedForDuplicate, + ## Flag URL Suspended + FlagURLSuspended, + # LINKS + ## Link URLs Redirect URL + LinkURLRedirectURL, + ## Link URLs Root URL + LinkURLRootURL, + ## Link User Submitted URLs + LinkUserSubmittedURL, + ## Link User Suggestion Agency Not Found + LinkUserSuggestionAgencyNotFound, + ## Link User Suggestion Location Not Found + LinkUserSuggestionLocationNotFound, + # WEB DATA + ## URL Compressed HTML + URLCompressedHTML, + ## URL HTML Content + URLHTMLContent, + ## URL Screenshot + URLScreenshot, + ## URL Web Metadata + URLWebMetadata, + # INTERNET ARCHIVES + ## Flag URL Checked for Internet Archives + FlagURLCheckedForInternetArchives, + ## URL Internet Archives Probe Metadata + URLInternetArchivesProbeMetadata, + ## URL Internet Archives Save Metadata + URLInternetArchivesSaveMetadata, + # ANNOTATIONS + ## AUTO + ### Agency + URLAutoAgencyIDSubtask, + AgencyIDSubtaskSuggestion, + ### Record Type + AutoRecordTypeSuggestion, + ### URL Type + AutoRelevantSuggestion, + ### Location + AutoLocationIDSubtask, + LocationIDSubtaskSuggestion, + ## USER + ### Agency + UserURLAgencySuggestion, + ### Record Type + UserRecordTypeSuggestion, + ### URL Type + UserURLTypeSuggestion, + ### Location + UserLocationSuggestion, + URLNameSuggestion, + ## ANONYMOUS + ### Agency + AnonymousAnnotationAgency, + ### Location + AnonymousAnnotationLocation, + ### Record Type + AnonymousAnnotationRecordType, + ### URL Type + AnonymousAnnotationURLType, + ] + for model in models: + assert await dbc.get_all(model) == [] + + # The Change Log should show, at minimum, the deletion of the URL + query = ( + select( + ChangeLog + ) + .where( + ChangeLog.table_name == "urls", + ChangeLog.operation_type == ChangeLogOperationType.DELETE + ) + ) + result = dbc.one_or_none(query) + assert result is not None + + +async def _setup( + ddc: DBDataCreator, + pittsburgh_id: int, + agency_id: int +) -> int: + dbc: AsyncDatabaseClient = ddc.adb_client + # URL & Batch Link + url: SimpleURLMapping = (await ddc.create_urls( + record_type=None + ))[0] + + # MISCELLANEOUS + ## Flag Root URL + await ddc.flag_as_root(url_ids=[url.url_id]) + ## URL Task Error + ### Task + task_id: int = await ddc.task(url_ids=[url.url_id]) + ### Error + await ddc.task_errors(url_ids=[url.url_id], task_id=task_id) + ## URL Checked for Duplicate + await dbc.add( + 
URLCheckedForDuplicate( + url_id=url.url_id + ) + ) + ## Flag URL Suspended + await dbc.add( + FlagURLSuspended( + url_id=url.url_id + ) + ) + # LINKS + ## Link URLs Redirect URL + ### Additional url + additional_url: SimpleURLMapping = (await ddc.create_urls( + record_type=None + ))[0] + ### Redirect url + await dbc.add( + LinkURLRedirectURL( + source_url_id=url.url_id, + destination_url_id=additional_url.url_id + ) + ) + ### (We will go in both directions even though this should technically not be legal) + await dbc.add( + LinkURLRedirectURL( + source_url_id=additional_url.url_id, + destination_url_id=url.url_id + ) + ) + ## Link URLs Root URL + ### (Again, will go in both directions despite this not being legal) + root_url: SimpleURLMapping = (await ddc.create_urls( + record_type=None + ))[0] + await dbc.add( + LinkURLRootURL( + url_id=url.url_id, + root_url_id=root_url.url_id + ) + ) + await dbc.add( + LinkURLRootURL( + url_id=root_url.url_id, + root_url_id=url.url_id + ) + ) + ## Link User Submitted URL + await dbc.add( + LinkUserSubmittedURL( + url_id=url.url_id, + user_id=1 + ) + ) + ## Link User Suggestion Agency Not Found + await dbc.add( + LinkUserSuggestionAgencyNotFound( + url_id=url.url_id, + user_id=1 + ) + ) + ## Link User Suggestion Location Not Found + await dbc.add( + LinkUserSuggestionLocationNotFound( + url_id=url.url_id, + user_id=1 + ) + ) + # WEB DATA + ## URL Compressed HTML + await ddc.add_compressed_html( + url_ids=[url.url_id] + ) + ## URL HTML Content + await dbc.add( + URLHTMLContent( + url_id=url.url_id, + content_type="Title", + content="Test Title" + ) + ) + ## URL Screenshot + await dbc.add( + URLScreenshot( + url_id=url.url_id, + content=b"Test Screenshot", + file_size=1024 + ) + ) + ## URL Web Metadata + await ddc.create_web_metadata( + url_ids=[url.url_id] + ) + # INTERNET ARCHIVES + ## Flag URL Checked for Internet Archives + await dbc.add( + FlagURLCheckedForInternetArchives( + url_id=url.url_id, + success=True + ) + ) + ## URL Internet Archives Probe Metadata + await dbc.add( + URLInternetArchivesProbeMetadata( + url_id=url.url_id, + archive_url="https://example.com", + digest="test_digest", + length=1024, + ) + ) + ## URL Internet Archives Save Metadata + await dbc.add( + URLInternetArchivesSaveMetadata( + url_id=url.url_id, + ) + ) + # ANNOTATIONS + ## AUTO + ### Agency + #### Subtask + agency_subtask_id: int = await dbc.add( + URLAutoAgencyIDSubtask( + url_id=url.url_id, + task_id=task_id, + agencies_found=True, + type=AutoAgencyIDSubtaskType.NLP_LOCATION_MATCH, + detail=SubtaskDetailCode.NO_DETAILS + ), + return_id=True + ) + ### Suggestion + await dbc.add( + AgencyIDSubtaskSuggestion( + subtask_id=agency_subtask_id, + agency_id=agency_id, + confidence=60 + ) + ) + ### Record Type + await dbc.add( + AutoRecordTypeSuggestion( + url_id=url.url_id, + record_type=RecordType.BOOKING_REPORTS.value + ) + ) + ### Relevant + await dbc.add( + AutoRelevantSuggestion( + url_id=url.url_id, + relevant=True, + confidence=0.5, + model_name="Test Model" + ) + ) + ### Location + #### Subtask + location_subtask_id: int = await dbc.add( + AutoLocationIDSubtask( + url_id=url.url_id, + task_id=task_id, + locations_found=True, + type=LocationIDSubtaskType.NLP_LOCATION_FREQUENCY, + ), + return_id=True + ) + #### Suggestion + await dbc.add( + LocationIDSubtaskSuggestion( + subtask_id=location_subtask_id, + location_id=pittsburgh_id, + confidence=50 + ) + ) + ## USER + ### Agency + await dbc.add( + UserURLAgencySuggestion( + url_id=url.url_id, + user_id=1, + 
agency_id=agency_id, + is_new=False + ) + ) + ### Record Type + await dbc.add( + UserRecordTypeSuggestion( + url_id=url.url_id, + user_id=1, + record_type=RecordType.BOOKING_REPORTS.value, + ) + ) + ### URL Type + await dbc.add( + UserURLTypeSuggestion( + url_id=url.url_id, + type=URLType.INDIVIDUAL_RECORD, + user_id=1 + ) + ) + ### Location + await dbc.add( + UserLocationSuggestion( + url_id=url.url_id, + location_id=pittsburgh_id, + user_id=1, + ) + ) + ### Name + name_suggestion_id: int = await dbc.add( + URLNameSuggestion( + url_id=url.url_id, + suggestion="Test Name", + source=NameSuggestionSource.USER, + ), + return_id=True + ) + await dbc.add( + LinkUserNameSuggestion( + suggestion_id=name_suggestion_id, + user_id=1, + ) + ) + ## ANONYMOUS + for model in [ + ### Agency + AnonymousAnnotationAgency( + url_id=url.url_id, + agency_id=agency_id + ), + ### Record Type + AnonymousAnnotationRecordType( + url_id=url.url_id, + record_type=RecordType.BOOKING_REPORTS.value + ), + ### URL Type + AnonymousAnnotationURLType( + url_id=url.url_id, + url_type=URLType.INDIVIDUAL_RECORD + ), + ### Location + AnonymousAnnotationLocation( + url_id=url.url_id, + location_id=pittsburgh_id + ) + ]: + await dbc.add(model) + + return url.url_id + + + + + + diff --git a/tests/automated/integration/api/url/by_id/delete/test_data_source_url.py b/tests/automated/integration/api/url/by_id/delete/test_data_source_url.py new file mode 100644 index 00000000..d551118b --- /dev/null +++ b/tests/automated/integration/api/url/by_id/delete/test_data_source_url.py @@ -0,0 +1,115 @@ +from datetime import date + +import pytest + +from src.core.enums import RecordType +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.flag.ds_delete.data_source import FlagDSDeleteDataSource +from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource +from src.db.models.impl.url.optional_ds_metadata.enums import AccessTypeEnum, RetentionScheduleEnum, UpdateMethodEnum, \ + AgencyAggregationEnum +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata +from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType +from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.data_creator.core import DBDataCreator + + +@pytest.mark.asyncio +async def test_data_source_url( + db_data_creator: DBDataCreator, + api_test_helper: APITestHelper, + test_agency_id: int +): + """ + Test that deletion works properly for a URL that is a validated data source + and has all data source-only attributes. 
+ """ + + url_id: int = await _setup( + ddc=db_data_creator, + agency_id=test_agency_id + ) + api_test_helper.request_validator.delete_v3( + f"url/{url_id}" + ) + await _check_results( + dbc=db_data_creator.adb_client + ) + +async def _check_results( + dbc: AsyncDatabaseClient +) -> None: + pass + # CHECK + ## URL and all associated tables should be deleted + assert await dbc.has_no_rows(URL) + + ### Record Type should be deleted + assert await dbc.has_no_rows(URLOptionalDataSourceMetadata) + assert await dbc.has_no_rows(LinkURLAgency) + assert await dbc.has_no_rows(URLRecordType) + + ## DS App Link should not yet be deleted + app_link: DSAppLinkDataSource = await dbc.one_or_none_model(DSAppLinkDataSource) + assert app_link is not None + + ## DS App Data Source Deletion Flag should be added + flag: FlagDSDeleteDataSource = await dbc.one_or_none_model(FlagDSDeleteDataSource) + assert flag is not None + assert flag.ds_data_source_id == app_link.ds_data_source_id + + +async def _setup( + ddc: DBDataCreator, + agency_id: int +) -> int: + pass + # SETUP + ## Validated Flag - Data Source + ## Record Type + url_id: int = (await ddc.create_validated_urls( + validation_type=URLType.DATA_SOURCE, + record_type=RecordType.BOOKING_REPORTS, + count=1 + ))[0].url_id + + ## Link Agency + await ddc.create_url_agency_links( + url_ids=[url_id], + agency_ids=[agency_id] + ) + + ## Optional DS Metadata + optional_ds_metadata = URLOptionalDataSourceMetadata( + url_id=url_id, + record_formats=["csv", "pdf"], + data_portal_type="CKAN", + supplying_entity="ReadOnly Agency", + coverage_start=date(year=2025, month=6, day=1), + coverage_end=date(year=2025, month=8, day=20), + agency_supplied=False, + agency_originated=True, + agency_aggregation=AgencyAggregationEnum.LOCALITY, + agency_described_not_in_database="ReadOnly Agency Not In DB", + update_method=UpdateMethodEnum.NO_UPDATES, + readme_url="https://read-only-readme.com", + originating_entity="ReadOnly Agency Originating", + retention_schedule=RetentionScheduleEnum.GT_10_YEARS, + scraper_url="https://read-only-scraper.com", + submission_notes="Read Only Submission Notes", + access_notes="Read Only Access Notes", + access_types=[AccessTypeEnum.WEBPAGE, AccessTypeEnum.API], + ) + await ddc.adb_client.add(optional_ds_metadata) + + ## DS App Link + app_link = DSAppLinkDataSource( + url_id=url_id, + ds_data_source_id=1 + ) + await ddc.adb_client.add(app_link) + + return url_id diff --git a/tests/automated/integration/api/url/by_id/delete/test_meta_url.py b/tests/automated/integration/api/url/by_id/delete/test_meta_url.py new file mode 100644 index 00000000..0fbee489 --- /dev/null +++ b/tests/automated/integration/api/url/by_id/delete/test_meta_url.py @@ -0,0 +1,77 @@ +import pytest + +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.flag.ds_delete.meta_url import FlagDSDeleteMetaURL +from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL +from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.data_creator.core import DBDataCreator + + +@pytest.mark.asyncio +async def test_meta_url( + db_data_creator: DBDataCreator, + api_test_helper: APITestHelper, + test_agency_id: int +): + """ + Test that deletion works properly for a URL that is a validated meta url + and has all data source-only attributes. 
+ """ + + url_id: int = await _setup( + ddc=db_data_creator, + agency_id=test_agency_id + ) + api_test_helper.request_validator.delete_v3( + f"url/{url_id}" + ) + await _check_results( + dbc=db_data_creator.adb_client + ) + + +async def _check_results( + dbc: AsyncDatabaseClient +) -> None: + pass + # CHECK + ## URL and all associated tables should be deleted + assert await dbc.has_no_rows(URL) + + ## DS App Link should not yet be deleted + app_link: DSAppLinkMetaURL = await dbc.one_or_none_model(DSAppLinkMetaURL) + assert app_link is not None + + ## DS App Meta URL Deletion Flag should be added + flag: FlagDSDeleteMetaURL = await dbc.one_or_none_model(FlagDSDeleteMetaURL) + assert flag is not None + assert flag.ds_meta_url_id == app_link.ds_meta_url_id + + +async def _setup( + ddc: DBDataCreator, + agency_id: int +) -> int: + pass + # SETUP + ## Validated Flag - Meta URL + url_id: int = (await ddc.create_validated_urls( + validation_type=URLType.META_URL, + count=1 + ))[0].url_id + + ## Link Agency + await ddc.create_url_agency_links( + url_ids=[url_id], + agency_ids=[agency_id] + ) + ## DS App Link + app_link = DSAppLinkMetaURL( + url_id=url_id, + ds_meta_url_id=1 + ) + await ddc.adb_client.add(app_link) + return url_id + diff --git a/tests/automated/integration/api/url/by_id/delete/test_validated_not_relevant.py b/tests/automated/integration/api/url/by_id/delete/test_validated_not_relevant.py new file mode 100644 index 00000000..6e6a738d --- /dev/null +++ b/tests/automated/integration/api/url/by_id/delete/test_validated_not_relevant.py @@ -0,0 +1,71 @@ +import pytest + +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.url.core.sqlalchemy import URL +from tests.helpers.api_test_helper import APITestHelper +from tests.helpers.data_creator.core import DBDataCreator + + +@pytest.mark.asyncio +async def test_validated_not_relevant( + db_data_creator: DBDataCreator, + api_test_helper: APITestHelper +): + """ + Test that deletion works properly for a URL that is a validated + as any of the non-relevant URL types + (not relevant, broken, individual record) + """ + + url_ids: list[int] = await _setup( + ddc=db_data_creator + ) + for url_id in url_ids: + api_test_helper.request_validator.delete_v3( + f"url/{url_id}" + ) + await _check_results( + url_ids, + dbc=db_data_creator.adb_client + ) + + + +async def _check_results( + url_ids: list[int], + dbc: AsyncDatabaseClient +) -> None: + pass + # CHECK + ## Each URLs Validation Flags should be deleted + url_validation_flags: list[FlagURLValidated] = await dbc.get_all(FlagURLValidated) + assert len(url_validation_flags) == 0 + + ## Each URL should be deleted + urls: list[URL] = await dbc.get_all(URL) + assert len(urls) == 0 + +async def _setup( + ddc: DBDataCreator +) -> list[int]: + url_ids: list[int] = [] + # SETUP (3 URLs) + for validated_type in [ + ## Validated Flag - Individual Record + URLType.INDIVIDUAL_RECORD, + ## Validated Flag - Broken + URLType.BROKEN_PAGE, + ## Validated Flag - Not Relevant + URLType.NOT_RELEVANT + ]: + url_id: int = (await ddc.create_validated_urls( + validation_type=validated_type, + count=1 + ))[0].url_id + url_ids.append(url_id) + return url_ids + + + diff --git a/tests/automated/integration/conftest.py b/tests/automated/integration/conftest.py index 42ab2214..6837bae0 100644 --- a/tests/automated/integration/conftest.py +++ 
b/tests/automated/integration/conftest.py @@ -151,7 +151,7 @@ async def api_test_helper( client: TestClient, db_client_test: DatabaseClient, adb_client_test: AsyncDatabaseClient -) -> AsyncGenerator[APITestHelper, Any]: + ) -> AsyncGenerator[APITestHelper, Any]: yield APITestHelper( request_validator=RequestValidator(client=client), async_core=client.app.state.async_core, @@ -170,25 +170,63 @@ def test_batch_id( @pytest_asyncio.fixture async def test_agency_id( - db_data_creator: DBDataCreator + db_data_creator: DBDataCreator, + pittsburgh_locality: LocalityCreationInfo, + pennsylvania: USStateCreationInfo ) -> int: - return await db_data_creator.agency( + """Test agency linked to two locations: Pittsburgh and Pennsylvania""" + agency_id: int = await db_data_creator.agency( name="Test Agency" ) + await db_data_creator.link_agencies_to_location( + agency_ids=[agency_id], + location_id=pittsburgh_locality.location_id + ) + await db_data_creator.link_agencies_to_location( + agency_ids=[agency_id], + location_id=pennsylvania.location_id + ) + return agency_id + +@pytest_asyncio.fixture +async def test_agency_id_2( + db_data_creator: DBDataCreator, + pittsburgh_locality: LocalityCreationInfo +) -> int: + agency_id: int = await db_data_creator.agency( + name="Test Agency 2" + ) + await db_data_creator.link_agencies_to_location( + agency_ids=[agency_id], + location_id=pittsburgh_locality.location_id + ) + return agency_id @pytest_asyncio.fixture async def test_url_data_source_id( - db_data_creator: DBDataCreator + db_data_creator: DBDataCreator, + test_agency_id: int ) -> int: - return (await db_data_creator.create_validated_urls( + url_id: int = (await db_data_creator.create_validated_urls( record_type=RecordType.CRIME_STATISTICS, validation_type=URLType.DATA_SOURCE, ))[0].url_id + await db_data_creator.link_urls_to_agencies( + url_ids=[url_id], + agency_ids=[test_agency_id] + ) + return url_id @pytest_asyncio.fixture async def test_url_meta_url_id( - db_data_creator: DBDataCreator + db_data_creator: DBDataCreator, + test_agency_id: int ) -> int: - return (await db_data_creator.create_validated_urls( + url_id: int = (await db_data_creator.create_validated_urls( validation_type=URLType.META_URL, ))[0].url_id + await db_data_creator.link_urls_to_agencies( + url_ids=[url_id], + agency_ids=[test_agency_id] + ) + return url_id diff --git a/tests/automated/integration/db/structure/test_upsert_new_agencies.py b/tests/automated/integration/db/structure/test_upsert_new_agencies.py index 6b377974..6adb043b 100644 --- a/tests/automated/integration/db/structure/test_upsert_new_agencies.py +++ b/tests/automated/integration/db/structure/test_upsert_new_agencies.py @@ -46,13 +46,13 @@ async def test_upsert_new_agencies( await adb_client.upsert_new_agencies([update_suggestion]) - rows = await adb_client.get_all(Agency, order_by_attribute="agency_id") + rows: list[Agency] = await adb_client.get_all(Agency, order_by_attribute="id") assert len(rows) == 3 d = {} for row in rows: - d[row.agency_id] = row.name + d[row.id] = row.name assert d[0] == "Updated Test Agency" assert d[1] == "Test Agency 1" diff --git a/tests/automated/integration/readonly/conftest.py b/tests/automated/integration/readonly/conftest.py index a5bcd249..4589f5b5 100644 --- a/tests/automated/integration/readonly/conftest.py +++ b/tests/automated/integration/readonly/conftest.py @@ -3,9 +3,9 @@ import pytest import pytest_asyncio +from sqlalchemy import Engine from starlette.testclient import TestClient -from src.db.client.async_ import 
AsyncDatabaseClient from src.db.helpers.connect import get_postgres_connection_string from tests.automated.integration.api._helpers.RequestValidator import RequestValidator from tests.automated.integration.readonly.helper import ReadOnlyTestHelper @@ -34,8 +34,10 @@ async def california_readonly( async def readonly_helper( event_loop, client: TestClient, + engine: Engine + ) -> AsyncGenerator[ReadOnlyTestHelper, Any]: - wipe_database(get_postgres_connection_string()) + wipe_database(engine) db_data_creator = DBDataCreator() api_test_helper = APITestHelper( request_validator=RequestValidator(client=client), diff --git a/tests/automated/integration/readonly/setup.py b/tests/automated/integration/readonly/setup.py index 20c6d537..ec8c78b1 100644 --- a/tests/automated/integration/readonly/setup.py +++ b/tests/automated/integration/readonly/setup.py @@ -156,16 +156,15 @@ async def add_agency( pittsburgh: LocalityCreationInfo ) -> int: agency_1 = Agency( - agency_id=next_int(), name="Agency 1", agency_type=AgencyType.LAW_ENFORCEMENT, jurisdiction_type=JurisdictionType.STATE, ) - await adb_client.add(agency_1) + agency_id: int = await adb_client.add(agency_1, return_id=True) # Add Agency location agency_1_location = LinkAgencyLocation( - agency_id=agency_1.agency_id, + agency_id=agency_id, location_id=pittsburgh.location_id, ) await adb_client.add(agency_1_location) - return agency_1.agency_id \ No newline at end of file + return agency_id \ No newline at end of file diff --git a/tests/automated/integration/tasks/conftest.py b/tests/automated/integration/tasks/conftest.py index a06da58c..937b2d12 100644 --- a/tests/automated/integration/tasks/conftest.py +++ b/tests/automated/integration/tasks/conftest.py @@ -11,6 +11,7 @@ def mock_pdap_client() -> PDAPClient: mock_access_manager = MagicMock( spec=AccessManager ) + mock_access_manager.data_sources_url = "http://example.com" mock_access_manager.build_url = MagicMock( return_value="http://example.com" ) diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/conftest.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/conftest.py new file mode 100644 index 00000000..4cb7a3f2 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/conftest.py @@ -0,0 +1,22 @@ +import pytest_asyncio + +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel + + +@pytest_asyncio.fixture +async def ds_app_linked_agency( + test_agency_id: int, + adb_client_test: AsyncDatabaseClient +) -> DSAppLinkInfoModel: + # Add DS App Link + ds_app_link = DSAppLinkAgency( + agency_id=test_agency_id, + ds_agency_id=67 + ) + await adb_client_test.add(ds_app_link) + return DSAppLinkInfoModel( + ds_app_id=67, + db_id=test_agency_id + ) diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/test_add.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/test_add.py new file 
mode 100644 index 00000000..f0997d65 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/test_add.py @@ -0,0 +1,81 @@ +from http import HTTPStatus +from unittest.mock import AsyncMock + +import pytest +from pdap_access_manager import ResponseInfo + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.add.core import DSAppSyncAgenciesAddTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency +from src.db.models.impl.agency.enums import JurisdictionType, AgencyType +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.agencies._shared.models.content import AgencySyncContentModel +from src.external.pdap.impl.sync.agencies.add.request import AddAgenciesOuterRequest, AddAgenciesInnerRequest +from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseModel, \ + DSAppSyncAddResponseInnerModel +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo +from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo +from tests.helpers.run import run_task_and_confirm_success + + +@pytest.mark.asyncio +async def test_add( + db_data_creator: DBDataCreator, + test_agency_id: int, + adb_client_test: AsyncDatabaseClient, + mock_pdap_client: PDAPClient, + pittsburgh_locality: LocalityCreationInfo, + pennsylvania: USStateCreationInfo, +): + operator = DSAppSyncAgenciesAddTaskOperator( + adb_client=adb_client_test, + pdap_client=mock_pdap_client + ) + + # Mock make_request to return a false DS App id + mock_make_request( + mock_pdap_client=mock_pdap_client, + data=DSAppSyncAddResponseModel( + entities=[ + DSAppSyncAddResponseInnerModel( + app_id=67, + request_id=test_agency_id + ) + ] + ) + ) + + # Check meets prerequisite + assert await operator.meets_task_prerequisites() + + # Run task and confirm runs without error + await run_task_and_confirm_success(operator) + + # Confirm expected method was called with expected parameters + request: AddAgenciesOuterRequest = extract_and_validate_sync_request( + mock_pdap_client, + expected_path="agencies/add", + expected_model=AddAgenciesOuterRequest + ) + assert len(request.agencies) == 1 + agency: AddAgenciesInnerRequest = request.agencies[0] + assert agency.request_id == test_agency_id + content: AgencySyncContentModel = agency.content + assert content.name == "Test Agency" + assert content.jurisdiction_type == JurisdictionType.LOCAL + assert content.agency_type == AgencyType.UNKNOWN + assert set(content.location_ids) == { + pittsburgh_locality.location_id, + pennsylvania.location_id + } + + # Check Presence of DS App Link + ds_app_link: DSAppLinkAgency = await adb_client_test.one_or_none_model(DSAppLinkAgency) + assert ds_app_link is not None + assert ds_app_link.ds_agency_id == 67 + assert ds_app_link.agency_id == test_agency_id + + diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/test_delete.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/test_delete.py new file mode 100644 index 00000000..e311b886 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/test_delete.py @@ -0,0 +1,64 @@ +import pytest + +from src.api.shared.models.message_response 
import MessageResponse +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.delete.core import DSAppSyncAgenciesDeleteTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency +from src.db.models.impl.flag.ds_delete.agency import FlagDSDeleteAgency +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.shared.models.delete.request import DSAppSyncDeleteRequestModel +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel +from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.run import run_task_and_confirm_success + + +@pytest.mark.asyncio +async def test_delete( + db_data_creator: DBDataCreator, + ds_app_linked_agency: DSAppLinkInfoModel, + adb_client_test: AsyncDatabaseClient, + mock_pdap_client: PDAPClient +): + ds_agency_id: int = 67 + operator = DSAppSyncAgenciesDeleteTaskOperator( + adb_client=adb_client_test, + pdap_client=mock_pdap_client + ) + + # Mock make_request + mock_make_request( + mock_pdap_client=mock_pdap_client, + data=MessageResponse(message="Success") + ) + + + # Check does not currently meet prerequisite + assert not await operator.meets_task_prerequisites() + + # Add Task Deletion Flag for App Link + flag = FlagDSDeleteAgency( + ds_agency_id=ds_agency_id + ) + await adb_client_test.add(flag) + + # Check meets prerequisite + assert await operator.meets_task_prerequisites() + + # Run task and confirm runs without error + await run_task_and_confirm_success(operator) + + # Confirm expected method was called with expected parameters + request: DSAppSyncDeleteRequestModel = extract_and_validate_sync_request( + mock_pdap_client, + expected_path="agencies/delete", + expected_model=DSAppSyncDeleteRequestModel + ) + assert request.ids == [ds_agency_id] + + # Check DS App Link Is Deleted + assert await adb_client_test.has_no_rows(DSAppLinkAgency) + + # Check DS App Agency Deletion Flag is deleted + assert await adb_client_test.has_no_rows(FlagDSDeleteAgency) diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/conftest.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/conftest.py new file mode 100644 index 00000000..eafc4148 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/conftest.py @@ -0,0 +1,16 @@ +import pytest + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.update.core import DSAppSyncAgenciesUpdateTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.external.pdap.client import PDAPClient + + +@pytest.fixture +def operator( + adb_client_test: AsyncDatabaseClient, + mock_pdap_client: PDAPClient +) -> DSAppSyncAgenciesUpdateTaskOperator: + return DSAppSyncAgenciesUpdateTaskOperator( + adb_client=adb_client_test, + pdap_client=mock_pdap_client + ) \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/helpers.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/helpers.py new file mode 
100644 index 00000000..7901bea5 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/helpers.py @@ -0,0 +1,7 @@ +from datetime import datetime + + +def check_ds_app_link_updated( + old_updated_at: datetime +) -> None: + raise NotImplementedError \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/test_add_location_link.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/test_add_location_link.py new file mode 100644 index 00000000..4dfbaba7 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/test_add_location_link.py @@ -0,0 +1,76 @@ +from src.api.shared.models.message_response import MessageResponse +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.update.core import DSAppSyncAgenciesUpdateTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency +from src.db.models.impl.agency.enums import JurisdictionType, AgencyType +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.agencies._shared.models.content import AgencySyncContentModel +from src.external.pdap.impl.sync.agencies.update.request import UpdateAgenciesOuterRequest, UpdateAgenciesInnerRequest +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.agency.conftest import ds_app_linked_agency +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel +from tests.helpers.data_creator.models.creation_info.county import CountyCreationInfo +from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo +from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo +from tests.helpers.run import run_task_and_confirm_success + + +async def test_add_location_link( + ds_app_linked_agency: DSAppLinkInfoModel, + pittsburgh_locality: LocalityCreationInfo, + allegheny_county: CountyCreationInfo, + pennsylvania: USStateCreationInfo, + operator: DSAppSyncAgenciesUpdateTaskOperator, + mock_pdap_client: PDAPClient, + adb_client_test: AsyncDatabaseClient, +): + + # Check prerequisites not met + assert not await operator.meets_task_prerequisites() + + # Mock make_request + mock_make_request( + mock_pdap_client=mock_pdap_client, + data=MessageResponse(message="Success") + ) + + # Add location link + link = LinkAgencyLocation( + agency_id=ds_app_linked_agency.db_id, + location_id=allegheny_county.location_id + ) + await adb_client_test.add(link) + + # Check prerequisites are met + assert await operator.meets_task_prerequisites() + + # Run task and confirm runs without error + await run_task_and_confirm_success(operator) + + # Confirm expected method was called with expected parameters + request: UpdateAgenciesOuterRequest = extract_and_validate_sync_request( + mock_pdap_client, + expected_path="agencies/update", + expected_model=UpdateAgenciesOuterRequest + ) + assert len(request.agencies) == 1 + agency: UpdateAgenciesInnerRequest = request.agencies[0] + assert agency.app_id == ds_app_linked_agency.ds_app_id + content: AgencySyncContentModel = agency.content + assert content.name == "Test Agency" + assert content.jurisdiction_type 
== JurisdictionType.LOCAL + assert content.agency_type == AgencyType.UNKNOWN + assert set(content.location_ids) == { + pittsburgh_locality.location_id, + pennsylvania.location_id, + allegheny_county.location_id + } + + + # Check DS App Link Is Updated + ds_app_link: DSAppLinkAgency | None = await adb_client_test.one_or_none_model(model=DSAppLinkAgency) + assert ds_app_link is not None + assert ds_app_link.ds_agency_id == 67 + assert ds_app_link.last_synced_at > ds_app_linked_agency.updated_at diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/test_delete_location_link.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/test_delete_location_link.py new file mode 100644 index 00000000..7f0450fe --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/test_delete_location_link.py @@ -0,0 +1,76 @@ +from sqlalchemy import delete + +from src.api.shared.models.message_response import MessageResponse +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.update.core import DSAppSyncAgenciesUpdateTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency +from src.db.models.impl.agency.enums import JurisdictionType, AgencyType +from src.db.models.impl.link.agency_location.sqlalchemy import LinkAgencyLocation +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.agencies._shared.models.content import AgencySyncContentModel +from src.external.pdap.impl.sync.agencies.update.request import UpdateAgenciesOuterRequest, UpdateAgenciesInnerRequest +from tests.automated.integration.conftest import pennsylvania +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel +from tests.helpers.data_creator.models.creation_info.locality import LocalityCreationInfo +from tests.helpers.data_creator.models.creation_info.us_state import USStateCreationInfo +from tests.helpers.run import run_task_and_confirm_success + + +async def test_delete_location_link( + ds_app_linked_agency: DSAppLinkInfoModel, + pittsburgh_locality: LocalityCreationInfo, + operator: DSAppSyncAgenciesUpdateTaskOperator, + mock_pdap_client: PDAPClient, + pennsylvania: USStateCreationInfo, + adb_client_test: AsyncDatabaseClient +): + + # Check prerequisites not met + assert not await operator.meets_task_prerequisites() + + # Mock make_request + mock_make_request( + mock_pdap_client=mock_pdap_client, + data=MessageResponse(message="Success") + ) + + # Delete location link (pittsburgh) + statement = ( + delete( + LinkAgencyLocation + ) + .where( + LinkAgencyLocation.agency_id == ds_app_linked_agency.db_id, + LinkAgencyLocation.location_id == pittsburgh_locality.location_id + ) + ) + await adb_client_test.execute(statement) + + # Check prerequisites are met + assert await operator.meets_task_prerequisites() + + # Run task and confirm runs without error + await run_task_and_confirm_success(operator) + + # Confirm expected method was called with expected parameters + request: UpdateAgenciesOuterRequest = extract_and_validate_sync_request( + mock_pdap_client, + expected_path="agencies/update", + expected_model=UpdateAgenciesOuterRequest + ) + assert len(request.agencies) == 1 + agency: UpdateAgenciesInnerRequest = request.agencies[0] + assert agency.app_id 
== ds_app_linked_agency.ds_app_id + content: AgencySyncContentModel = agency.content + assert content.name == "Test Agency" + assert content.jurisdiction_type == JurisdictionType.LOCAL + assert content.agency_type == AgencyType.UNKNOWN + assert content.location_ids == [pennsylvania.location_id] + + # Check DS App Link Is Updated + ds_app_link: DSAppLinkAgency | None = await adb_client_test.one_or_none_model(model=DSAppLinkAgency) + assert ds_app_link is not None + assert ds_app_link.ds_agency_id == 67 + assert ds_app_link.last_synced_at > ds_app_linked_agency.updated_at \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/test_update_agency.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/test_update_agency.py new file mode 100644 index 00000000..4749b0b0 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/agency/update/test_update_agency.py @@ -0,0 +1,66 @@ +from sqlalchemy import update + +from src.api.shared.models.message_response import MessageResponse +from src.core.tasks.scheduled.impl.sync_to_ds.impl.agencies.update.core import DSAppSyncAgenciesUpdateTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.agency.ds_link.sqlalchemy import DSAppLinkAgency +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType +from src.db.models.impl.agency.sqlalchemy import Agency +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.agencies.update.request import UpdateAgenciesOuterRequest +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel +from tests.helpers.run import run_task_and_confirm_success + + +async def test_update_agency( + ds_app_linked_agency: DSAppLinkInfoModel, + operator: DSAppSyncAgenciesUpdateTaskOperator, + mock_pdap_client: PDAPClient, + adb_client_test: AsyncDatabaseClient +): + + # Check prerequisites not met + assert not await operator.meets_task_prerequisites() + + # Mock make_request + mock_make_request( + mock_pdap_client=mock_pdap_client, + data=MessageResponse(message="Success") + ) + + # Update agency table + statement = ( + update( + Agency + ) + .values( + name="Updated Agency Name", + agency_type=AgencyType.COURT, + jurisdiction_type=JurisdictionType.STATE + ) + .where( + Agency.id == ds_app_linked_agency.db_id + ) + ) + await adb_client_test.execute(statement) + + # Check prerequisites are met + assert await operator.meets_task_prerequisites() + + # Run task and confirm runs without error + await run_task_and_confirm_success(operator) + + # Confirm expected method was called with expected parameters + extract_and_validate_sync_request( + mock_pdap_client, + expected_path="agencies/update", + expected_model=UpdateAgenciesOuterRequest + ) + + # Check DS App Link Is Updated + ds_app_link: DSAppLinkAgency | None = await adb_client_test.one_or_none_model(model=DSAppLinkAgency) + assert ds_app_link is not None + assert ds_app_link.ds_agency_id == 67 + assert ds_app_link.last_synced_at > ds_app_linked_agency.updated_at diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/__init__.py new file mode 100644 index 00000000..e69de29b diff --git 
a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/conftest.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/conftest.py new file mode 100644 index 00000000..72b621b2 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/conftest.py @@ -0,0 +1,21 @@ +import pytest_asyncio + +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel + + +@pytest_asyncio.fixture +async def ds_app_linked_data_source_url( + test_url_data_source_id: int, + adb_client_test: AsyncDatabaseClient +) -> DSAppLinkInfoModel: + link = DSAppLinkDataSource( + ds_data_source_id=67, + url_id=test_url_data_source_id, + ) + await adb_client_test.add(link) + return DSAppLinkInfoModel( + db_id=test_url_data_source_id, + ds_app_id=67, + ) \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py new file mode 100644 index 00000000..060637db --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_add.py @@ -0,0 +1,89 @@ +import pytest + +from src.core.enums import RecordType +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.add.core import DSAppSyncDataSourcesAddTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel +from src.external.pdap.impl.sync.data_sources.add.request import AddDataSourcesOuterRequest, AddDataSourcesInnerRequest +from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseModel, \ + DSAppSyncAddResponseInnerModel +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.run import run_task_and_confirm_success + + +@pytest.mark.asyncio +async def test_add( + db_data_creator: DBDataCreator, + test_url_data_source_id: int, + adb_client_test: AsyncDatabaseClient, + mock_pdap_client: PDAPClient, + test_agency_id: int +): + operator = DSAppSyncDataSourcesAddTaskOperator( + adb_client=adb_client_test, + pdap_client=mock_pdap_client + ) + + # Mock make_request + mock_make_request( + mock_pdap_client=mock_pdap_client, + data=DSAppSyncAddResponseModel( + entities=[ + DSAppSyncAddResponseInnerModel( + app_id=67, + request_id=test_url_data_source_id + ) + ] + ) + ) + + # Check meet task prerequisites + assert await operator.meets_task_prerequisites() + + # Run task and confirm runs without error + await run_task_and_confirm_success(operator) + + # Confirm expected method was called with expected parameters + request: AddDataSourcesOuterRequest = extract_and_validate_sync_request( + mock_pdap_client, + expected_path="data-sources/add", + expected_model=AddDataSourcesOuterRequest + ) + assert len(request.data_sources) == 1 + data_source: AddDataSourcesInnerRequest = request.data_sources[0] + assert data_source.request_id == test_url_data_source_id + content: DataSourceSyncContentModel = data_source.content + assert 
content.source_url.startswith("https://example.com/") + assert content.name.startswith("Example ") + assert content.record_type == RecordType.CRIME_STATISTICS + assert content.description is None + assert content.record_formats is None + assert content.data_portal_type is None + assert content.supplying_entity is None + assert content.coverage_start is None + assert content.coverage_end is None + assert content.detail_level is None + assert content.agency_supplied is None + assert content.agency_originated is None + assert content.agency_described_not_in_database is None + assert content.update_method is None + assert content.readme_url is None + assert content.originating_entity is None + assert content.retention_schedule is None + assert content.scraper_url is None + assert content.access_notes is None + assert content.access_types is None + assert content.data_portal_type_other is None + assert content.url_status is None + + assert content.agency_ids == [test_agency_id] + + # Check Presence of DS App Link + ds_app_link: DSAppLinkDataSource | None = await adb_client_test.one_or_none_model(DSAppLinkDataSource) + assert ds_app_link is not None + assert ds_app_link.ds_data_source_id == 67 + assert ds_app_link.url_id == test_url_data_source_id diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_delete.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_delete.py new file mode 100644 index 00000000..a67f5db3 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/test_delete.py @@ -0,0 +1,68 @@ +import pytest + +from src.api.shared.models.message_response import MessageResponse +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.delete.core import \ + DSAppSyncDataSourcesDeleteTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.flag.ds_delete.data_source import FlagDSDeleteDataSource +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.shared.models.delete.request import DSAppSyncDeleteRequestModel +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.run import run_task_and_confirm_success + + +@pytest.mark.asyncio +async def test_delete( + db_data_creator: DBDataCreator, + adb_client_test: AsyncDatabaseClient, + mock_pdap_client: PDAPClient +): + ds_data_source_id: int = 67 + operator = DSAppSyncDataSourcesDeleteTaskOperator( + adb_client=adb_client_test, + pdap_client=mock_pdap_client + ) + # Mock make_request + mock_make_request( + mock_pdap_client=mock_pdap_client, + data=MessageResponse(message="Success") + ) + + # Check does not currently meet prerequisite + assert not await operator.meets_task_prerequisites() + + # Add DS App Link + ds_app_link = DSAppLinkDataSource( + url_id=None, + ds_data_source_id=ds_data_source_id, + ) + await adb_client_test.add(ds_app_link) + + # Add Task Deletion Flag for App Link + flag = FlagDSDeleteDataSource( + ds_data_source_id=ds_data_source_id, + ) + await adb_client_test.add(flag) + + # Check meets prerequisite + assert await operator.meets_task_prerequisites() + + # Run task and confirm runs without error + await run_task_and_confirm_success(operator) + + # Confirm expected method was called with expected parameters + request: 
DSAppSyncDeleteRequestModel = extract_and_validate_sync_request( + mock_pdap_client, + expected_path="data-sources/delete", + expected_model=DSAppSyncDeleteRequestModel + ) + assert request.ids == [ds_data_source_id] + + # Check DS App Link Is Deleted + assert await adb_client_test.has_no_rows(DSAppLinkDataSource) + + # Check DS App Data Source Deletion Flag is deleted + assert await adb_client_test.has_no_rows(FlagDSDeleteDataSource) diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/conftest.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/conftest.py new file mode 100644 index 00000000..8a6bbfc5 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/conftest.py @@ -0,0 +1,17 @@ +import pytest + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.core import \ + DSAppSyncDataSourcesUpdateTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.external.pdap.client import PDAPClient + + +@pytest.fixture +def operator( + adb_client_test: AsyncDatabaseClient, + mock_pdap_client: PDAPClient +) -> DSAppSyncDataSourcesUpdateTaskOperator: + return DSAppSyncDataSourcesUpdateTaskOperator( + adb_client=adb_client_test, + pdap_client=mock_pdap_client + ) \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/helpers.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/helpers.py new file mode 100644 index 00000000..7901bea5 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/helpers.py @@ -0,0 +1,7 @@ +from datetime import datetime + + +def check_ds_app_link_updated( + old_updated_at: datetime +) -> None: + raise NotImplementedError \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_add_agency_link.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_add_agency_link.py new file mode 100644 index 00000000..9852df7a --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_add_agency_link.py @@ -0,0 +1,68 @@ +from src.api.shared.models.message_response import MessageResponse +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.core import \ + DSAppSyncDataSourcesUpdateTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel +from src.external.pdap.impl.sync.data_sources.update.request import UpdateDataSourcesInnerRequest, \ + UpdateDataSourcesOuterRequest +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel +from tests.helpers.run import run_task_and_confirm_success + + +async def 
test_add_agency_link( + ds_app_linked_data_source_url: DSAppLinkInfoModel, + test_agency_id: int, + test_agency_id_2: int, + operator: DSAppSyncDataSourcesUpdateTaskOperator, + mock_pdap_client: PDAPClient, + adb_client_test: AsyncDatabaseClient +): + # Mock make_request + mock_make_request( + mock_pdap_client=mock_pdap_client, + data=MessageResponse(message="Success") + ) + + # Check prerequisites not met + assert not await operator.meets_task_prerequisites() + + # Add additional agency link + link = LinkURLAgency( + url_id=ds_app_linked_data_source_url.db_id, + agency_id=test_agency_id_2 + ) + await adb_client_test.add(link) + + # Check prerequisites are met + assert await operator.meets_task_prerequisites() + + # Run task and confirm runs without error + await run_task_and_confirm_success(operator) + + # Confirm expected method was called with expected parameters + request: UpdateDataSourcesOuterRequest = extract_and_validate_sync_request( + mock_pdap_client, + expected_path="data-sources/update", + expected_model=UpdateDataSourcesOuterRequest + ) + assert len(request.data_sources) == 1 + data_source: UpdateDataSourcesInnerRequest = request.data_sources[0] + assert data_source.app_id == ds_app_linked_data_source_url.ds_app_id + content: DataSourceSyncContentModel = data_source.content + assert content.name.startswith("Example") + assert set(content.agency_ids) == { + test_agency_id, + test_agency_id_2 + } + + # Check DS App Link Is Updated + ds_app_link: DSAppLinkDataSource | None = await adb_client_test.one_or_none_model(model=DSAppLinkDataSource) + assert ds_app_link is not None + assert ds_app_link.ds_data_source_id == 67 + assert ds_app_link.last_synced_at > ds_app_linked_data_source_url.updated_at + diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_delete_agency_link.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_delete_agency_link.py new file mode 100644 index 00000000..f0dbf204 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_delete_agency_link.py @@ -0,0 +1,72 @@ +from sqlalchemy import delete + +from src.api.shared.models.message_response import MessageResponse +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.core import \ + DSAppSyncDataSourcesUpdateTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel +from src.external.pdap.impl.sync.data_sources.update.request import UpdateDataSourcesInnerRequest, \ + UpdateDataSourcesOuterRequest +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel +from tests.conftest import adb_client_test +from tests.helpers.run import run_task_and_confirm_success + + +async def test_delete_agency_link( + ds_app_linked_data_source_url: DSAppLinkInfoModel, + test_agency_id: int, + operator: DSAppSyncDataSourcesUpdateTaskOperator, + mock_pdap_client: PDAPClient, + adb_client_test: AsyncDatabaseClient +): + # Mock make_request + mock_make_request( + mock_pdap_client=mock_pdap_client, + 
data=MessageResponse(message="Success") + ) + + # Check prerequisites not met + assert not await operator.meets_task_prerequisites() + + # Delete agency ID link + statement = ( + delete( + LinkURLAgency + ) + .where( + LinkURLAgency.url_id == ds_app_linked_data_source_url.db_id, + LinkURLAgency.agency_id == test_agency_id + ) + ) + await adb_client_test.execute(statement) + + # Check prerequisites are met + assert await operator.meets_task_prerequisites() + + # Run task and confirm runs without error + await run_task_and_confirm_success(operator) + + # Confirm expected method was called with expected parameters + request: UpdateDataSourcesOuterRequest = extract_and_validate_sync_request( + mock_pdap_client, + expected_path="data-sources/update", + expected_model=UpdateDataSourcesOuterRequest + ) + assert len(request.data_sources) == 1 + data_source: UpdateDataSourcesInnerRequest = request.data_sources[0] + assert data_source.app_id == ds_app_linked_data_source_url.ds_app_id + content: DataSourceSyncContentModel = data_source.content + assert content.name.startswith("Example") + assert content.agency_ids == [] + + # Check DS App Link Is Updated + ds_app_link: DSAppLinkDataSource | None = await adb_client_test.one_or_none_model(model=DSAppLinkDataSource) + assert ds_app_link is not None + assert ds_app_link.ds_data_source_id == 67 + assert ds_app_link.last_synced_at > ds_app_linked_data_source_url.updated_at + diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_optional_ds_metadata.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_optional_ds_metadata.py new file mode 100644 index 00000000..94273019 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_optional_ds_metadata.py @@ -0,0 +1,105 @@ +from datetime import date + +from sqlalchemy import update + +from src.api.shared.models.message_response import MessageResponse +from src.core.enums import RecordType +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.core import \ + DSAppSyncDataSourcesUpdateTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource +from src.db.models.impl.url.optional_ds_metadata.enums import AgencyAggregationEnum, AccessTypeEnum, UpdateMethodEnum, \ + RetentionScheduleEnum +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel +from src.external.pdap.impl.sync.data_sources.update.request import UpdateDataSourcesInnerRequest, \ + UpdateDataSourcesOuterRequest +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel +from tests.helpers.run import run_task_and_confirm_success + + +async def test_update_optional_ds_metadata( + ds_app_linked_data_source_url: DSAppLinkInfoModel, + operator: DSAppSyncDataSourcesUpdateTaskOperator, + mock_pdap_client: PDAPClient, + adb_client_test: AsyncDatabaseClient, + test_agency_id: int +): + # Mock make_request + mock_make_request( + mock_pdap_client=mock_pdap_client, + data=MessageResponse(message="Success") + ) + + # Check prerequisites not met + 
assert not await operator.meets_task_prerequisites() + + # Update url_optional_ds_metadata_table table + insert = URLOptionalDataSourceMetadata( + url_id=ds_app_linked_data_source_url.db_id, + record_formats=["Record Format 1", "Record Format 2"], + data_portal_type="Test Data Portal Type", + supplying_entity="Test Supplying Entity", + coverage_start=date(year=2025, month=5, day=1), + coverage_end=date(year=2025, month=5, day=31), + agency_supplied=True, + agency_originated=True, + agency_aggregation=AgencyAggregationEnum.FEDERAL, + update_method=UpdateMethodEnum.OVERWRITE, + readme_url="https://example.com/readme", + originating_entity="Test originating entity", + retention_schedule=RetentionScheduleEnum.FUTURE_ONLY, + scraper_url="https://example.com/scraper", + submission_notes="Test submission notes", + access_notes="Test Access notes", + access_types=[AccessTypeEnum.DOWNLOAD], + data_portal_type_other="Test data portal type other" + ) + await adb_client_test.add(insert) + + # Check prerequisites are met + assert operator.meets_task_prerequisites() + + # Run task and confirm runs without error + await run_task_and_confirm_success(operator) + + # Confirm expected method was called with expected parameters + request: UpdateDataSourcesOuterRequest = extract_and_validate_sync_request( + mock_pdap_client, + expected_path="data-sources/update", + expected_model=UpdateDataSourcesOuterRequest + ) + assert len(request.data_sources) == 1 + data_source: UpdateDataSourcesInnerRequest = request.data_sources[0] + assert data_source.app_id == ds_app_linked_data_source_url.ds_app_id + content: DataSourceSyncContentModel = data_source.content + assert content.source_url.startswith("https://example.com/") + assert content.name.startswith("Example ") + assert content.record_type == RecordType.CRIME_STATISTICS + assert content.description is None + assert content.record_formats == ["Record Format 1", "Record Format 2"] + assert content.data_portal_type == "Test Data Portal Type" + assert content.supplying_entity == "Test Supplying Entity" + assert content.coverage_start == date(year=2025, month=5, day=1) + assert content.coverage_end == date(year=2025, month=5, day=31) + assert content.detail_level is None + assert content.agency_supplied == True + assert content.agency_originated == True + assert content.update_method == UpdateMethodEnum.OVERWRITE + assert content.readme_url == "https://example.com/readme" + assert content.originating_entity == "Test originating entity" + assert content.retention_schedule == RetentionScheduleEnum.FUTURE_ONLY + assert content.scraper_url == "https://example.com/scraper" + assert content.access_notes == "Test Access notes" + assert content.access_types == [AccessTypeEnum.DOWNLOAD] + assert content.data_portal_type_other == "Test data portal type other" + + # Check DS App Link Is Updated + ds_app_link: DSAppLinkDataSource | None = await adb_client_test.one_or_none_model(model=DSAppLinkDataSource) + assert ds_app_link is not None + assert ds_app_link.ds_data_source_id == 67 + assert ds_app_link.last_synced_at > ds_app_linked_data_source_url.updated_at + diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_record_type.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_record_type.py new file mode 100644 index 00000000..66fae2cb --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_record_type.py @@ -0,0 +1,78 @@ +from 
sqlalchemy import update + +from src.api.shared.models.message_response import MessageResponse +from src.core.enums import RecordType +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.core import \ + DSAppSyncDataSourcesUpdateTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource +from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel +from src.external.pdap.impl.sync.data_sources.update.request import UpdateDataSourcesInnerRequest, \ + UpdateDataSourcesOuterRequest +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel +from tests.helpers.run import run_task_and_confirm_success + + +async def test_update_url( + ds_app_linked_data_source_url: DSAppLinkInfoModel, + operator: DSAppSyncDataSourcesUpdateTaskOperator, + mock_pdap_client: PDAPClient, + adb_client_test: AsyncDatabaseClient, + test_agency_id: int +): + # Mock make_request + mock_make_request( + mock_pdap_client=mock_pdap_client, + data=MessageResponse(message="Success") + ) + + # Check prerequisites not met + assert not await operator.meets_task_prerequisites() + + # Update URL Record Type table + statement = ( + update( + URLRecordType + ) + .values( + record_type=RecordType.POLICIES_AND_CONTRACTS + ) + .where( + URLRecordType.url_id == ds_app_linked_data_source_url.db_id + ) + ) + await adb_client_test.execute(statement) + + # Check prerequisites are met + assert operator.meets_task_prerequisites() + + # Run task and confirm runs without error + await run_task_and_confirm_success(operator) + + # Confirm expected method was called with expected parameters + request: UpdateDataSourcesOuterRequest = extract_and_validate_sync_request( + mock_pdap_client, + expected_path="data-sources/update", + expected_model=UpdateDataSourcesOuterRequest + ) + assert len(request.data_sources) == 1 + data_source: UpdateDataSourcesInnerRequest = request.data_sources[0] + assert data_source.app_id == ds_app_linked_data_source_url.ds_app_id + content: DataSourceSyncContentModel = data_source.content + assert content.name.startswith("Example ") + assert content.record_type == RecordType.POLICIES_AND_CONTRACTS + assert content.agency_ids == [ + test_agency_id + ] + assert content.retention_schedule is None + + # Check DS App Link Is Updated + ds_app_link: DSAppLinkDataSource | None = await adb_client_test.one_or_none_model(model=DSAppLinkDataSource) + assert ds_app_link is not None + assert ds_app_link.ds_data_source_id == 67 + assert ds_app_link.last_synced_at > ds_app_linked_data_source_url.updated_at + diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_url.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_url.py new file mode 100644 index 00000000..78c095c0 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/data_source/update/test_update_url.py @@ -0,0 +1,81 @@ +from sqlalchemy import update + +from src.api.shared.models.message_response import MessageResponse +from src.core.tasks.scheduled.impl.sync_to_ds.impl.data_sources.update.core import \ + 
DSAppSyncDataSourcesUpdateTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.data_source.sqlalchemy import DSAppLinkDataSource +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.data_sources._shared.content import DataSourceSyncContentModel +from src.external.pdap.impl.sync.data_sources.update.request import UpdateDataSourcesInnerRequest, \ + UpdateDataSourcesOuterRequest +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel +from tests.helpers.run import run_task_and_confirm_success + + +async def test_update_url( + ds_app_linked_data_source_url: DSAppLinkInfoModel, + operator: DSAppSyncDataSourcesUpdateTaskOperator, + mock_pdap_client: PDAPClient, + adb_client_test: AsyncDatabaseClient, + test_agency_id: int +): + # Mock make_request + mock_make_request( + mock_pdap_client=mock_pdap_client, + data=MessageResponse(message="Success") + ) + + # Check prerequisites not met + assert not await operator.meets_task_prerequisites() + + # Update URL table + statement = ( + update( + URL + ) + .values( + name="Updated URL Name", + scheme="http", + trailing_slash=True, + url="modified-example.com", + description="Updated URL Description", + ) + .where( + URL.id == ds_app_linked_data_source_url.db_id + ) + ) + await adb_client_test.execute(statement) + + # Check prerequisites are met + assert operator.meets_task_prerequisites() + + # Run task and confirm runs without error + await run_task_and_confirm_success(operator) + + # Confirm expected method was called with expected parameters + request: UpdateDataSourcesOuterRequest = extract_and_validate_sync_request( + mock_pdap_client, + expected_path="data-sources/update", + expected_model=UpdateDataSourcesOuterRequest + ) + assert len(request.data_sources) == 1 + data_source: UpdateDataSourcesInnerRequest = request.data_sources[0] + assert data_source.app_id == ds_app_linked_data_source_url.ds_app_id + content: DataSourceSyncContentModel = data_source.content + assert content.name == "Updated URL Name" + assert content.agency_ids == [ + test_agency_id + ] + assert content.source_url == "http://modified-example.com/" + assert content.description == "Updated URL Description" + + # Check DS App Link Is Updated + ds_app_link: DSAppLinkDataSource | None = await adb_client_test.one_or_none_model(model=DSAppLinkDataSource) + assert ds_app_link is not None + assert ds_app_link.ds_data_source_id == 67 + assert ds_app_link.last_synced_at > ds_app_linked_data_source_url.updated_at + diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/helpers.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/helpers.py new file mode 100644 index 00000000..fcc1a93c --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/helpers.py @@ -0,0 +1,38 @@ +from http import HTTPStatus +from typing import Any +from unittest.mock import AsyncMock + +from pdap_access_manager import RequestInfo, RequestType, ResponseInfo +from pydantic import BaseModel + +from src.external.pdap.client import PDAPClient +from tests.helpers.mock import get_last_call_arguments + + +def get_last_request( + mock_pdap_client: PDAPClient +) -> RequestInfo: + return 
get_last_call_arguments(mock_pdap_client.access_manager.make_request)[0] + +def extract_and_validate_sync_request( + mock_pdap_client: PDAPClient, + expected_path: str, + expected_model: type[BaseModel] +) -> Any: + assert mock_pdap_client.access_manager.make_request.call_count == 1 + request_info: RequestInfo = get_last_request(mock_pdap_client) + assert request_info.type_ == RequestType.POST + full_expected_url: str = f"http://example.com/v3/source-manager/{expected_path}" + assert request_info.url == full_expected_url, f"Expected URL: {full_expected_url}, Actual URL: {request_info.url}" + return expected_model(**request_info.json_) + +def mock_make_request( + mock_pdap_client: PDAPClient, + data: BaseModel +) -> None: + mock_pdap_client.access_manager.make_request = AsyncMock( + return_value=ResponseInfo( + status_code=HTTPStatus.OK, + data=data.model_dump(mode='json') + ) + ) \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/conftest.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/conftest.py new file mode 100644 index 00000000..69bf1287 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/conftest.py @@ -0,0 +1,21 @@ +import pytest_asyncio + +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel + + +@pytest_asyncio.fixture +async def ds_app_linked_meta_url( + test_url_meta_url_id: int, + adb_client_test: AsyncDatabaseClient +) -> DSAppLinkInfoModel: + ds_app_link = DSAppLinkMetaURL( + url_id=test_url_meta_url_id, + ds_meta_url_id=67 + ) + await adb_client_test.add(ds_app_link) + return DSAppLinkInfoModel( + ds_app_id=67, + db_id=test_url_meta_url_id + ) diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/test_add.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/test_add.py new file mode 100644 index 00000000..e63e1496 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/test_add.py @@ -0,0 +1,67 @@ +import pytest + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.add.core import DSAppSyncMetaURLsAddTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.meta_urls._shared.content import MetaURLSyncContentModel +from src.external.pdap.impl.sync.meta_urls.add.request import AddMetaURLsOuterRequest, AddMetaURLsInnerRequest +from src.external.pdap.impl.sync.shared.models.add.response import DSAppSyncAddResponseModel, \ + DSAppSyncAddResponseInnerModel +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.run import run_task_and_confirm_success + + +@pytest.mark.asyncio +async def test_add( + db_data_creator: DBDataCreator, + test_url_meta_url_id: int, + adb_client_test: AsyncDatabaseClient, + mock_pdap_client: PDAPClient, + 
test_agency_id: int +): + operator = DSAppSyncMetaURLsAddTaskOperator( + adb_client=adb_client_test, + pdap_client=mock_pdap_client + ) + + # Mock make_request + mock_make_request( + mock_pdap_client=mock_pdap_client, + data=DSAppSyncAddResponseModel( + entities=[ + DSAppSyncAddResponseInnerModel( + app_id=67, + request_id=test_url_meta_url_id + ) + ] + ) + ) + + + # Check meets prerequisites + assert await operator.meets_task_prerequisites() + + # Run task and confirm runs without error + await run_task_and_confirm_success(operator) + + # Confirm expected method was called with expected parameters + request: AddMetaURLsOuterRequest = extract_and_validate_sync_request( + mock_pdap_client, + expected_path="meta-urls/add", + expected_model=AddMetaURLsOuterRequest + ) + assert len(request.meta_urls) == 1 + meta_url: AddMetaURLsInnerRequest = request.meta_urls[0] + assert meta_url.request_id == test_url_meta_url_id + content: MetaURLSyncContentModel = meta_url.content + assert content.url.startswith("https://example.com/") + assert content.agency_ids == [test_agency_id] + + # Check Presence of DS Meta URL App Link + ds_app_link: DSAppLinkMetaURL | None = await adb_client_test.one_or_none_model(model=DSAppLinkMetaURL) + assert ds_app_link is not None + assert ds_app_link.ds_meta_url_id == 67 + assert ds_app_link.url_id == test_url_meta_url_id diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/test_delete.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/test_delete.py new file mode 100644 index 00000000..8218759f --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/test_delete.py @@ -0,0 +1,68 @@ +import pytest + +from src.api.shared.models.message_response import MessageResponse +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.delete.core import DSAppSyncMetaURLsDeleteTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.flag.ds_delete.meta_url import FlagDSDeleteMetaURL +from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.shared.models.delete.request import DSAppSyncDeleteRequestModel +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.helpers.data_creator.core import DBDataCreator +from tests.helpers.run import run_task_and_confirm_success + + +@pytest.mark.asyncio +async def test_delete( + db_data_creator: DBDataCreator, + adb_client_test: AsyncDatabaseClient, + mock_pdap_client: PDAPClient +): + ds_meta_url_id: int = 67 + operator = DSAppSyncMetaURLsDeleteTaskOperator( + adb_client=adb_client_test, + pdap_client=mock_pdap_client + ) + + # Mock make_request + mock_make_request( + mock_pdap_client=mock_pdap_client, + data=MessageResponse(message="Success") + ) + + # Check does not currently meet prerequisite + assert not await operator.meets_task_prerequisites() + + # Add DS App Link + ds_app_link = DSAppLinkMetaURL( + ds_meta_url_id=ds_meta_url_id, + url_id=None, + ) + await adb_client_test.add(ds_app_link) + + # Add Task Deletion Flag for App Link + flag = FlagDSDeleteMetaURL( + ds_meta_url_id=ds_meta_url_id + ) + await adb_client_test.add(flag) + + # Check meets prerequisite + assert await operator.meets_task_prerequisites() + + # Run task and confirm runs without error + await run_task_and_confirm_success(operator) + + # Confirm expected method was 
called with expected parameters + request: DSAppSyncDeleteRequestModel = extract_and_validate_sync_request( + mock_pdap_client, + expected_path="meta-urls/delete", + expected_model=DSAppSyncDeleteRequestModel + ) + assert request.ids == [ds_meta_url_id] + + # Check DS App Link Is Deleted + assert await adb_client_test.has_no_rows(DSAppLinkMetaURL) + + # Check DS App Meta URL Deletion Flag is deleted + assert await adb_client_test.has_no_rows(FlagDSDeleteMetaURL) diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/conftest.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/conftest.py new file mode 100644 index 00000000..3b2e8e7b --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/conftest.py @@ -0,0 +1,16 @@ +import pytest + +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.core import DSAppSyncMetaURLsUpdateTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.external.pdap.client import PDAPClient + + +@pytest.fixture +def operator( + adb_client_test: AsyncDatabaseClient, + mock_pdap_client: PDAPClient +) -> DSAppSyncMetaURLsUpdateTaskOperator: + return DSAppSyncMetaURLsUpdateTaskOperator( + adb_client=adb_client_test, + pdap_client=mock_pdap_client + ) \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/helpers.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/helpers.py new file mode 100644 index 00000000..7901bea5 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/helpers.py @@ -0,0 +1,7 @@ +from datetime import datetime + + +def check_ds_app_link_updated( + old_updated_at: datetime +) -> None: + raise NotImplementedError \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/test_add_agency_link.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/test_add_agency_link.py new file mode 100644 index 00000000..1caa1eab --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/test_add_agency_link.py @@ -0,0 +1,63 @@ +from src.api.shared.models.message_response import MessageResponse +from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.core import DSAppSyncMetaURLsUpdateTaskOperator +from src.db.client.async_ import AsyncDatabaseClient +from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency +from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL +from src.external.pdap.client import PDAPClient +from src.external.pdap.impl.sync.meta_urls._shared.content import MetaURLSyncContentModel +from src.external.pdap.impl.sync.meta_urls.update.request import UpdateMetaURLsOuterRequest, UpdateMetaURLsInnerRequest +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \ + mock_make_request +from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel +from tests.conftest import adb_client_test +from tests.helpers.run import run_task_and_confirm_success + + +async def test_add_agency_link( 
+    ds_app_linked_meta_url: DSAppLinkInfoModel,
+    test_agency_id: int,
+    test_agency_id_2: int,
+    operator: DSAppSyncMetaURLsUpdateTaskOperator,
+    mock_pdap_client: PDAPClient,
+    adb_client_test: AsyncDatabaseClient
+):
+    # Mock make_request
+    mock_make_request(
+        mock_pdap_client=mock_pdap_client,
+        data=MessageResponse(message="Success")
+    )
+
+    # Check prerequisites not met
+    assert not await operator.meets_task_prerequisites()
+
+    # Add agency link
+    link = LinkURLAgency(
+        url_id=ds_app_linked_meta_url.db_id,
+        agency_id=test_agency_id_2
+    )
+    await adb_client_test.add(link)
+
+    # Check prerequisites are met
+    assert await operator.meets_task_prerequisites()
+
+    # Run task and confirm runs without error
+    await run_task_and_confirm_success(operator)
+
+    # Confirm expected method was called with expected parameters
+    request: UpdateMetaURLsOuterRequest = extract_and_validate_sync_request(
+        mock_pdap_client,
+        expected_path="meta-urls/update",
+        expected_model=UpdateMetaURLsOuterRequest
+    )
+    assert len(request.meta_urls) == 1
+    meta_url: UpdateMetaURLsInnerRequest = request.meta_urls[0]
+    assert meta_url.app_id == ds_app_linked_meta_url.ds_app_id
+    content: MetaURLSyncContentModel = meta_url.content
+    assert content.url.startswith("https://example.com/")
+    assert set(content.agency_ids) == {test_agency_id, test_agency_id_2}
+
+    # Check DS App Link Is Updated
+    ds_app_link: DSAppLinkMetaURL | None = await adb_client_test.one_or_none_model(model=DSAppLinkMetaURL)
+    assert ds_app_link is not None
+    assert ds_app_link.ds_meta_url_id == 67
+    assert ds_app_link.last_synced_at > ds_app_linked_meta_url.updated_at
diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/test_delete_agency_link.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/test_delete_agency_link.py
new file mode 100644
index 00000000..11ef284d
--- /dev/null
+++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/test_delete_agency_link.py
@@ -0,0 +1,66 @@
+from sqlalchemy import delete
+
+from src.api.shared.models.message_response import MessageResponse
+from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.core import DSAppSyncMetaURLsUpdateTaskOperator
+from src.db.client.async_ import AsyncDatabaseClient
+from src.db.models.impl.link.url_agency.sqlalchemy import LinkURLAgency
+from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL
+from src.external.pdap.client import PDAPClient
+from src.external.pdap.impl.sync.meta_urls._shared.content import MetaURLSyncContentModel
+from src.external.pdap.impl.sync.meta_urls.update.request import UpdateMetaURLsOuterRequest, UpdateMetaURLsInnerRequest
+from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \
+    mock_make_request
+from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel
+from tests.helpers.run import run_task_and_confirm_success
+
+
+async def test_delete_agency_link(
+    ds_app_linked_meta_url: DSAppLinkInfoModel,
+    test_agency_id: int,
+    operator: DSAppSyncMetaURLsUpdateTaskOperator,
+    mock_pdap_client: PDAPClient,
+    adb_client_test: AsyncDatabaseClient
+):
+    # Mock make_request
+    mock_make_request(
+        mock_pdap_client=mock_pdap_client,
+        data=MessageResponse(message="Success")
+    )
+
+    assert not await operator.meets_task_prerequisites()
+
+    # Delete agency link
+    statement = (
+        delete(
+            LinkURLAgency
+        )
+        .where(
+            LinkURLAgency.url_id == ds_app_linked_meta_url.db_id,
+            LinkURLAgency.agency_id == test_agency_id
+        )
+    )
+    await adb_client_test.execute(statement)
+
+    # Check prerequisites are met
+    assert await operator.meets_task_prerequisites()
+
+    # Run task and confirm runs without error
+    await run_task_and_confirm_success(operator)
+
+    # Confirm expected method was called with expected parameters
+    request: UpdateMetaURLsOuterRequest = extract_and_validate_sync_request(
+        mock_pdap_client,
+        expected_path="meta-urls/update",
+        expected_model=UpdateMetaURLsOuterRequest
+    )
+    assert len(request.meta_urls) == 1
+    meta_url: UpdateMetaURLsInnerRequest = request.meta_urls[0]
+    assert meta_url.app_id == ds_app_linked_meta_url.ds_app_id
+    content: MetaURLSyncContentModel = meta_url.content
+    assert content.agency_ids == []
+
+    # Check DS App Link Is Updated
+    ds_app_link: DSAppLinkMetaURL | None = await adb_client_test.one_or_none_model(model=DSAppLinkMetaURL)
+    assert ds_app_link is not None
+    assert ds_app_link.ds_meta_url_id == 67
+    assert ds_app_link.last_synced_at > ds_app_linked_meta_url.updated_at
diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/test_update_url.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/test_update_url.py
new file mode 100644
index 00000000..0342c388
--- /dev/null
+++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/meta_url/update/test_update_url.py
@@ -0,0 +1,75 @@
+from sqlalchemy import update
+
+from src.api.shared.models.message_response import MessageResponse
+from src.core.tasks.scheduled.impl.sync_to_ds.impl.meta_urls.update.core import DSAppSyncMetaURLsUpdateTaskOperator
+from src.db.client.async_ import AsyncDatabaseClient
+from src.db.models.impl.url.core.sqlalchemy import URL
+from src.db.models.impl.url.ds_meta_url.sqlalchemy import DSAppLinkMetaURL
+from src.external.pdap.client import PDAPClient
+from src.external.pdap.impl.sync.meta_urls._shared.content import MetaURLSyncContentModel
+from src.external.pdap.impl.sync.meta_urls.update.request import UpdateMetaURLsOuterRequest, UpdateMetaURLsInnerRequest
+from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.helpers import extract_and_validate_sync_request, \
+    mock_make_request
+from tests.automated.integration.tasks.scheduled.impl.sync_to_ds.models.ds_app_link_info import DSAppLinkInfoModel
+from tests.helpers.run import run_task_and_confirm_success
+
+
+async def test_update_url(
+    ds_app_linked_meta_url: DSAppLinkInfoModel,
+    operator: DSAppSyncMetaURLsUpdateTaskOperator,
+    mock_pdap_client: PDAPClient,
+    adb_client_test: AsyncDatabaseClient,
+    test_agency_id: int
+):
+    # Mock make_request
+    mock_make_request(
+        mock_pdap_client=mock_pdap_client,
+        data=MessageResponse(message="Success")
+    )
+
+    # Check prerequisites not met
+    assert not await operator.meets_task_prerequisites()
+
+    # Update URL table
+    statement = (
+        update(
+            URL
+        )
+        .values(
+            name="Updated URL Name",
+            scheme="http",
+            trailing_slash=True,
+            url="modified-example.com",
+            description="Updated URL Description",
+        )
+        .where(
+            URL.id == ds_app_linked_meta_url.db_id
+        )
+    )
+    await adb_client_test.execute(statement)
+
+    # Check prerequisites are met
+    assert await operator.meets_task_prerequisites()
+
+    # Run task and confirm runs without error
+    await run_task_and_confirm_success(operator)
+
+    # Confirm expected method was called with expected parameters
+    request: UpdateMetaURLsOuterRequest = extract_and_validate_sync_request(
+        mock_pdap_client,
+        mock_pdap_client and the remaining arguments continue below
+
expected_path="meta-urls/update", + expected_model=UpdateMetaURLsOuterRequest + ) + assert len(request.meta_urls) == 1 + meta_url: UpdateMetaURLsInnerRequest = request.meta_urls[0] + assert meta_url.app_id == ds_app_linked_meta_url.ds_app_id + content: MetaURLSyncContentModel = meta_url.content + assert content.url == "http://modified-example.com/" + assert set(content.agency_ids) == {test_agency_id} + + # Check DS App Link Is Updated + ds_app_link: DSAppLinkMetaURL | None = await adb_client_test.one_or_none_model(model=DSAppLinkMetaURL) + assert ds_app_link is not None + assert ds_app_link.ds_meta_url_id == 67 + assert ds_app_link.last_synced_at > ds_app_linked_meta_url.updated_at + diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/models/__init__.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/models/ds_app_link_info.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/models/ds_app_link_info.py new file mode 100644 index 00000000..36e86874 --- /dev/null +++ b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/models/ds_app_link_info.py @@ -0,0 +1,9 @@ +from datetime import datetime + +from pydantic import BaseModel + + +class DSAppLinkInfoModel(BaseModel): + ds_app_id: int + db_id: int + updated_at: datetime = datetime.now() \ No newline at end of file diff --git a/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/test_.py b/tests/automated/integration/tasks/scheduled/impl/sync_to_ds/test_.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/tasks/url/impl/submit_approved/mock.py b/tests/automated/integration/tasks/url/impl/submit_approved/mock.py deleted file mode 100644 index 0e631d5b..00000000 --- a/tests/automated/integration/tasks/url/impl/submit_approved/mock.py +++ /dev/null @@ -1,38 +0,0 @@ -from http import HTTPStatus -from unittest.mock import AsyncMock - -from pdap_access_manager import ResponseInfo - -from src.core.enums import SubmitResponseStatus -from src.external.pdap.client import PDAPClient - - -def mock_make_request(pdap_client: PDAPClient, urls: list[str]): - assert len(urls) == 3, "Expected 3 urls" - pdap_client.access_manager.make_request = AsyncMock( - return_value=ResponseInfo( - status_code=HTTPStatus.OK, - data={ - "data_sources": [ - { - "url": urls[0], - "status": SubmitResponseStatus.SUCCESS, - "error": None, - "data_source_id": 21, - }, - { - "url": urls[1], - "status": SubmitResponseStatus.SUCCESS, - "error": None, - "data_source_id": 34, - }, - { - "url": urls[2], - "status": SubmitResponseStatus.FAILURE, - "error": "Test Error", - "data_source_id": None - } - ] - } - ) - ) diff --git a/tests/automated/integration/tasks/url/impl/submit_approved/setup.py b/tests/automated/integration/tasks/url/impl/submit_approved/setup.py deleted file mode 100644 index 1f9d8915..00000000 --- a/tests/automated/integration/tasks/url/impl/submit_approved/setup.py +++ /dev/null @@ -1,49 +0,0 @@ -from src.api.endpoints.review.approve.dto import FinalReviewApprovalInfo -from src.core.enums import RecordType -from tests.helpers.data_creator.core import DBDataCreator -from tests.helpers.data_creator.models.creation_info.batch.v1 import BatchURLCreationInfo - - -async def setup_validated_urls(db_data_creator: DBDataCreator, agency_id: int) -> list[str]: - creation_info: BatchURLCreationInfo = await db_data_creator.batch_and_urls( - 
url_count=3, - with_html_content=True - ) - - url_1 = creation_info.url_ids[0] - url_2 = creation_info.url_ids[1] - url_3 = creation_info.url_ids[2] - await db_data_creator.adb_client.approve_url( - approval_info=FinalReviewApprovalInfo( - url_id=url_1, - record_type=RecordType.ACCIDENT_REPORTS, - agency_ids=[agency_id], - name="URL 1 Name", - description=None, - record_formats=["Record Format 1", "Record Format 2"], - data_portal_type="Data Portal Type 1", - supplying_entity="Supplying Entity 1" - ), - user_id=1 - ) - await db_data_creator.adb_client.approve_url( - approval_info=FinalReviewApprovalInfo( - url_id=url_2, - record_type=RecordType.INCARCERATION_RECORDS, - agency_ids=[agency_id], - name="URL 2 Name", - description="URL 2 Description", - ), - user_id=2 - ) - await db_data_creator.adb_client.approve_url( - approval_info=FinalReviewApprovalInfo( - url_id=url_3, - record_type=RecordType.ACCIDENT_REPORTS, - agency_ids=[agency_id], - name="URL 3 Name", - description="URL 3 Description", - ), - user_id=3 - ) - return creation_info.urls diff --git a/tests/automated/integration/tasks/url/impl/submit_approved/test_submit_approved_url_task.py b/tests/automated/integration/tasks/url/impl/submit_approved/test_submit_approved_url_task.py deleted file mode 100644 index 22ae8129..00000000 --- a/tests/automated/integration/tasks/url/impl/submit_approved/test_submit_approved_url_task.py +++ /dev/null @@ -1,134 +0,0 @@ -import pytest -from deepdiff import DeepDiff -from pdap_access_manager import RequestInfo, RequestType, DataSourcesNamespaces - -from src.core.tasks.url.enums import TaskOperatorOutcome -from src.core.tasks.url.operators.submit_approved.core import SubmitApprovedURLTaskOperator -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource -from src.db.models.impl.url.task_error.sqlalchemy import URLTaskError -from src.external.pdap.client import PDAPClient -from tests.automated.integration.tasks.url.impl.submit_approved.mock import mock_make_request -from tests.automated.integration.tasks.url.impl.submit_approved.setup import setup_validated_urls - - -@pytest.mark.asyncio -async def test_submit_approved_url_task( - db_data_creator, - mock_pdap_client: PDAPClient, - monkeypatch -): - """ - The submit_approved_url_task should submit - all validated URLs to the PDAP Data Sources App - """ - - - # Get Task Operator - operator = SubmitApprovedURLTaskOperator( - adb_client=db_data_creator.adb_client, - pdap_client=mock_pdap_client - ) - - # Check Task Operator does not yet meet pre-requisites - assert not await operator.meets_task_prerequisites() - - # Create URLs with status 'validated' in database and all requisite URL values - # Ensure they have optional metadata as well - agency_id = await db_data_creator.agency() - urls: list[str] = await setup_validated_urls(db_data_creator, agency_id=agency_id) - mock_make_request(mock_pdap_client, urls) - - # Check Task Operator does meet pre-requisites - assert await operator.meets_task_prerequisites() - - # Run Task - run_info = await operator.run_task() - - # Check Task has been marked as completed - assert run_info.outcome == TaskOperatorOutcome.SUCCESS, run_info.message - - # Check Task Operator no longer meets pre-requisites - assert not await operator.meets_task_prerequisites() - - # Get URLs - urls: list[URL] = await db_data_creator.adb_client.get_all(URL, order_by_attribute="id") - url_1: URL = urls[0] - url_2: URL = urls[1] - url_3: URL = urls[2] - - # Get URL Data 
Source Links - url_data_sources = await db_data_creator.adb_client.get_all(URLDataSource) - assert len(url_data_sources) == 2 - - url_data_source_1 = url_data_sources[0] - url_data_source_2 = url_data_sources[1] - - assert url_data_source_1.url_id == url_1.id - assert url_data_source_1.data_source_id == 21 - - assert url_data_source_2.url_id == url_2.id - assert url_data_source_2.data_source_id == 34 - - # Check that errored URL has entry in url_error_info - url_errors = await db_data_creator.adb_client.get_all(URLTaskError) - assert len(url_errors) == 1 - url_error = url_errors[0] - assert url_error.url_id == url_3.id - assert url_error.error == "Test Error" - - # Check mock method was called expected parameters - access_manager = mock_pdap_client.access_manager - access_manager.make_request.assert_called_once() - access_manager.build_url.assert_called_with( - namespace=DataSourcesNamespaces.SOURCE_COLLECTOR, - subdomains=['data-sources'] - ) - - call_1 = access_manager.make_request.call_args_list[0][0][0] - expected_call_1 = RequestInfo( - type_=RequestType.POST, - url="http://example.com", - headers=access_manager.jwt_header.return_value, - json_={ - "data_sources": [ - { - "name": "URL 1 Name", - "source_url": url_1.url, - "record_type": "Accident Reports", - "description": None, - "record_formats": ["Record Format 1", "Record Format 2"], - "data_portal_type": "Data Portal Type 1", - "last_approval_editor": 1, - "supplying_entity": "Supplying Entity 1", - "agency_ids": [agency_id] - }, - { - "name": "URL 2 Name", - "source_url": url_2.url, - "record_type": "Incarceration Records", - "description": "URL 2 Description", - "last_approval_editor": 2, - "supplying_entity": None, - "record_formats": None, - "data_portal_type": None, - "agency_ids": [agency_id] - }, - { - "name": "URL 3 Name", - "source_url": url_3.url, - "record_type": "Accident Reports", - "description": "URL 3 Description", - "last_approval_editor": 3, - "supplying_entity": None, - "record_formats": None, - "data_portal_type": None, - "agency_ids": [agency_id] - } - ] - } - ) - assert call_1.type_ == expected_call_1.type_ - assert call_1.headers == expected_call_1.headers - diff = DeepDiff(call_1.json_, expected_call_1.json_, ignore_order=True) - assert diff == {}, f"Differences found: {diff}" diff --git a/tests/automated/integration/tasks/url/impl/submit_approved/test_validated_meta_url.py b/tests/automated/integration/tasks/url/impl/submit_approved/test_validated_meta_url.py deleted file mode 100644 index 76754b29..00000000 --- a/tests/automated/integration/tasks/url/impl/submit_approved/test_validated_meta_url.py +++ /dev/null @@ -1,41 +0,0 @@ -import pytest - -from src.core.tasks.base.run_info import TaskOperatorRunInfo -from src.core.tasks.url.operators.submit_approved.core import SubmitApprovedURLTaskOperator -from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.url.data_source.sqlalchemy import URLDataSource -from src.external.pdap.client import PDAPClient -from tests.helpers.asserts import assert_task_run_success - - -@pytest.mark.asyncio -async def test_validated_meta_url_not_included( - db_data_creator, - mock_pdap_client: PDAPClient, -): - """ - If a validated Meta URL is included in the database - This should not be included in the submit approved task - """ - - # Get Task Operator - operator = SubmitApprovedURLTaskOperator( - adb_client=db_data_creator.adb_client, - pdap_client=mock_pdap_client - ) - - dbdc = db_data_creator - url_1: int = (await dbdc.create_validated_urls( 
- validation_type=URLType.META_URL - ))[0].url_id - - # Test task operator does not meet prerequisites - assert not await operator.meets_task_prerequisites() - - # Run task and confirm runs without error - run_info: TaskOperatorRunInfo = await operator.run_task() - assert_task_run_success(run_info) - - # Confirm entry not included in database - ds_urls: list[URLDataSource] = await dbdc.adb_client.get_all(URLDataSource) - assert len(ds_urls) == 0 diff --git a/tests/automated/integration/tasks/url/impl/submit_meta_urls/test_core.py b/tests/automated/integration/tasks/url/impl/submit_meta_urls/test_core.py deleted file mode 100644 index 08914bed..00000000 --- a/tests/automated/integration/tasks/url/impl/submit_meta_urls/test_core.py +++ /dev/null @@ -1,79 +0,0 @@ -from http import HTTPStatus -from unittest.mock import AsyncMock - -import pytest -from pdap_access_manager import ResponseInfo - -from src.collectors.enums import URLStatus -from src.core.tasks.url.operators.submit_meta_urls.core import SubmitMetaURLsTaskOperator -from src.db.dtos.url.mapping_.simple import SimpleURLMapping -from src.db.models.impl.flag.url_validated.enums import URLType -from src.db.models.impl.url.core.sqlalchemy import URL -from src.db.models.impl.url.ds_meta_url.sqlalchemy import URLDSMetaURL -from src.external.pdap.client import PDAPClient -from src.external.pdap.impl.meta_urls.enums import SubmitMetaURLsStatus -from tests.helpers.data_creator.core import DBDataCreator -from tests.helpers.run import run_task_and_confirm_success - - -@pytest.mark.asyncio -async def test_submit_meta_urls( - db_data_creator: DBDataCreator, - mock_pdap_client: PDAPClient, -): - """ - Test Submit Meta URLs Task Operator - """ - - - operator = SubmitMetaURLsTaskOperator( - adb_client=db_data_creator.adb_client, - pdap_client=mock_pdap_client - ) - - assert not await operator.meets_task_prerequisites() - - # Create validated meta url - agency_id: int = (await db_data_creator.create_agencies(count=1))[0] - - mapping: SimpleURLMapping = (await db_data_creator.create_validated_urls( - validation_type=URLType.META_URL - ))[0] - await db_data_creator.link_urls_to_agencies( - url_ids=[mapping.url_id], - agency_ids=[agency_id] - ) - - mock_pdap_client.access_manager.make_request = AsyncMock( - return_value=ResponseInfo( - status_code=HTTPStatus.OK, - data={ - "meta_urls": [ - { - "url": f"https://{mapping.url}", - "agency_id": agency_id, - "status": SubmitMetaURLsStatus.SUCCESS.value, - "meta_url_id": 2, - "error": None, - }, - ] - } - ) - ) - - - assert await operator.meets_task_prerequisites() - - await run_task_and_confirm_success(operator) - - urls: list[URL] = await db_data_creator.adb_client.get_all(URL) - assert len(urls) == 1 - url: URL = urls[0] - assert url.status == URLStatus.OK - - url_ds_meta_urls: list[URLDSMetaURL] = await db_data_creator.adb_client.get_all(URLDSMetaURL) - assert len(url_ds_meta_urls) == 1 - url_ds_meta_url: URLDSMetaURL = url_ds_meta_urls[0] - assert url_ds_meta_url.url_id == url.id - assert url_ds_meta_url.ds_meta_url_id == 2 - assert url_ds_meta_url.agency_id == agency_id \ No newline at end of file diff --git a/tests/automated/integration/tasks/url/loader/test_flags.py b/tests/automated/integration/tasks/url/loader/test_flags.py index f812c947..33014f5f 100644 --- a/tests/automated/integration/tasks/url/loader/test_flags.py +++ b/tests/automated/integration/tasks/url/loader/test_flags.py @@ -12,7 +12,6 @@ from src.core.tasks.url.operators.probe.core import URLProbeTaskOperator from 
src.core.tasks.url.operators.record_type.core import URLRecordTypeTaskOperator from src.core.tasks.url.operators.root_url.core import URLRootURLTaskOperator -from src.core.tasks.url.operators.submit_approved.core import SubmitApprovedURLTaskOperator class FlagTestParams(BaseModel): @@ -36,10 +35,6 @@ class Config: env_var="URL_AGENCY_IDENTIFICATION_TASK_FLAG", operator=AgencyIdentificationTaskOperator ), - FlagTestParams( - env_var="URL_SUBMIT_APPROVED_TASK_FLAG", - operator=SubmitApprovedURLTaskOperator - ), FlagTestParams( env_var="URL_MISC_METADATA_TASK_FLAG", operator=URLMiscellaneousMetadataTaskOperator diff --git a/tests/automated/integration/tasks/url/loader/test_happy_path.py b/tests/automated/integration/tasks/url/loader/test_happy_path.py index a7b02e89..0786cb24 100644 --- a/tests/automated/integration/tasks/url/loader/test_happy_path.py +++ b/tests/automated/integration/tasks/url/loader/test_happy_path.py @@ -2,7 +2,7 @@ from src.core.tasks.url.loader import URLTaskOperatorLoader -NUMBER_OF_TASK_OPERATORS: int = 14 +NUMBER_OF_TASK_OPERATORS: int = 21 @pytest.mark.asyncio async def test_happy_path( diff --git a/tests/conftest.py b/tests/conftest.py index 8ba93200..eddb7f2d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,18 +7,19 @@ import pytest_asyncio from aiohttp import ClientSession from alembic.config import Config -from sqlalchemy import create_engine, inspect, MetaData +from sqlalchemy import create_engine, inspect, MetaData, Engine +from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine from sqlalchemy.orm import scoped_session, sessionmaker from src.core.env_var_manager import EnvVarManager +from src.db.client.async_ import AsyncDatabaseClient +from src.db.client.sync import DatabaseClient +from src.db.helpers.connect import get_postgres_connection_string +from src.db.models.impl.log.sqlalchemy import Log # noqa: F401 # Below are to prevent import errors from src.db.models.impl.missing import Missing # noqa: F401 -from src.db.models.impl.log.sqlalchemy import Log # noqa: F401 from src.db.models.impl.task.error import TaskError # noqa: F401 from src.db.models.impl.url.checked_for_duplicate import URLCheckedForDuplicate # noqa: F401 -from src.db.client.async_ import AsyncDatabaseClient -from src.db.client.sync import DatabaseClient -from src.db.helpers.connect import get_postgres_connection_string from src.util.helper_functions import load_from_environment from tests.helpers.alembic_runner import AlembicRunner from tests.helpers.data_creator.core import DBDataCreator @@ -99,33 +100,55 @@ def setup_and_teardown(): live_connection.close() engine.dispose() +@pytest.fixture(scope="session") +def engine(): + conn = get_postgres_connection_string() + engine = create_engine(conn) + yield engine + engine.dispose() + +@pytest.fixture(scope="session") +def async_engine(): + conn = get_postgres_connection_string(is_async=True) + engine = create_async_engine(conn) + yield engine + engine.dispose() + @pytest.fixture -def wiped_database(): +def wiped_database( + engine: Engine +): """Wipe all data from database.""" - wipe_database(get_postgres_connection_string()) + wipe_database(engine) @pytest.fixture -def db_client_test(wiped_database) -> Generator[DatabaseClient, Any, None]: +def db_client_test( + wiped_database, + engine +) -> Generator[DatabaseClient, Any, None]: # Drop pre-existing table - conn = get_postgres_connection_string() - db_client = DatabaseClient(db_url=conn) + db_client = DatabaseClient(engine) yield db_client db_client.engine.dispose() 
@pytest_asyncio.fixture -async def populated_database(wiped_database) -> None: - conn = get_postgres_connection_string(is_async=True) - adb_client = AsyncDatabaseClient(db_url=conn) +async def populated_database( + wiped_database, + async_engine: AsyncEngine +) -> None: + adb_client = AsyncDatabaseClient(async_engine) await populate_database(adb_client) @pytest_asyncio.fixture -async def adb_client_test(wiped_database) -> AsyncGenerator[AsyncDatabaseClient, Any]: - conn = get_postgres_connection_string(is_async=True) - adb_client = AsyncDatabaseClient(db_url=conn) +async def adb_client_test( + wiped_database, + async_engine: AsyncEngine +) -> AsyncGenerator[AsyncDatabaseClient, Any]: + adb_client = AsyncDatabaseClient(async_engine) yield adb_client - adb_client.engine.dispose() + await adb_client.engine.dispose() @pytest.fixture def db_data_creator( diff --git a/tests/helpers/data_creator/commands/impl/urls_/query.py b/tests/helpers/data_creator/commands/impl/urls_/query.py index 7587abfb..beff749f 100644 --- a/tests/helpers/data_creator/commands/impl/urls_/query.py +++ b/tests/helpers/data_creator/commands/impl/urls_/query.py @@ -1,6 +1,6 @@ from datetime import datetime -from src.core.tasks.url.operators.submit_approved.tdo import SubmittedURLInfo +from tests.helpers.data_creator.commands.impl.urls_.tdo import SubmittedURLInfo from src.db.dtos.url.insert import InsertURLsInfo from src.db.models.impl.url.core.enums import URLSource from src.db.models.impl.url.core.pydantic.info import URLInfo diff --git a/tests/helpers/data_creator/commands/impl/urls_/tdo.py b/tests/helpers/data_creator/commands/impl/urls_/tdo.py new file mode 100644 index 00000000..a8991dcd --- /dev/null +++ b/tests/helpers/data_creator/commands/impl/urls_/tdo.py @@ -0,0 +1,12 @@ +from datetime import datetime + +from pydantic import BaseModel + +from src.core.enums import RecordType + + +class SubmittedURLInfo(BaseModel): + url_id: int + data_source_id: int | None + request_error: str | None + submitted_at: datetime | None = None \ No newline at end of file diff --git a/tests/helpers/data_creator/core.py b/tests/helpers/data_creator/core.py index 575c594f..dd08a178 100644 --- a/tests/helpers/data_creator/core.py +++ b/tests/helpers/data_creator/core.py @@ -12,7 +12,7 @@ from src.db.dtos.url.insert import InsertURLsInfo from src.db.dtos.url.mapping_.simple import SimpleURLMapping from src.db.enums import TaskType -from src.db.models.impl.agency.enums import AgencyType +from src.db.models.impl.agency.enums import AgencyType, JurisdictionType from src.db.models.impl.agency.sqlalchemy import Agency from src.db.models.impl.duplicate.pydantic.insert import DuplicateInsertInfo from src.db.models.impl.flag.root_url.sqlalchemy import FlagRootURL @@ -522,9 +522,10 @@ async def create_url_agency_links( async def create_agency(self, agency_id: int = 1) -> None: agency = Agency( - agency_id=agency_id, + id=agency_id, name=generate_test_name(agency_id), - agency_type=AgencyType.UNKNOWN + agency_type=AgencyType.UNKNOWN, + jurisdiction_type=JurisdictionType.LOCAL ) await self.adb_client.add_all([agency]) @@ -534,9 +535,10 @@ async def create_agencies(self, count: int = 3) -> list[int]: for _ in range(count): agency_id = next_int() agency = Agency( - agency_id=agency_id, + id=agency_id, name=generate_test_name(agency_id), - agency_type=AgencyType.UNKNOWN + agency_type=AgencyType.UNKNOWN, + jurisdiction_type=JurisdictionType.LOCAL ) agencies.append(agency) agency_ids.append(agency_id) diff --git a/tests/helpers/data_creator/generate.py 
b/tests/helpers/data_creator/generate.py index f1eefce2..b447888d 100644 --- a/tests/helpers/data_creator/generate.py +++ b/tests/helpers/data_creator/generate.py @@ -76,7 +76,7 @@ def generate_url_data_sources( return [ URLDataSourcePydantic( url_id=url_id, - data_source_id=url_id, + ds_data_source_id=url_id, ) for url_id in url_ids ] \ No newline at end of file diff --git a/tests/helpers/mock.py b/tests/helpers/mock.py new file mode 100644 index 00000000..b761887b --- /dev/null +++ b/tests/helpers/mock.py @@ -0,0 +1,5 @@ +from unittest.mock import MagicMock, AsyncMock + + +def get_last_call_arguments(mock: MagicMock | AsyncMock) -> tuple: + return mock.call_args_list[-1].args \ No newline at end of file diff --git a/tests/helpers/setup/wipe.py b/tests/helpers/setup/wipe.py index e81c266d..f6cd3582 100644 --- a/tests/helpers/setup/wipe.py +++ b/tests/helpers/setup/wipe.py @@ -1,11 +1,10 @@ -from sqlalchemy import create_engine +from sqlalchemy import create_engine, Engine from src.db.models.templates_.base import Base -def wipe_database(connection_string: str) -> None: +def wipe_database(engine: Engine) -> None: """Wipe all data from database.""" - engine = create_engine(connection_string) with engine.connect() as connection: for table in reversed(Base.metadata.sorted_tables): if table.info == "view":