diff --git a/tests/automated/integration/readonly/api/data_sources/agencies/__init__.py b/src/api/endpoints/data_source/_shared/__init__.py similarity index 100% rename from tests/automated/integration/readonly/api/data_sources/agencies/__init__.py rename to src/api/endpoints/data_source/_shared/__init__.py diff --git a/src/api/endpoints/data_source/_shared/build.py b/src/api/endpoints/data_source/_shared/build.py new file mode 100644 index 00000000..35b65343 --- /dev/null +++ b/src/api/endpoints/data_source/_shared/build.py @@ -0,0 +1,66 @@ +from sqlalchemy import Select, select, and_ +from sqlalchemy.orm import selectinload + +from src.db.models.impl.flag.url_validated.enums import URLType +from src.db.models.impl.flag.url_validated.sqlalchemy import FlagURLValidated +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata +from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType + + +def build_data_source_get_query() -> Select: + return ( + select( + URL, + URL.id, + URL.url, + + # Required Attributes + URL.name, + URLRecordType.record_type, + + # Optional Attributes + URL.description, + LinkBatchURL.batch_id, + URLOptionalDataSourceMetadata.record_formats, + URLOptionalDataSourceMetadata.data_portal_type, + URLOptionalDataSourceMetadata.supplying_entity, + URLOptionalDataSourceMetadata.coverage_start, + URLOptionalDataSourceMetadata.coverage_end, + URLOptionalDataSourceMetadata.agency_supplied, + URLOptionalDataSourceMetadata.agency_aggregation, + URLOptionalDataSourceMetadata.agency_described_not_in_database, + URLOptionalDataSourceMetadata.agency_originated, + URLOptionalDataSourceMetadata.update_method, + URLOptionalDataSourceMetadata.readme_url, + URLOptionalDataSourceMetadata.originating_entity, + URLOptionalDataSourceMetadata.retention_schedule, + 
URLOptionalDataSourceMetadata.scraper_url, + URLOptionalDataSourceMetadata.submission_notes, + URLOptionalDataSourceMetadata.access_notes, + URLOptionalDataSourceMetadata.access_types + ) + .join( + URLRecordType, + URLRecordType.url_id == URL.id + ) + .join( + FlagURLValidated, + and_( + FlagURLValidated.url_id == URL.id, + FlagURLValidated.type == URLType.DATA_SOURCE + ) + ) + .outerjoin( + LinkBatchURL, + LinkBatchURL.url_id == URL.id + ) + .outerjoin( + URLOptionalDataSourceMetadata, + URLOptionalDataSourceMetadata.url_id == URL.id + ) + .options( + selectinload(URL.confirmed_agencies), + ) + ) \ No newline at end of file diff --git a/src/api/endpoints/data_source/_shared/process.py b/src/api/endpoints/data_source/_shared/process.py new file mode 100644 index 00000000..252ed7c0 --- /dev/null +++ b/src/api/endpoints/data_source/_shared/process.py @@ -0,0 +1,44 @@ +from sqlalchemy import RowMapping + +from src.api.endpoints.data_source.get.response import DataSourceGetResponse +from src.db.models.impl.link.batch_url.sqlalchemy import LinkBatchURL +from src.db.models.impl.url.core.sqlalchemy import URL +from src.db.models.impl.url.optional_ds_metadata.sqlalchemy import URLOptionalDataSourceMetadata +from src.db.models.impl.url.record_type.sqlalchemy import URLRecordType + + +def process_data_source_get_mapping( + mapping: RowMapping +) -> DataSourceGetResponse: + url: URL = mapping[URL] + + url_agency_ids: list[int] = [] + for agency in url.confirmed_agencies: + url_agency_ids.append(agency.id) + + return DataSourceGetResponse( + url_id=mapping[URL.id], + url=mapping[URL.url], + name=mapping[URL.name], + record_type=mapping[URLRecordType.record_type], + agency_ids=url_agency_ids, + description=mapping[URL.description], + batch_id=mapping[LinkBatchURL.batch_id], + record_formats=mapping[URLOptionalDataSourceMetadata.record_formats] or [], + data_portal_type=mapping[URLOptionalDataSourceMetadata.data_portal_type], + 
supplying_entity=mapping[URLOptionalDataSourceMetadata.supplying_entity], + coverage_start=mapping[URLOptionalDataSourceMetadata.coverage_start], + coverage_end=mapping[URLOptionalDataSourceMetadata.coverage_end], + agency_supplied=mapping[URLOptionalDataSourceMetadata.agency_supplied], + agency_aggregation=mapping[URLOptionalDataSourceMetadata.agency_aggregation], + agency_originated=mapping[URLOptionalDataSourceMetadata.agency_originated], + agency_described_not_in_database=mapping[URLOptionalDataSourceMetadata.agency_described_not_in_database], + update_method=mapping[URLOptionalDataSourceMetadata.update_method], + readme_url=mapping[URLOptionalDataSourceMetadata.readme_url], + originating_entity=mapping[URLOptionalDataSourceMetadata.originating_entity], + retention_schedule=mapping[URLOptionalDataSourceMetadata.retention_schedule], + scraper_url=mapping[URLOptionalDataSourceMetadata.scraper_url], + submission_notes=mapping[URLOptionalDataSourceMetadata.submission_notes], + access_notes=mapping[URLOptionalDataSourceMetadata.access_notes], + access_types=mapping[URLOptionalDataSourceMetadata.access_types] or [] + ) \ No newline at end of file diff --git a/src/api/endpoints/data_source/by_id/get/__init__.py b/src/api/endpoints/data_source/by_id/get/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/api/endpoints/data_source/by_id/get/query.py b/src/api/endpoints/data_source/by_id/get/query.py new file mode 100644 index 00000000..8f839543 --- /dev/null +++ b/src/api/endpoints/data_source/by_id/get/query.py @@ -0,0 +1,24 @@ +from sqlalchemy import Select, RowMapping +from sqlalchemy.ext.asyncio import AsyncSession + +from src.api.endpoints.data_source._shared.build import build_data_source_get_query +from src.api.endpoints.data_source._shared.process import process_data_source_get_mapping +from src.api.endpoints.data_source.get.response import DataSourceGetResponse +from src.db.models.impl.url.core.sqlalchemy import URL +from 
src.db.queries.base.builder import QueryBuilderBase + + +class GetDataSourceByIDQueryBuilder(QueryBuilderBase): + def __init__( + self, + url_id: int, + ): + super().__init__() + self.url_id = url_id + + async def run(self, session: AsyncSession) -> DataSourceGetResponse: + query: Select = build_data_source_get_query() + query = query.where(URL.id == self.url_id) + + mapping: RowMapping = await self.sh.mapping(session, query=query) + return process_data_source_get_mapping(mapping=mapping) \ No newline at end of file diff --git a/src/api/endpoints/data_source/get/query.py b/src/api/endpoints/data_source/get/query.py index 8766409d..cc167d62 100644 --- a/src/api/endpoints/data_source/get/query.py +++ b/src/api/endpoints/data_source/get/query.py @@ -1,10 +1,12 @@ from datetime import date from typing import Any, Sequence -from sqlalchemy import select, RowMapping, and_ +from sqlalchemy import select, RowMapping, and_, Select from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import selectinload +from src.api.endpoints.data_source._shared.build import build_data_source_get_query +from src.api.endpoints.data_source._shared.process import process_data_source_get_mapping from src.api.endpoints.data_source.get.response import DataSourceGetOuterResponse, DataSourceGetResponse from src.core.enums import RecordType from src.db.models.impl.flag.url_validated.enums import URLType @@ -18,7 +20,7 @@ from src.db.queries.base.builder import QueryBuilderBase -class GetDataSourceQueryBuilder(QueryBuilderBase): +class GetDataSourcesQueryBuilder(QueryBuilderBase): def __init__( self, @@ -28,59 +30,9 @@ def __init__( self.page = page async def run(self, session: AsyncSession) -> DataSourceGetOuterResponse: + query: Select = build_data_source_get_query() query = ( - select( - URL, - URL.id, - URL.url, - - # Required Attributes - URL.name, - URLRecordType.record_type, - - # Optional Attributes - URL.description, - LinkBatchURL.batch_id, - 
URLOptionalDataSourceMetadata.record_formats, - URLOptionalDataSourceMetadata.data_portal_type, - URLOptionalDataSourceMetadata.supplying_entity, - URLOptionalDataSourceMetadata.coverage_start, - URLOptionalDataSourceMetadata.coverage_end, - URLOptionalDataSourceMetadata.agency_supplied, - URLOptionalDataSourceMetadata.agency_aggregation, - URLOptionalDataSourceMetadata.agency_described_not_in_database, - URLOptionalDataSourceMetadata.agency_originated, - URLOptionalDataSourceMetadata.update_method, - URLOptionalDataSourceMetadata.readme_url, - URLOptionalDataSourceMetadata.originating_entity, - URLOptionalDataSourceMetadata.retention_schedule, - URLOptionalDataSourceMetadata.scraper_url, - URLOptionalDataSourceMetadata.submission_notes, - URLOptionalDataSourceMetadata.access_notes, - URLOptionalDataSourceMetadata.access_types - ) - .join( - URLRecordType, - URLRecordType.url_id == URL.id - ) - .join( - FlagURLValidated, - and_( - FlagURLValidated.url_id == URL.id, - FlagURLValidated.type == URLType.DATA_SOURCE - ) - ) - .outerjoin( - LinkBatchURL, - LinkBatchURL.url_id == URL.id - ) - .outerjoin( - URLOptionalDataSourceMetadata, - URLOptionalDataSourceMetadata.url_id == URL.id - ) - .options( - selectinload(URL.confirmed_agencies), - ) + query .limit(100) .offset((self.page - 1) * 100) ) @@ -89,64 +41,8 @@ async def run(self, session: AsyncSession) -> DataSourceGetOuterResponse: responses: list[DataSourceGetResponse] = [] for mapping in mappings: - url: URL = mapping[URL] - url_id: int = mapping[URL.id] - url_url: str = mapping[URL.url] - url_name: str = mapping[URL.name] - url_record_type: RecordType = mapping[URLRecordType.record_type] - - url_agency_ids: list[int] = [] - for agency in url.confirmed_agencies: - url_agency_ids.append(agency.id) - - url_description: str | None = mapping[URL.description] - link_batch_url_batch_id: int | None = mapping[LinkBatchURL.batch_id] - url_record_formats: list[str] = mapping[URLOptionalDataSourceMetadata.record_formats] or 
[] - url_data_portal_type: str | None = mapping[URLOptionalDataSourceMetadata.data_portal_type] - url_supplying_entity: str | None = mapping[URLOptionalDataSourceMetadata.supplying_entity] - url_coverage_start: date | None = mapping[URLOptionalDataSourceMetadata.coverage_start] - url_coverage_end: date | None = mapping[URLOptionalDataSourceMetadata.coverage_end] - url_agency_supplied: bool | None = mapping[URLOptionalDataSourceMetadata.agency_supplied] - url_agency_aggregation: AgencyAggregationEnum | None = mapping[URLOptionalDataSourceMetadata.agency_aggregation] - url_agency_originated: bool | None = mapping[URLOptionalDataSourceMetadata.agency_originated] - url_agency_described_not_in_database: bool | None = mapping[URLOptionalDataSourceMetadata.agency_described_not_in_database] - url_update_method: UpdateMethodEnum | None = mapping[URLOptionalDataSourceMetadata.update_method] - url_readme_url: str | None = mapping[URLOptionalDataSourceMetadata.readme_url] - url_originating_entity: str | None = mapping[URLOptionalDataSourceMetadata.originating_entity] - url_retention_schedule: RetentionScheduleEnum | None = mapping[URLOptionalDataSourceMetadata.retention_schedule] - url_scraper_url: str | None = mapping[URLOptionalDataSourceMetadata.scraper_url] - url_submission_notes: str | None = mapping[URLOptionalDataSourceMetadata.submission_notes] - url_access_notes: str | None = mapping[URLOptionalDataSourceMetadata.access_notes] - url_access_types: list[AccessTypeEnum] = mapping[URLOptionalDataSourceMetadata.access_types] or [] - - responses.append( - DataSourceGetResponse( - url_id=url_id, - url=url_url, - name=url_name, - record_type=url_record_type, - agency_ids=url_agency_ids, - description=url_description, - batch_id=link_batch_url_batch_id, - record_formats=url_record_formats, - data_portal_type=url_data_portal_type, - supplying_entity=url_supplying_entity, - coverage_start=url_coverage_start, - coverage_end=url_coverage_end, - agency_supplied=url_agency_supplied, 
- agency_aggregation=url_agency_aggregation, - agency_originated=url_agency_originated, - agency_described_not_in_database=url_agency_described_not_in_database, - update_method=url_update_method, - readme_url=url_readme_url, - originating_entity=url_originating_entity, - retention_schedule=url_retention_schedule, - scraper_url=url_scraper_url, - submission_notes=url_submission_notes, - access_notes=url_access_notes, - access_types=url_access_types - ) - ) + response: DataSourceGetResponse = process_data_source_get_mapping(mapping) + responses.append(response) return DataSourceGetOuterResponse( results=responses, diff --git a/src/api/endpoints/data_source/routes.py b/src/api/endpoints/data_source/routes.py index 2464ceea..04d81f10 100644 --- a/src/api/endpoints/data_source/routes.py +++ b/src/api/endpoints/data_source/routes.py @@ -6,8 +6,9 @@ from src.api.endpoints.data_source.by_id.agency.get.wrapper import get_data_source_agencies_wrapper from src.api.endpoints.data_source.by_id.agency.post.wrapper import add_data_source_agency_link from src.api.endpoints.data_source.by_id.agency.shared.check import check_is_data_source_url -from src.api.endpoints.data_source.get.query import GetDataSourceQueryBuilder -from src.api.endpoints.data_source.get.response import DataSourceGetOuterResponse +from src.api.endpoints.data_source.by_id.get.query import GetDataSourceByIDQueryBuilder +from src.api.endpoints.data_source.get.query import GetDataSourcesQueryBuilder +from src.api.endpoints.data_source.get.response import DataSourceGetOuterResponse, DataSourceGetResponse from src.api.endpoints.data_source.by_id.put.query import UpdateDataSourceQueryBuilder from src.api.endpoints.data_source.by_id.put.request import DataSourcePutRequest from src.api.shared.models.message_response import MessageResponse @@ -28,7 +29,16 @@ async def get_data_sources( ), ) -> DataSourceGetOuterResponse: return await async_core.adb_client.run_query_builder( - GetDataSourceQueryBuilder(page=page) + 
GetDataSourcesQueryBuilder(page=page) + ) + +@data_sources_router.get("/{url_id}") +async def get_data_source_by_id( + url_id: int, + async_core: AsyncCore = Depends(get_async_core), +) -> DataSourceGetResponse: + return await async_core.adb_client.run_query_builder( + GetDataSourceByIDQueryBuilder(url_id) ) @data_sources_router.put("/{url_id}") @@ -81,3 +91,4 @@ async def remove_agency_from_data_source( adb_client=async_core.adb_client ) return MessageResponse(message="Agency removed from data source.") + diff --git a/tests/automated/integration/readonly/api/data_sources/by_id/__init__.py b/tests/automated/integration/readonly/api/data_sources/by_id/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/readonly/api/data_sources/by_id/agencies/__init__.py b/tests/automated/integration/readonly/api/data_sources/by_id/agencies/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/automated/integration/readonly/api/data_sources/agencies/test_forbid.py b/tests/automated/integration/readonly/api/data_sources/by_id/agencies/test_forbid.py similarity index 100% rename from tests/automated/integration/readonly/api/data_sources/agencies/test_forbid.py rename to tests/automated/integration/readonly/api/data_sources/by_id/agencies/test_forbid.py diff --git a/tests/automated/integration/readonly/api/data_sources/by_id/test_get.py b/tests/automated/integration/readonly/api/data_sources/by_id/test_get.py new file mode 100644 index 00000000..a874d054 --- /dev/null +++ b/tests/automated/integration/readonly/api/data_sources/by_id/test_get.py @@ -0,0 +1,12 @@ +import pytest + +from src.api.endpoints.data_source.get.response import DataSourceGetResponse +from tests.automated.integration.readonly.helper import ReadOnlyTestHelper + +@pytest.mark.asyncio +async def test_get_by_id(readonly_helper: ReadOnlyTestHelper): + raw_json: dict = readonly_helper.api_test_helper.request_validator.get_v3( + 
url=f"/data-sources/{readonly_helper.maximal_data_source}", + ) + # Test response is in expected form. + DataSourceGetResponse(**raw_json)