diff --git a/README.md b/README.md index c0d8992..45b252e 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ The client defaults to the v2 API. For v1 usage, see [V1.md](V1.md). - **Uploads**: Create, retrieve, and manage file uploads - **Datasets**: Create, list, retry, cancel, and delete datasets +- **Entities**: Query entity metadata across datasets - **Jobs**: List available dataset jobs - **Health**: Check API health status @@ -69,9 +70,11 @@ upload = Upload( ) upload_id = client.uploads.create(upload) -# Get upload by ID or name +# Get upload by ID upload = client.uploads.get_by_id(upload_id) -upload = client.uploads.get_by_name("My Upload") + +# Get upload sample metadata +metadata = client.uploads.get_sample_metadata(upload_id) # Update sample metadata sample_metadata = SampleMetadata(data=[ @@ -81,6 +84,14 @@ sample_metadata = SampleMetadata(data=[ ]) client.uploads.update_sample_metadata(upload_id, sample_metadata) +# Query uploads with filters and pagination +result = client.uploads.query(status=["completed"], source=["maxquant"], search="my upload", page=1) +uploads = result["data"] +pagination = result["pagination"] + +# Delete an upload +client.uploads.delete(upload_id) + # Wait for upload processing to complete upload = client.uploads.wait_until_complete(upload_id) ``` @@ -100,9 +111,20 @@ dataset = Dataset( ) dataset_id = client.datasets.create(dataset) -# List datasets for an upload +# Get a single dataset by ID (includes error_message) +dataset = client.datasets.get_by_id(dataset_id) + +# List datasets for an upload (uses the query endpoint internally) datasets = client.datasets.list_by_upload(upload_id) +# Query datasets with filters and pagination +result = client.datasets.query(upload_id=upload_id, state=["COMPLETED"], type=["INTENSITY"], page=1) +datasets = result["data"] +pagination = result["pagination"] + +# Get a presigned URL for table download (csv or parquet) +url = client.datasets.download_table_url(dataset_id, "table_name", format="csv") + # Find the initial intensity dataset initial = client.datasets.find_initial_dataset(upload_id) @@ -119,6 +141,14 @@ client.datasets.delete(dataset_id) ds = client.datasets.wait_until_complete(upload_id, dataset_id) ``` +## Entities + +```python +# Query entity metadata (proteins, genes, peptides) across datasets +result = client.entities.query(keyword="BRCA1", dataset_ids=["dataset-id-1", "dataset-id-2"]) +entities = result["results"] +``` + ## Jobs ```python diff --git a/examples/dataset/download_table.py b/examples/dataset/download_table.py new file mode 100644 index 0000000..9801d12 --- /dev/null +++ b/examples/dataset/download_table.py @@ -0,0 +1,42 @@ +""" +Example of downloading a dataset table using the MD Python client +""" + +import os + +from dotenv import load_dotenv + +from md_python import MDClient + +load_dotenv() + + +def download_table_example(): + """Example of getting a presigned URL for a dataset table download""" + + # Initialize client (replace with your actual API token) + client = MDClient(api_token=os.getenv("API_TOKEN")) + + # Dataset ID and table name (replace with actual values) + dataset_id = "YOUR_DATASET_ID" + table_name = "YOUR_TABLE_NAME" + + # Get a CSV download URL + try: + url = client.datasets.download_table_url(dataset_id, table_name, format="csv") + print(f"CSV download URL: {url}") + except Exception as e: + print(f"Error getting CSV download URL: {e}") + + # Get a Parquet download URL + try: + url = client.datasets.download_table_url( + dataset_id, table_name, format="parquet" + ) + print(f"Parquet download URL: {url}") + except Exception as e: + print(f"Error getting Parquet download URL: {e}") + + +if __name__ == "__main__": + download_table_example() diff --git a/examples/dataset/get_by_id.py b/examples/dataset/get_by_id.py new file mode 100644 index 0000000..7190cd3 --- /dev/null +++ b/examples/dataset/get_by_id.py @@ -0,0 +1,36 @@ +""" +Example of getting a dataset by ID using the MD Python client +""" + +import os + +from dotenv import load_dotenv + +from md_python import MDClient + +load_dotenv() + + +def get_dataset_by_id_example(): + """Example of getting a single dataset by ID""" + + # Initialize client (replace with your actual API token) + client = MDClient(api_token=os.getenv("API_TOKEN")) + + # Dataset ID to retrieve (replace with actual dataset ID) + dataset_id = "YOUR_DATASET_ID" + + try: + dataset = client.datasets.get_by_id(dataset_id) + if dataset: + print(f"Dataset found: {dataset.name}") + print(f"State: {dataset.state}") + print(dataset) + else: + print(f"Dataset {dataset_id} not found") + except Exception as e: + print(f"Error getting dataset: {e}") + + +if __name__ == "__main__": + get_dataset_by_id_example() diff --git a/examples/dataset/query.py b/examples/dataset/query.py new file mode 100644 index 0000000..8dd02bc --- /dev/null +++ b/examples/dataset/query.py @@ -0,0 +1,58 @@ +""" +Example of querying datasets using the MD Python client +""" + +import os + +from dotenv import load_dotenv + +from md_python import MDClient + +load_dotenv() + + +def query_datasets_example(): + """Example of querying datasets with various filters""" + + # Initialize client (replace with your actual API token) + client = MDClient(api_token=os.getenv("API_TOKEN")) + + # Query all datasets (paginated) + try: + result = client.datasets.query(page=1) + print(f"Found {len(result['data'])} datasets on page 1") + print(f"Pagination: {result['pagination']}") + except Exception as e: + print(f"Error querying datasets: {e}") + + # Query datasets for a specific upload + try: + result = client.datasets.query(upload_id="YOUR_UPLOAD_ID") + for ds in result["data"]: + print( + f" {ds['id']}: {ds.get('name', 'unnamed')} ({ds.get('state', 'unknown')})" + ) + except Exception as e: + print(f"Error querying datasets: {e}") + + # Query with state and type filters + try: + result = client.datasets.query( + state=["COMPLETED"], + type=["INTENSITY"], + page=1, + ) + print(f"Found {len(result['data'])} completed intensity datasets") + except Exception as e: + print(f"Error querying datasets: {e}") + + # Query with a search term + try: + result = client.datasets.query(search="pairwise") + print(f"Found {len(result['data'])} datasets matching 'pairwise'") + except Exception as e: + print(f"Error querying datasets: {e}") + + +if __name__ == "__main__": + query_datasets_example() diff --git a/examples/experiment/get_experiment_by_name_example.py b/examples/experiment/get_experiment_by_name_example.py deleted file mode 100644 index 6aeacff..0000000 --- a/examples/experiment/get_experiment_by_name_example.py +++ /dev/null @@ -1,34 +0,0 @@ -""" -Example of getting an experiment by name using the MD Python client -""" - -import os - -from dotenv import load_dotenv - -from md_python import MDClient - -load_dotenv() - - -def main(): - # Initialize the client with your API token - client = MDClient(api_token=os.getenv("API_TOKEN")) - - # Example experiment name (replace with an actual experiment name) - experiment_name = "YOUR_EXPERIMENT_NAME" - - try: - # Get the experiment by name - experiment = client.experiments.get_by_name(experiment_name) - - print("Experiment found!") - print(experiment) - - except Exception as e: - print("Error retrieving experiment!") - print(e) - - -if __name__ == "__main__": - main() diff --git a/examples/upload/delete.py b/examples/upload/delete.py new file mode 100644 index 0000000..5105e6d --- /dev/null +++ b/examples/upload/delete.py @@ -0,0 +1,35 @@ +""" +Example of deleting an upload using the MD Python client +""" + +import os + +from dotenv import load_dotenv + +from md_python import MDClient + +load_dotenv() + + +def delete_upload_example(): + """Example of deleting an upload by ID""" + + # Initialize client (replace with your actual API token) + client = MDClient(api_token=os.getenv("API_TOKEN")) + + # Upload ID to delete (replace with actual upload ID) + upload_id = "YOUR_UPLOAD_ID" + + # Delete the upload + try: + success = client.uploads.delete(upload_id) + if success: + print(f"Upload {upload_id} deleted successfully!") + else: + print(f"Failed to delete upload {upload_id}") + except Exception as e: + print(f"Error deleting upload: {e}") + + +if __name__ == "__main__": + delete_upload_example() diff --git a/examples/upload/query.py b/examples/upload/query.py new file mode 100644 index 0000000..258ee3f --- /dev/null +++ b/examples/upload/query.py @@ -0,0 +1,58 @@ +""" +Example of querying uploads using the MD Python client +""" + +import os + +from dotenv import load_dotenv + +from md_python import MDClient + +load_dotenv() + + +def query_uploads_example(): + """Example of querying uploads with various filters""" + + # Initialize client (replace with your actual API token) + client = MDClient(api_token=os.getenv("API_TOKEN")) + + # Query all uploads (paginated) + try: + result = client.uploads.query(page=1) + print(f"Found {len(result['data'])} uploads on page 1") + print(f"Pagination: {result['pagination']}") + except Exception as e: + print(f"Error querying uploads: {e}") + + # Query with a search term + try: + result = client.uploads.query(search="my experiment") + for upload in result["data"]: + print(f" {upload['id']}: {upload['name']}") + except Exception as e: + print(f"Error querying uploads: {e}") + + # Query with status and source filters + try: + result = client.uploads.query( + status=["completed"], + source=["maxquant"], + page=1, + ) + print(f"Found {len(result['data'])} completed maxquant uploads") + except Exception as e: + print(f"Error querying uploads: {e}") + + # Query with sample metadata filters + try: + result = client.uploads.query( + sample_metadata=[{"key": "condition", "value": "treated"}], + ) + print(f"Found {len(result['data'])} uploads matching metadata filter") + except Exception as e: + print(f"Error querying uploads: {e}") + + +if __name__ == "__main__": + query_uploads_example() diff --git a/pyproject.toml b/pyproject.toml index 4eeef96..6046170 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "md-python" -version = "0.2.2" +version = "0.2.3" description = "Python client for Mass Dynamics API" readme = "README.md" requires-python = ">=3.11" diff --git a/src/md_python/base_client.py b/src/md_python/base_client.py index d27531f..8baa4e1 100644 --- a/src/md_python/base_client.py +++ b/src/md_python/base_client.py @@ -3,7 +3,7 @@ """ import os -from typing import Optional +from typing import Any, Optional import requests from dotenv import load_dotenv @@ -44,6 +44,7 @@ def _make_request( endpoint: str, headers: Optional[dict] = None, json: Optional[dict] = None, + **kwargs: Any, ) -> requests.Response: """Make HTTP request to the API""" url = f"{self.base_url}{endpoint}" @@ -52,4 +53,6 @@ def _make_request( if headers: request_headers.update(headers) - return requests.request(method, url, headers=request_headers, json=json) + return requests.request( + method, url, headers=request_headers, json=json, **kwargs + ) diff --git a/src/md_python/client_v2.py b/src/md_python/client_v2.py index bfa66a3..73ec4cb 100644 --- a/src/md_python/client_v2.py +++ b/src/md_python/client_v2.py @@ -6,11 +6,11 @@ from .base_client import BaseMDClient from .resources import Health -from .resources.v2 import Datasets, Jobs, Uploads +from .resources.v2 import Datasets, Entities, Jobs, Uploads class MDClientV2(BaseMDClient): - """V2 API client — uploads, datasets, jobs, health""" + """V2 API client — uploads, datasets, entities, jobs, health""" ACCEPT_HEADER = "application/vnd.md-v2+json" @@ -19,4 +19,5 @@ def __init__(self, api_token: Optional[str] = None, base_url: Optional[str] = No self.health = Health(self) self.uploads = Uploads(self) self.datasets = Datasets(self) + self.entities = Entities(self) self.jobs = Jobs(self) diff --git a/src/md_python/models/dataset.py b/src/md_python/models/dataset.py index c33a4cb..31bcf95 100644 --- a/src/md_python/models/dataset.py +++ b/src/md_python/models/dataset.py @@ -24,6 +24,7 @@ class Dataset: id: Optional[UUID] = None sample_names: Optional[List[str]] = None job_run_start_time: Optional[datetime] = None + error_message: Optional[str] = None def __str__(self) -> str: """Return a readable string representation of the dataset""" @@ -86,4 +87,5 @@ def from_json(cls, data: Dict[str, Any]) -> "Dataset": type=data.get("type"), state=data.get("state"), job_run_start_time=job_run_start_time, + error_message=data.get("error_message"), ) diff --git a/src/md_python/resources/v2/__init__.py b/src/md_python/resources/v2/__init__.py index e53e15a..421069c 100644 --- a/src/md_python/resources/v2/__init__.py +++ b/src/md_python/resources/v2/__init__.py @@ -3,7 +3,8 @@ """ from .datasets import Datasets +from .entities import Entities from .jobs import Jobs from .uploads import Uploads -__all__ = ["Uploads", "Datasets", "Jobs"] +__all__ = ["Uploads", "Datasets", "Entities", "Jobs"] diff --git a/src/md_python/resources/v2/datasets.py b/src/md_python/resources/v2/datasets.py index f6780ae..427aa48 100644 --- a/src/md_python/resources/v2/datasets.py +++ b/src/md_python/resources/v2/datasets.py @@ -54,17 +54,95 @@ def create(self, dataset: Dataset) -> str: def list_by_upload(self, upload_id: str) -> List[Dataset]: """Get datasets belonging to an upload""" response = self._client._make_request( - method="GET", - endpoint=f"/datasets?experiment_id={upload_id}", + method="POST", + endpoint="/datasets/query", + json={"upload_id": upload_id}, + headers={"Content-Type": "application/json"}, ) if response.status_code == 200: - return [Dataset.from_json(d) for d in response.json()] + return [Dataset.from_json(d) for d in response.json().get("data", [])] else: raise Exception( f"Failed to get datasets: {response.status_code} - {response.text}" ) + def get_by_id(self, dataset_id: str) -> Optional[Dataset]: + """Get a single dataset by ID""" + response = self._client._make_request( + method="GET", + endpoint=f"/datasets/{dataset_id}", + ) + + if response.status_code == 404: + return None + if response.status_code != 200: + raise Exception( + f"Failed to get dataset: {response.status_code} - {response.text}" + ) + return Dataset.from_json(response.json()) + + def download_table_url( + self, dataset_id: str, table_name: str, format: str = "csv" + ) -> str: + """Get a presigned download URL for a dataset table. + + The API returns a 302 redirect to a presigned URL. + """ + if format not in ("csv", "parquet"): + raise ValueError(f"format must be 'csv' or 'parquet', got '{format}'") + + response = self._client._make_request( + method="GET", + endpoint=f"/datasets/{dataset_id}/tables/{table_name}.{format}", + allow_redirects=False, + ) + + if response.status_code == 302: + location = response.headers.get("Location") + if location: + return location + raise Exception("302 response missing Location header") + else: + raise Exception( + f"Failed to get download URL: {response.status_code} - {response.text}" + ) + + def query( + self, + upload_id: Optional[str] = None, + state: Optional[List[str]] = None, + type: Optional[List[str]] = None, + search: Optional[str] = None, + page: int = 1, + ) -> Dict[str, Any]: + """Query datasets with filters""" + payload: Dict[str, Any] = {"page": page} + + if upload_id is not None: + payload["upload_id"] = upload_id + if state is not None: + payload["state"] = state + if type is not None: + payload["type"] = type + if search is not None: + payload["search"] = search + + response = self._client._make_request( + method="POST", + endpoint="/datasets/query", + json=payload, + headers={"Content-Type": "application/json"}, + ) + + if response.status_code == 200: + result: Dict[str, Any] = response.json() + return result + else: + raise Exception( + f"Failed to query datasets: {response.status_code} - {response.text}" + ) + def delete(self, dataset_id: str) -> bool: """Delete a dataset by ID""" response = self._client._make_request( diff --git a/src/md_python/resources/v2/entities.py b/src/md_python/resources/v2/entities.py new file mode 100644 index 0000000..a4ecf3c --- /dev/null +++ b/src/md_python/resources/v2/entities.py @@ -0,0 +1,40 @@ +""" +Entities resource for the MD Python v2 client +""" + +from typing import TYPE_CHECKING, Any, Dict, List + +if TYPE_CHECKING: + from ...base_client import BaseMDClient + + +class Entities: + """V2 entities resource""" + + def __init__(self, client: "BaseMDClient"): + self._client = client + + def query(self, keyword: str, dataset_ids: List[str]) -> Dict[str, Any]: + """Query entity metadata across one or more datasets. + + Args: + keyword: Search keyword (min 2 characters) + dataset_ids: List of dataset IDs to search across + + Returns: + Response dict with a 'results' key + """ + response = self._client._make_request( + method="POST", + endpoint="/entities/query", + json={"keyword": keyword, "dataset_ids": dataset_ids}, + headers={"Content-Type": "application/json"}, + ) + + if response.status_code == 200: + result: Dict[str, Any] = response.json() + return result + else: + raise Exception( + f"Failed to query entities: {response.status_code} - {response.text}" + ) diff --git a/src/md_python/resources/v2/uploads.py b/src/md_python/resources/v2/uploads.py index 4de8d11..3daf748 100644 --- a/src/md_python/resources/v2/uploads.py +++ b/src/md_python/resources/v2/uploads.py @@ -36,9 +36,6 @@ def create(self, upload: Upload) -> str: if upload.file_location and not upload.filenames: raise ValueError("filenames must be provided when using file_location") - if not upload.experiment_design: - raise ValueError("experiment_design is required") - if not upload.sample_metadata: raise ValueError("sample_metadata is required") @@ -46,10 +43,12 @@ def create(self, upload: Upload) -> str: "name": upload.name, "source": upload.source, "filenames": upload.filenames, - "experiment_design": upload.experiment_design.data, "sample_metadata": upload.sample_metadata.data, } + if upload.experiment_design: + payload["experiment_design"] = upload.experiment_design.data + if upload.file_location: payload["file_location"] = upload.file_location if upload.filenames: @@ -100,17 +99,71 @@ def get_by_id(self, upload_id: str) -> Optional[Upload]: f"Failed to get upload: {response.status_code} - {response.text}" ) - def get_by_name(self, name: str) -> Optional[Upload]: - """Get an upload by its name""" + def delete(self, upload_id: str) -> bool: + """Delete an upload by ID""" + response = self._client._make_request( + method="DELETE", + endpoint=f"/uploads/{upload_id}", + ) + + if response.status_code == 204: + return True + else: + raise Exception( + f"Failed to delete upload: {response.status_code} - {response.text}" + ) + + def get_sample_metadata(self, upload_id: str) -> Optional[SampleMetadata]: + """Get an upload's sample metadata""" response = self._client._make_request( - method="GET", endpoint=f"/uploads?name={name}" + method="GET", + endpoint=f"/uploads/{upload_id}/sample_metadata", ) if response.status_code == 200: - return Upload.from_json(response.json()) + data = response.json() + raw = data.get("sample_metadata") + if raw is not None: + return SampleMetadata(data=raw) + return None + else: + raise Exception( + f"Failed to get sample metadata: {response.status_code} - {response.text}" + ) + + def query( + self, + status: Optional[List[str]] = None, + source: Optional[List[str]] = None, + search: Optional[str] = None, + sample_metadata: Optional[List[Dict[str, str]]] = None, + page: int = 1, + ) -> Dict[str, Any]: + """Query uploads with filters""" + payload: Dict[str, Any] = {"page": page} + + if status is not None: + payload["status"] = status + if source is not None: + payload["source"] = source + if search is not None: + payload["search"] = search + if sample_metadata is not None: + payload["sample_metadata"] = sample_metadata + + response = self._client._make_request( + method="POST", + endpoint="/uploads/query", + json=payload, + headers={"Content-Type": "application/json"}, + ) + + if response.status_code == 200: + result: Dict[str, Any] = response.json() + return result else: raise Exception( - f"Failed to get upload by name: {response.status_code} - {response.text}" + f"Failed to query uploads: {response.status_code} - {response.text}" ) def update_sample_metadata( diff --git a/tests/models/test_dataset.py b/tests/models/test_dataset.py index 4796db7..4593d81 100644 --- a/tests/models/test_dataset.py +++ b/tests/models/test_dataset.py @@ -28,6 +28,7 @@ def test_init_minimal(self): assert dataset.sample_names is None assert dataset.job_run_params == {} assert dataset.job_run_start_time is None + assert dataset.error_message is None def test_init_full(self): """Test Dataset initialization with all fields""" @@ -67,3 +68,31 @@ def test_input_dataset_ids_with_strings(self): # This test documents the current behavior assert dataset.name == "Test Dataset" assert dataset.job_slug == "test_job" + + def test_from_json_with_error_message(self): + data = { + "id": "123e4567-e89b-12d3-a456-426614174000", + "input_dataset_ids": [], + "name": "Failed Dataset", + "job_slug": "test_job", + "job_run_params": {}, + "state": "FAILED", + "error_message": "Processing failed: out of memory", + } + + dataset = Dataset.from_json(data) + + assert dataset.error_message == "Processing failed: out of memory" + assert dataset.state == "FAILED" + + def test_from_json_without_error_message(self): + data = { + "input_dataset_ids": [], + "name": "OK Dataset", + "job_slug": "test_job", + "job_run_params": {}, + } + + dataset = Dataset.from_json(data) + + assert dataset.error_message is None diff --git a/tests/resources/v2/test_datasets.py b/tests/resources/v2/test_datasets.py index 039852d..80790ab 100644 --- a/tests/resources/v2/test_datasets.py +++ b/tests/resources/v2/test_datasets.py @@ -90,14 +90,17 @@ def test_create_failure(self, datasets, sample_dataset, mock_client): def test_list_by_upload_success(self, datasets, mock_client): mock_response = Mock() mock_response.status_code = 200 - mock_response.json.return_value = [ - { - "id": "a1b2c3d4e5f67890a1b2c3d4e5f67890", - "name": "DS1", - "job_slug": "flow_1", - "job_run_params": {}, - } - ] + mock_response.json.return_value = { + "data": [ + { + "id": "a1b2c3d4e5f67890a1b2c3d4e5f67890", + "name": "DS1", + "job_slug": "flow_1", + "job_run_params": {}, + } + ], + "pagination": {"page": 1}, + } mock_client._make_request.return_value = mock_response result = datasets.list_by_upload("upload-1") @@ -107,18 +110,19 @@ def test_list_by_upload_success(self, datasets, mock_client): assert result[0].name == "DS1" call_args = mock_client._make_request.call_args - assert call_args[1]["endpoint"] == "/datasets?experiment_id=upload-1" + assert call_args[1]["method"] == "POST" + assert call_args[1]["endpoint"] == "/datasets/query" + assert call_args[1]["json"] == {"upload_id": "upload-1"} - def test_list_by_upload_no_custom_headers(self, datasets, mock_client): + def test_list_by_upload_empty(self, datasets, mock_client): mock_response = Mock() mock_response.status_code = 200 - mock_response.json.return_value = [] + mock_response.json.return_value = {"data": [], "pagination": {}} mock_client._make_request.return_value = mock_response - datasets.list_by_upload("upload-1") + result = datasets.list_by_upload("upload-1") - call_args = mock_client._make_request.call_args - assert "headers" not in call_args[1] or call_args[1].get("headers") is None + assert result == [] def test_list_by_upload_failure(self, datasets, mock_client): mock_response = Mock() @@ -192,6 +196,140 @@ def test_cancel_failure(self, datasets, mock_client): with pytest.raises(Exception, match="Failed to cancel dataset: 400"): datasets.cancel("ds-1") + def test_get_by_id_success(self, datasets, mock_client): + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "id": "11111111-1111-1111-1111-111111111111", + "name": "DS1", + "job_slug": "flow_1", + "job_run_params": {}, + "input_dataset_ids": [], + } + mock_client._make_request.return_value = mock_response + + result = datasets.get_by_id("11111111-1111-1111-1111-111111111111") + + assert isinstance(result, Dataset) + assert result.name == "DS1" + + call_args = mock_client._make_request.call_args + assert call_args[1]["method"] == "GET" + assert ( + call_args[1]["endpoint"] == "/datasets/11111111-1111-1111-1111-111111111111" + ) + + def test_get_by_id_not_found(self, datasets, mock_client): + mock_response = Mock() + mock_response.status_code = 404 + mock_client._make_request.return_value = mock_response + + result = datasets.get_by_id("nonexistent") + + assert result is None + + def test_get_by_id_failure(self, datasets, mock_client): + mock_response = Mock() + mock_response.status_code = 500 + mock_response.text = "Server error" + mock_client._make_request.return_value = mock_response + + with pytest.raises(Exception, match="Failed to get dataset: 500"): + datasets.get_by_id("ds-1") + + def test_download_table_url_success(self, datasets, mock_client): + mock_response = Mock() + mock_response.status_code = 302 + mock_response.headers = {"Location": "https://s3.amazonaws.com/presigned-url"} + mock_client._make_request.return_value = mock_response + + result = datasets.download_table_url("ds-1", "intensity", format="csv") + + assert result == "https://s3.amazonaws.com/presigned-url" + + call_args = mock_client._make_request.call_args + assert call_args[1]["method"] == "GET" + assert call_args[1]["endpoint"] == "/datasets/ds-1/tables/intensity.csv" + assert call_args[1]["allow_redirects"] is False + + def test_download_table_url_parquet(self, datasets, mock_client): + mock_response = Mock() + mock_response.status_code = 302 + mock_response.headers = { + "Location": "https://s3.amazonaws.com/presigned-parquet" + } + mock_client._make_request.return_value = mock_response + + result = datasets.download_table_url("ds-1", "intensity", format="parquet") + + assert result == "https://s3.amazonaws.com/presigned-parquet" + + call_args = mock_client._make_request.call_args + assert call_args[1]["endpoint"] == "/datasets/ds-1/tables/intensity.parquet" + + def test_download_table_url_invalid_format(self, datasets): + with pytest.raises(ValueError, match="format must be 'csv' or 'parquet'"): + datasets.download_table_url("ds-1", "intensity", format="json") + + def test_download_table_url_failure(self, datasets, mock_client): + mock_response = Mock() + mock_response.status_code = 404 + mock_response.text = "Not found" + mock_client._make_request.return_value = mock_response + + with pytest.raises(Exception, match="Failed to get download URL: 404"): + datasets.download_table_url("ds-1", "intensity") + + def test_query_with_all_filters(self, datasets, mock_client): + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "data": [{"name": "DS1", "job_slug": "flow_1"}], + "pagination": {"page": 1, "total_pages": 1}, + } + mock_client._make_request.return_value = mock_response + + result = datasets.query( + upload_id="upload-1", + state=["COMPLETED"], + type=["INTENSITY"], + search="test", + page=2, + ) + + assert result["data"][0]["name"] == "DS1" + + call_args = mock_client._make_request.call_args + assert call_args[1]["method"] == "POST" + assert call_args[1]["endpoint"] == "/datasets/query" + + payload = call_args[1]["json"] + assert payload["upload_id"] == "upload-1" + assert payload["state"] == ["COMPLETED"] + assert payload["type"] == ["INTENSITY"] + assert payload["search"] == "test" + assert payload["page"] == 2 + + def test_query_with_defaults(self, datasets, mock_client): + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"data": [], "pagination": {}} + mock_client._make_request.return_value = mock_response + + datasets.query() + + payload = mock_client._make_request.call_args[1]["json"] + assert payload == {"page": 1} + + def test_query_failure(self, datasets, mock_client): + mock_response = Mock() + mock_response.status_code = 500 + mock_response.text = "Server error" + mock_client._make_request.return_value = mock_response + + with pytest.raises(Exception, match="Failed to query datasets: 500"): + datasets.query() + def test_wait_until_complete_success(self, datasets, mock_client, mocker): completed_ds = Dataset( input_dataset_ids=[], diff --git a/tests/resources/v2/test_entities.py b/tests/resources/v2/test_entities.py new file mode 100644 index 0000000..cba8093 --- /dev/null +++ b/tests/resources/v2/test_entities.py @@ -0,0 +1,91 @@ +from unittest.mock import Mock + +import pytest + +from md_python.client_v2 import MDClientV2 +from md_python.resources.v2.entities import Entities + + +class TestV2Entities: + + @pytest.fixture + def mock_client(self): + return Mock(spec=MDClientV2) + + @pytest.fixture + def entities(self, mock_client): + return Entities(mock_client) + + def test_query_success(self, entities, mock_client): + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "results": [ + {"gene_name": "BRCA1", "dataset_id": "abc-123"}, + {"gene_name": "BRCA1", "dataset_id": "def-456"}, + ] + } + mock_client._make_request.return_value = mock_response + + result = entities.query(keyword="BRCA1", dataset_ids=["abc-123", "def-456"]) + + assert "results" in result + assert len(result["results"]) == 2 + assert result["results"][0]["gene_name"] == "BRCA1" + + call_args = mock_client._make_request.call_args + assert call_args[1]["method"] == "POST" + assert call_args[1]["endpoint"] == "/entities/query" + assert call_args[1]["json"] == { + "keyword": "BRCA1", + "dataset_ids": ["abc-123", "def-456"], + } + + def test_query_empty_results(self, entities, mock_client): + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"results": []} + mock_client._make_request.return_value = mock_response + + result = entities.query(keyword="NONEXISTENT", dataset_ids=["abc-123"]) + + assert result == {"results": []} + + def test_query_single_dataset(self, entities, mock_client): + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"results": [{"gene_name": "TP53"}]} + mock_client._make_request.return_value = mock_response + + result = entities.query(keyword="TP53", dataset_ids=["abc-123"]) + + call_args = mock_client._make_request.call_args + assert call_args[1]["json"]["dataset_ids"] == ["abc-123"] + assert len(result["results"]) == 1 + + def test_query_failure(self, entities, mock_client): + mock_response = Mock() + mock_response.status_code = 502 + mock_response.text = "Entity search failed" + mock_client._make_request.return_value = mock_response + + with pytest.raises(Exception, match="Failed to query entities: 502"): + entities.query(keyword="BRCA1", dataset_ids=["abc-123"]) + + def test_query_forbidden(self, entities, mock_client): + mock_response = Mock() + mock_response.status_code = 403 + mock_response.text = "Forbidden" + mock_client._make_request.return_value = mock_response + + with pytest.raises(Exception, match="Failed to query entities: 403"): + entities.query(keyword="BRCA1", dataset_ids=["abc-123"]) + + def test_query_bad_request(self, entities, mock_client): + mock_response = Mock() + mock_response.status_code = 400 + mock_response.text = "keyword is too short" + mock_client._make_request.return_value = mock_response + + with pytest.raises(Exception, match="Failed to query entities: 400"): + entities.query(keyword="A", dataset_ids=["abc-123"]) diff --git a/tests/resources/v2/test_uploads.py b/tests/resources/v2/test_uploads.py index 658a2ea..ce3e543 100644 --- a/tests/resources/v2/test_uploads.py +++ b/tests/resources/v2/test_uploads.py @@ -131,17 +131,6 @@ def test_create_validation_file_location_without_filenames(self, uploads): with pytest.raises(ValueError, match="filenames must be provided"): uploads.create(upload) - def test_create_validation_missing_experiment_design(self, uploads): - upload = Upload( - name="Bad", - source="maxquant", - s3_bucket="bucket", - filenames=["a.txt"], - ) - - with pytest.raises(ValueError, match="experiment_design is required"): - uploads.create(upload) - def test_create_validation_missing_sample_metadata(self, uploads): upload = Upload( name="Bad", @@ -199,31 +188,115 @@ def test_get_by_id_failure(self, uploads, mock_client): with pytest.raises(Exception, match="Failed to get upload: 404"): uploads.get_by_id("bad-id") - def test_get_by_name_success(self, uploads, mock_client): + def test_delete_success(self, uploads, mock_client): + mock_response = Mock() + mock_response.status_code = 204 + mock_client._make_request.return_value = mock_response + + result = uploads.delete("upload-1") + + assert result is True + call_args = mock_client._make_request.call_args + assert call_args[1]["method"] == "DELETE" + assert call_args[1]["endpoint"] == "/uploads/upload-1" + + def test_delete_failure(self, uploads, mock_client): + mock_response = Mock() + mock_response.status_code = 404 + mock_response.text = "Not found" + mock_client._make_request.return_value = mock_response + + with pytest.raises(Exception, match="Failed to delete upload: 404"): + uploads.delete("upload-1") + + def test_get_sample_metadata_success(self, uploads, mock_client): mock_response = Mock() mock_response.status_code = 200 mock_response.json.return_value = { - "name": "Named Upload", - "source": "maxquant", + "sample_metadata": [ + ["sample_name", "dose"], + ["s1", "1"], + ] } mock_client._make_request.return_value = mock_response - result = uploads.get_by_name("Named Upload") + result = uploads.get_sample_metadata("upload-1") - assert isinstance(result, Upload) - assert result.name == "Named Upload" + assert isinstance(result, SampleMetadata) + assert result.data == [["sample_name", "dose"], ["s1", "1"]] call_args = mock_client._make_request.call_args - assert call_args[1]["endpoint"] == "/uploads?name=Named Upload" + assert call_args[1]["method"] == "GET" + assert call_args[1]["endpoint"] == "/uploads/upload-1/sample_metadata" - def test_get_by_name_failure(self, uploads, mock_client): + def test_get_sample_metadata_returns_none_when_missing(self, uploads, mock_client): + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {} + mock_client._make_request.return_value = mock_response + + result = uploads.get_sample_metadata("upload-1") + + assert result is None + + def test_get_sample_metadata_failure(self, uploads, mock_client): mock_response = Mock() mock_response.status_code = 404 mock_response.text = "Not found" mock_client._make_request.return_value = mock_response - with pytest.raises(Exception, match="Failed to get upload by name: 404"): - uploads.get_by_name("nope") + with pytest.raises(Exception, match="Failed to get sample metadata: 404"): + uploads.get_sample_metadata("upload-1") + + def test_query_with_all_filters(self, uploads, mock_client): + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "data": [{"name": "Upload 1", "source": "maxquant"}], + "pagination": {"page": 1, "total_pages": 1}, + } + mock_client._make_request.return_value = mock_response + + result = uploads.query( + status=["COMPLETED"], + source=["maxquant"], + search="test", + sample_metadata=[{"column": "dose", "value": "1"}], + page=2, + ) + + assert result["data"][0]["name"] == "Upload 1" + + call_args = mock_client._make_request.call_args + assert call_args[1]["method"] == "POST" + assert call_args[1]["endpoint"] == "/uploads/query" + + payload = call_args[1]["json"] + assert payload["status"] == ["COMPLETED"] + assert payload["source"] == ["maxquant"] + assert payload["search"] == "test" + assert payload["sample_metadata"] == [{"column": "dose", "value": "1"}] + assert payload["page"] == 2 + + def test_query_with_defaults(self, uploads, mock_client): + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"data": [], "pagination": {}} + mock_client._make_request.return_value = mock_response + + uploads.query() + + payload = mock_client._make_request.call_args[1]["json"] + assert payload == {"page": 1} + + def test_query_failure(self, uploads, mock_client): + mock_response = Mock() + mock_response.status_code = 500 + mock_response.text = "Server error" + mock_client._make_request.return_value = mock_response + + with pytest.raises(Exception, match="Failed to query uploads: 500"): + uploads.query() def test_update_sample_metadata_success(self, uploads, mock_client): sm = SampleMetadata(data=[["group"], ["a"], ["b"]])