Merge pull request #258 from juliamcclellan/clusters_refresh
fix clusters with data refresh
nbateshaus committed Aug 13, 2019
2 parents ef6cfce + 00b629e commit 528a1fe
Showing 4 changed files with 65 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -6,6 +6,7 @@
 
 **BUG FIXES**
 - [#235](https://github.com/Datatamer/unify-client-python/issues/235) Making `AttributeCollection` retrieve attributes directly instead of by streaming
+- [#256](https://github.com/Datatamer/unify-client-python/issues/256) Record and published clusters refresh did not use the correct endpoint
 
 ## 0.8.0
 **BREAKING CHANGES**
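For context, the bug this entry tracks (#256): calling `refresh()` on the clusters-with-data datasets POSTed to the generic dataset refresh endpoint instead of the project-scoped one. A minimal sketch of the two targets; the dataset id `5` and project id `1` are illustrative, and the pre-fix URL is inferred rather than shown in this diff:

base = "http://localhost:9100/api/versioned/v1/"

# Presumed pre-fix target: the dataset returned by datasets.by_name(...)
# kept its own api_path, so refresh() hit the generic dataset endpoint.
buggy_refresh = base + "datasets/5:refresh"

# Post-fix target, matching the URLs stubbed in the tests below.
fixed_refresh = base + "projects/1/publishedClustersWithData:refresh"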
12 changes: 8 additions & 4 deletions tamr_unify_client/mastering/project.py
@@ -202,9 +202,9 @@ def record_clusters_with_data(self):
         # being able to call refresh on resulting dataset. Until then, we grab
         # the dataset by constructing its name from the corresponding Unified Dataset's name
         name = unified_dataset.name + "_dedup_clusters_with_data"
-        return self.client.datasets.by_name(name)
-
-    # super.__repr__ is sufficient
+        dataset = self.client.datasets.by_name(name)
+        dataset.api_path = self.api_path + "/recordClustersWithData"
+        return dataset
 
     def published_clusters_with_data(self):
         """Project's unified dataset with associated clusters.
@@ -215,7 +215,9 @@ def published_clusters_with_data(self):
 
         unified_dataset = self.unified_dataset()
         name = unified_dataset.name + "_dedup_published_clusters_with_data"
-        return self.client.datasets.by_name(name)
+        dataset = self.client.datasets.by_name(name)
+        dataset.api_path = self.api_path + "/publishedClustersWithData"
+        return dataset
 
     def binning_model(self):
         """
@@ -229,3 +231,5 @@
         # Cannot get this resource and so we hard code
         resource_json = {"relativeId": alias}
         return BinningModel.from_json(self.client, resource_json, alias)
+
+    # super.__repr__ is sufficient
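The mechanism of the fix in one place: `refresh()` POSTs to `<api_path>:refresh` (the tests below stub exactly that URL), so overriding `api_path` on the returned dataset redirects the refresh without touching anything else. A runnable sketch under that assumption; `DatasetSketch` and the session wiring are illustrative stand-ins, not the library's real classes:

import requests

BASE = "http://localhost:9100/api/versioned/v1/"


class DatasetSketch:
    """Hypothetical stand-in for the client's Dataset, for illustration only."""

    def __init__(self, session, api_path):
        self.session = session
        self.api_path = api_path  # the fix above overrides this attribute

    def refresh(self):
        # POST to "<api_path>:refresh", mirroring the URLs the tests stub.
        return self.session.post(BASE + self.api_path + ":refresh")


session = requests.Session()

# Without the override, refresh() would hit the plain dataset resource.
plain = DatasetSketch(session, "datasets/5")  # POSTs to datasets/5:refresh

# With the override from this commit, it hits the project-scoped endpoint:
# scoped.refresh() POSTs to projects/1/recordClustersWithData:refresh
scoped = DatasetSketch(session, "projects/1/recordClustersWithData")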
28 changes: 28 additions & 0 deletions tests/unit/test_published_clusters_with_data.py
@@ -31,6 +31,28 @@ def test_published_clusters_with_data():
         "version": "251",
     }
 
+    refresh_json = {
+        "id": "93",
+        "type": "SPARK",
+        "description": "Publish clusters",
+        "status": {
+            "state": "SUCCEEDED",
+            "startTime": "2019-06-24T15:58:56.595Z",
+            "endTime": "2019-06-24T15:59:17.084Z",
+        },
+        "created": {
+            "username": "admin",
+            "time": "2019-06-24T15:58:48.734Z",
+            "version": "2407",
+        },
+        "lastModified": {
+            "username": "system",
+            "time": "2019-06-24T15:59:18.350Z",
+            "version": "2423",
+        },
+        "relativeId": "operations/93",
+    }
+
     datasets_json = [pcwd_json]
 
     unify = Client(UsernamePasswordAuth("username", "password"))
@@ -42,10 +64,16 @@
         f"http://localhost:9100/api/versioned/v1/projects/{project_id}/unifiedDataset"
     )
     datasets_url = f"http://localhost:9100/api/versioned/v1/datasets"
+    refresh_url = project_url + "/publishedClustersWithData:refresh"
 
     responses.add(responses.GET, project_url, json=project_config)
     responses.add(responses.GET, unified_dataset_url, json=unified_dataset_json)
     responses.add(responses.GET, datasets_url, json=datasets_json)
+    responses.add(responses.POST, refresh_url, json=refresh_json)
+
     project = unify.projects.by_resource_id(project_id)
     actual_pcwd_dataset = project.as_mastering().published_clusters_with_data()
     assert actual_pcwd_dataset.name == pcwd_json["name"]
+
+    op = actual_pcwd_dataset.refresh(poll_interval_seconds=0)
+    assert op.succeeded()
28 changes: 28 additions & 0 deletions tests/unit/test_record_clusters_with_data.py
@@ -32,6 +32,28 @@ def test_record_clusters_with_data():
         "version": "251",
     }
 
+    refresh_json = {
+        "id": "93",
+        "type": "SPARK",
+        "description": "Clustering",
+        "status": {
+            "state": "SUCCEEDED",
+            "startTime": "2019-06-24T15:58:56.595Z",
+            "endTime": "2019-06-24T15:59:17.084Z",
+        },
+        "created": {
+            "username": "admin",
+            "time": "2019-06-24T15:58:48.734Z",
+            "version": "2407",
+        },
+        "lastModified": {
+            "username": "system",
+            "time": "2019-06-24T15:59:18.350Z",
+            "version": "2423",
+        },
+        "relativeId": "operations/93",
+    }
+
     datasets_json = [rcwd_json]
 
     unify = Client(UsernamePasswordAuth("username", "password"))
@@ -43,10 +65,16 @@
         f"http://localhost:9100/api/versioned/v1/projects/{project_id}/unifiedDataset"
     )
    datasets_url = f"http://localhost:9100/api/versioned/v1/datasets"
+    refresh_url = project_url + "/recordClustersWithData:refresh"
 
     responses.add(responses.GET, project_url, json=project_config)
     responses.add(responses.GET, unified_dataset_url, json=unified_dataset_json)
     responses.add(responses.GET, datasets_url, json=datasets_json)
+    responses.add(responses.POST, refresh_url, json=refresh_json)
+
     project = unify.projects.by_resource_id(project_id)
     actual_rcwd_dataset = project.as_mastering().record_clusters_with_data()
     assert actual_rcwd_dataset.name == rcwd_json["name"]
+
+    op = actual_rcwd_dataset.refresh(poll_interval_seconds=0)
+    assert op.succeeded()
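And the user-facing effect these tests lock in, as a short usage sketch; the project resource id "1" is an assumption for illustration, and the client defaults to the same localhost instance the tests stub:

from tamr_unify_client import Client
from tamr_unify_client.auth import UsernamePasswordAuth

unify = Client(UsernamePasswordAuth("username", "password"))
project = unify.projects.by_resource_id("1").as_mastering()  # id "1" is illustrative

# Both derived datasets now refresh through the project-scoped endpoints.
op = project.record_clusters_with_data().refresh(poll_interval_seconds=3)
assert op.succeeded()

op = project.published_clusters_with_data().refresh(poll_interval_seconds=3)
assert op.succeeded()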
