Merge pull request #258 from juliamcclellan/clusters_refresh
fix clusters with data refresh
nbateshaus committed Aug 13, 2019
2 parents ef6cfce + 00b629e commit 528a1fe
Showing 4 changed files with 65 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -6,6 +6,7 @@
 
 **BUG FIXES**
 - [#235](https://github.com/Datatamer/unify-client-python/issues/235) Making `AttributeCollection` retrieve attributes directly instead of by streaming
+- [#256](https://github.com/Datatamer/unify-client-python/issues/256) Record and published clusters refresh did not use the correct endpoint
 
 ## 0.8.0
 **BREAKING CHANGES**
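For context, the bug this entry tracks (#256): calling `refresh()` on the clusters-with-data datasets POSTed to the generic dataset refresh endpoint instead of the project-scoped one. A minimal sketch of the two targets; the dataset id `5` and project id `1` are illustrative, and the pre-fix URL is inferred rather than shown in this diff:

base = "http://localhost:9100/api/versioned/v1/"

# Presumed pre-fix target: the dataset returned by datasets.by_name(...)
# kept its own api_path, so refresh() hit the generic dataset endpoint.
buggy_refresh = base + "datasets/5:refresh"

# Post-fix target, matching the URLs stubbed in the tests below.
fixed_refresh = base + "projects/1/publishedClustersWithData:refresh"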
12 changes: 8 additions & 4 deletions tamr_unify_client/mastering/project.py
@@ -202,9 +202,9 @@ def record_clusters_with_data(self):
         # being able to call refresh on resulting dataset. Until then, we grab
         # the dataset by constructing its name from the corresponding Unified Dataset's name
         name = unified_dataset.name + "_dedup_clusters_with_data"
-        return self.client.datasets.by_name(name)
-
-    # super.__repr__ is sufficient
+        dataset = self.client.datasets.by_name(name)
+        dataset.api_path = self.api_path + "/recordClustersWithData"
+        return dataset
 
     def published_clusters_with_data(self):
         """Project's unified dataset with associated clusters.
@@ -215,7 +215,9 @@ def published_clusters_with_data(self):
 
         unified_dataset = self.unified_dataset()
         name = unified_dataset.name + "_dedup_published_clusters_with_data"
-        return self.client.datasets.by_name(name)
+        dataset = self.client.datasets.by_name(name)
+        dataset.api_path = self.api_path + "/publishedClustersWithData"
+        return dataset
 
     def binning_model(self):
         """
@@ -229,3 +231,5 @@
         # Cannot get this resource and so we hard code
         resource_json = {"relativeId": alias}
         return BinningModel.from_json(self.client, resource_json, alias)
+
+    # super.__repr__ is sufficient
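The mechanism of the fix in one place: `refresh()` POSTs to `<api_path>:refresh` (the tests below stub exactly that URL), so overriding `api_path` on the returned dataset redirects the refresh without touching anything else. A runnable sketch under that assumption; `DatasetSketch` and the session wiring are illustrative stand-ins, not the library's real classes:

import requests

BASE = "http://localhost:9100/api/versioned/v1/"


class DatasetSketch:
    """Hypothetical stand-in for the client's Dataset, for illustration only."""

    def __init__(self, session, api_path):
        self.session = session
        self.api_path = api_path  # the fix above overrides this attribute

    def refresh(self):
        # POST to "<api_path>:refresh", mirroring the URLs the tests stub.
        return self.session.post(BASE + self.api_path + ":refresh")


session = requests.Session()

# Without the override, refresh() would hit the plain dataset resource.
plain = DatasetSketch(session, "datasets/5")  # POSTs to datasets/5:refresh

# With the override from this commit, it hits the project-scoped endpoint:
# scoped.refresh() POSTs to projects/1/recordClustersWithData:refresh
scoped = DatasetSketch(session, "projects/1/recordClustersWithData")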
28 changes: 28 additions & 0 deletions tests/unit/test_published_clusters_with_data.py
@@ -31,6 +31,28 @@ def test_published_clusters_with_data():
         "version": "251",
     }
 
+    refresh_json = {
+        "id": "93",
+        "type": "SPARK",
+        "description": "Publish clusters",
+        "status": {
+            "state": "SUCCEEDED",
+            "startTime": "2019-06-24T15:58:56.595Z",
+            "endTime": "2019-06-24T15:59:17.084Z",
+        },
+        "created": {
+            "username": "admin",
+            "time": "2019-06-24T15:58:48.734Z",
+            "version": "2407",
+        },
+        "lastModified": {
+            "username": "system",
+            "time": "2019-06-24T15:59:18.350Z",
+            "version": "2423",
+        },
+        "relativeId": "operations/93",
+    }
+
     datasets_json = [pcwd_json]
 
     unify = Client(UsernamePasswordAuth("username", "password"))
@@ -42,10 +64,16 @@
         f"http://localhost:9100/api/versioned/v1/projects/{project_id}/unifiedDataset"
     )
     datasets_url = f"http://localhost:9100/api/versioned/v1/datasets"
+    refresh_url = project_url + "/publishedClustersWithData:refresh"
 
     responses.add(responses.GET, project_url, json=project_config)
     responses.add(responses.GET, unified_dataset_url, json=unified_dataset_json)
     responses.add(responses.GET, datasets_url, json=datasets_json)
+    responses.add(responses.POST, refresh_url, json=refresh_json)
+
     project = unify.projects.by_resource_id(project_id)
     actual_pcwd_dataset = project.as_mastering().published_clusters_with_data()
     assert actual_pcwd_dataset.name == pcwd_json["name"]
+
+    op = actual_pcwd_dataset.refresh(poll_interval_seconds=0)
+    assert op.succeeded()
28 changes: 28 additions & 0 deletions tests/unit/test_record_clusters_with_data.py
@@ -32,6 +32,28 @@ def test_record_clusters_with_data():
         "version": "251",
     }
 
+    refresh_json = {
+        "id": "93",
+        "type": "SPARK",
+        "description": "Clustering",
+        "status": {
+            "state": "SUCCEEDED",
+            "startTime": "2019-06-24T15:58:56.595Z",
+            "endTime": "2019-06-24T15:59:17.084Z",
+        },
+        "created": {
+            "username": "admin",
+            "time": "2019-06-24T15:58:48.734Z",
+            "version": "2407",
+        },
+        "lastModified": {
+            "username": "system",
+            "time": "2019-06-24T15:59:18.350Z",
+            "version": "2423",
+        },
+        "relativeId": "operations/93",
+    }
+
     datasets_json = [rcwd_json]
 
     unify = Client(UsernamePasswordAuth("username", "password"))
@@ -43,10 +65,16 @@
         f"http://localhost:9100/api/versioned/v1/projects/{project_id}/unifiedDataset"
     )
    datasets_url = f"http://localhost:9100/api/versioned/v1/datasets"
+    refresh_url = project_url + "/recordClustersWithData:refresh"
 
     responses.add(responses.GET, project_url, json=project_config)
     responses.add(responses.GET, unified_dataset_url, json=unified_dataset_json)
     responses.add(responses.GET, datasets_url, json=datasets_json)
+    responses.add(responses.POST, refresh_url, json=refresh_json)
+
     project = unify.projects.by_resource_id(project_id)
     actual_rcwd_dataset = project.as_mastering().record_clusters_with_data()
     assert actual_rcwd_dataset.name == rcwd_json["name"]
+
+    op = actual_rcwd_dataset.refresh(poll_interval_seconds=0)
+    assert op.succeeded()
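And the user-facing effect these tests lock in, as a short usage sketch; the project resource id "1" is an assumption for illustration, and the client defaults to the same localhost instance the tests stub:

from tamr_unify_client import Client
from tamr_unify_client.auth import UsernamePasswordAuth

unify = Client(UsernamePasswordAuth("username", "password"))
project = unify.projects.by_resource_id("1").as_mastering()  # id "1" is illustrative

# Both derived datasets now refresh through the project-scoped endpoints.
op = project.record_clusters_with_data().refresh(poll_interval_seconds=3)
assert op.succeeded()

op = project.published_clusters_with_data().refresh(poll_interval_seconds=3)
assert op.succeeded()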
