# api.yaml
---
# Specification version. Kept as a quoted string so it is never read as the
# float 2.0.
swagger: '2.0'

# Service metadata.
info:
  title: Data Explorer Service
  description: API Service that reads from Elasticsearch.
  # Quoted so the version stays a string.
  version: '0.0.1'
  license:
    name: BSD
    url: 'http://opensource.org/licenses/BSD-3-Clause'
# HTTP endpoints of the service. Every operation returns a single 200
# response whose schema is one of the models under "definitions".
# Response status codes are written as quoted strings so the mapping key is a
# string rather than a YAML integer.
paths:
  /dataset:
    get:
      description: Gets dataset information, such as name.
      responses:
        "200":
          description: Success
          schema:
            $ref: "#/definitions/DatasetResponse"
      tags:
        # Put in dataset_controller.py instead of default_controller.py.
        - Dataset
  /facets:
    # Example url (values are pipe-separated, see collectionFormat below):
    # /facets?filter=project_id.dataset_id.table_name.Gender=female|project_id.dataset_id.table_name.Region=northwest|project_id.dataset_id.table_name.Region=southwest
    get:
      description: Returns facets.
      parameters:
        - name: filter
          # Unfortunately OpenAPI 2.0 doesn't allow objects in GET query
          # parameters. So hardcode something like "Gender=female".
          # TODO(#32): Investigate switching to OpenAPI 3.0.
          description: >
            filter represents selected facet values. Elasticsearch query will
            be run only over selected facet values. filter is an array of
            strings, where each string has the format "esFieldName=facetValue".
            Example url /facets?filter=Gender=female|Region=northwest|Region=southwest
          in: query
          type: array
          # Default collectionFormat is csv. With csv, faceted search doesn't
          # work when facet value has a comma. So switch to pipes; pipe in facet
          # value is much more rare.
          collectionFormat: pipes
          items:
            type: string
        - name: extraFacets
          # Explicit folded block (strip chomping); the resulting string is the
          # same as the multi-line plain scalar it replaces.
          description: >-
            extraFacets represents the fields selected from the field search box.
            extraFacets is a list of Elasticsearch field names. In the returned
            list of facets, the extra facets will come before the facets from ui.json.
          in: query
          # Pipe-separated, same as the filter parameter.
          collectionFormat: pipes
          type: array
          items:
            type: string
      responses:
        "200":
          description: Success
          schema:
            $ref: "#/definitions/FacetsResponse"
      tags:
        # Put in facets_controller.py instead of default_controller.py.
        - Facets
  /search:
    get:
      description: >
        Free text search over dataset. If query is empty, this returns all
        dataset fields, to populate the search drop-down on initial page load.
        If query is set, this returns only dataset fields that match the query.
      parameters:
        - name: query
          description: >
            What was typed into search box. Say user typed "foo". query is "foo";
            Elasticsearch will be searched for "foo*".
          in: query
          type: string
      responses:
        "200":
          description: Success
          schema:
            $ref: "#/definitions/SearchResponse"
      tags:
        # Put in search_controller.py instead of default_controller.py.
        - Search
  /exportUrl:
    # This is post because this is not idempotent; a GCS file is created.
    post:
      description: >
        Creates and returns a signed URL to a GCS zip file of JSON files. The
        JSON files represent entities to be exported to a Terra workspace.
        https://app.terra.bio/#import-data may be called with the url parameter
        set to this url. For each JSON file,
        https://rawls.dsde-prod.broadinstitute.org/#!/entities/create_entity
        may be called with the JSON as the POST body.
      parameters:
        - name: exportUrlRequest
          in: body
          schema:
            type: object
            properties:
              cohortName:
                type: string
              filter:
                # No collectionFormat here: that keyword is only defined for
                # non-body parameters, and a schema inside a request body
                # describes a regular JSON array.
                type: array
                items:
                  type: string
              dataExplorerUrl:
                type: string
              sqlQuery:
                type: string
      responses:
        "200":
          description: Success
          schema:
            $ref: "#/definitions/ExportUrlResponse"
      tags:
        # Put in export_url_controller.py instead of default_controller.py.
        - ExportUrl
# Response models. Each model states "type: object" explicitly instead of
# relying on tools to infer it from the presence of "properties".
definitions:
  DatasetResponse:
    description: "Dataset information."
    type: object
    properties:
      name:
        type: string
      search_placeholder_text:
        type: string
        description: Optional. What to show in the search box by default
      time_series_unit:
        type: string
        description: >
          If time_series_column in bigquery.json is set, this must be
          set. This is used to label the time axis -- eg Month or
          Year.
  FacetsResponse:
    description: "Results from a faceted search."
    type: object
    properties:
      facets:
        type: array
        items:
          $ref: "#/definitions/Facet"
      count:
        type: integer
        description: >
          Number of entities represented by current facet selection. For
          example, this could be 40, representing 40 people.
      invalid_filter_facets:
        description: >
          Facets that were passed in filter param that don't exist in
          Elasticsearch index. Example:
          - Data Explorer url contains
          filter=amppd.2019_v1_0101.demographics.sex=female which is valid. User
          saves a cohort with this filter
          - A new version of AMP PD is released. (Data explorer url remains the
          same.) The dataset 2019_v1_0101 is replaced by dataset 2019_v2_0401.
          - User won't be able to open saved cohort in DE;
          amppd.2019_v1_0101.demographics.sex is no longer in Elasticsearch
          index. invalid_filter_facets will contain
          amppd.2019_v1_0101.demographics.sex
        type: array
        items:
          type: string
      invalid_extra_facets:
        description: >
          Facets that were passed in extraFacets param that don't exist in
          Elasticsearch index. Example:
          - Data Explorer url contains
          extraFacets=amppd.2019_v1_0101.demographics.sex which is valid. User
          saves a cohort with this extra facet
          - A new version of AMP PD is released. (Data explorer url remains the
          same.) The dataset 2019_v1_0101 is replaced by dataset 2019_v2_0401.
          - User won't be able to open saved cohort in DE;
          amppd.2019_v1_0101.demographics.sex is no longer in Elasticsearch
          index. invalid_extra_facets will contain
          amppd.2019_v1_0101.demographics.sex
        type: array
        items:
          type: string
      sql_query:
        type: string
        description: >
          SQL query that can be used in BigQuery to get the cohort
          (list of participants) of the current filter.
  Facet:
    description: >
      A facet. For example, the Gender facet would include the facet name
      "Gender", as well as counts for all possible values.
    type: object
    properties:
      name:
        type: string
        description: Facet name, for example, "Gender".
      description:
        type: string
        description: Optional facet description.
      es_field_name:
        type: string
        description: The Elasticsearch field name.
      es_field_type:
        type: string
        description: The Elasticsearch field type.
      # If this is a regular facet (https://i.imgur.com/ug1mSEr.png),
      # only value_names and value_counts are set. If this is a time
      # series facet (https://i.imgur.com/IU2SqWv.png), then
      # value_names, time_names, time_series_value_counts are
      # set. Consider https://i.imgur.com/IU2SqWv.png: the data is a
      # table with 6 rows and 3 columns. Column names are time_names;
      # row names are value_names; time_series_value_counts are the
      # numbers in the table.
      value_names:
        type: array
        items:
          type: string
        description: >
          Array of names of possible facet values.
      value_counts:
        type: array
        items:
          type: integer
        description: Array of counts for each facet value.
      time_names:
        type: array
        # This is string because time may be "Unknown"
        items:
          type: string
        description: Array of times.
      time_series_value_counts:
        type: array
        items:
          type: array
          items:
            type: integer
        description: >
          2-dimensional array of facet value counts, indexed by time
          then value; indexes correspond to time_names and then
          value_names.
  ExportUrlResponse:
    description: "Information for sending data to Terra"
    type: object
    properties:
      url:
        type: string
        description: "URL-encoded signed URL of GCS file containing entities to export."
      authorization_domain:
        type: string
        description: "Optional FireCloud authorization domain"
  SearchResponse:
    # Wrapper around the list of search results.
    type: object
    properties:
      search_results:
        type: array
        items:
          $ref: "#/definitions/SearchResult"
  SearchResult:
    description: >
      Can represent a facet (Gender), or a facet and selected value (Gender = female).
    type: object
    properties:
      facet_name:
        type: string
        description: The name of the facet.
      facet_description:
        type: string
        description: Optional. The description of the facet.
      elasticsearch_field_name:
        type: string
        description: The Elasticsearch field name.
      facet_value:
        type: string
        description: >
          If this search result represents a facet, facet_value is the empty string.
          If this search result represents a facet and selected value, facet_value is the selected value.
      is_time_series:
        type: boolean
        description: >
          True if this is a time series field at a specific point in time.