// bio_metadata.proto (original listing: 218 lines, 180 loc, 8.5 KB).
// NOTE: The repository hosting this file was archived by its owner on
// Oct 28, 2022 and is now read-only.
syntax = "proto3";
package ga4gh;
import "ga4gh/common.proto";
// Describes a single individual (also called a subject), which typically
// corresponds to one human being or one organism of another species.
message Individual {
  // The :ref:`id <apidesign_object_ids>` of this Individual. It is unique
  // within the context of the server instance.
  string id = 1;

  // ID of the dataset to which this Individual belongs.
  string dataset_id = 2;

  // Name of the Individual, i.e. a label or symbolic identifier for it.
  string name = 3;

  // Human-readable free-text description of the Individual.
  // "description" attributes should not contain any structured data.
  string description = 4;

  // List of BioCharacteristic objects describing diseases, phenotypes ...
  // associated with this Individual. Values given here may overlap with
  // values recorded in related records. As a general rule, "germline" or
  // non-organ-specific afflictions ("Li-Fraumeni Syndrome",
  // "Diabetes mellitus") should be recorded here, whereas
  // "Infiltrating duct carcinoma" may be recorded both here (as known
  // disease history) and in the "Biosample.bio_characteristics" attribute.
  // Queries for all disease contexts related to an individual should
  // therefore cover ($in)
  // [Individual.bio_characteristics OR Biosample.bio_characteristics].
  repeated BioCharacteristic bio_characteristics = 9;

  // The :ref:`ISO 8601<metadata_date_time>` time at which the record of this
  // Individual was created.
  string created = 5;

  // The :ref:`ISO 8601<metadata_date_time>` time at which the record of this
  // Individual was updated.
  string updated = 6;

  // Species of the Individual, given as an OntologyTerm. To represent an
  // NCBI Taxon ID as an OntologyTerm, see the NCBITaxon Ontology:
  //   http://www.obofoundry.org/ontology/ncbitaxon.html
  // For example, 'Homo sapiens' has the ID 9606; its NCBITaxon ontology ID
  // is NCBITaxon:9606, which gives:
  //   species : { term_id: "NCBITaxon:9606", term : "Homo sapiens" }
  OntologyTerm species = 7;

  // Genetic sex of the Individual.
  // Use `null` when unknown or not applicable.
  // Recommended: PATO http://purl.obolibrary.org/obo/PATO_0020000 ...
  // Example:
  //   sex : { term_id: "PATO:0020000", term : "female genetic sex" }
  OntologyTerm sex = 8;

  // Address, coded as a geolocation, where the Individual originated from.
  // This should preferably reflect the place of birth or the main place of
  // living, and not necessarily a current address.
  GeoLocation location = 13;

  // Map of additional information regarding the Individual.
  Attributes attributes = 10;

  // External identifiers representing this Individual. They are treated as
  // different representations of the same record, not as records that stand
  // in some other relation to the record at hand.
  repeated ExternalIdentifier external_identifiers = 11;
}
// A Biosample is a unit of biological material from which the substrate
// molecules (e.g. genomic DNA, RNA, proteins) for molecular analyses (e.g.
// sequencing, array hybridisation, mass-spectrometry) are extracted.
// Examples are a tissue biopsy, a single cell from a culture used for single
// cell genome sequencing, or a protein fraction from a gradient
// centrifugation.
// The same Biosample may be referenced by several instances (e.g. technical
// replicates) or types of experiments (e.g. genomic array as well as RNA-seq
// experiments).
// Within the GA4GH metadata schema, Biosample is the central reference
// object.
message Biosample {
  // The :ref:`id <apidesign_object_ids>` of this Biosample. It is unique
  // within the context of the server instance.
  string id = 1;

  // ID of the dataset to which this Biosample belongs.
  string dataset_id = 2;

  // Name of the Biosample, i.e. a label or symbolic identifier for it.
  string name = 3;

  // Human-readable free-text description of the Biosample.
  // "description" attributes should not contain any structured data.
  string description = 4;

  // List of BioCharacteristic objects describing diseases, phenotypes,
  // source ... associated with this Biosample.
  repeated BioCharacteristic bio_characteristics = 5;

  // The :ref:`ISO 8601<metadata_date_time>` time at which this Biosample
  // record was created.
  string created = 6;

  // The :ref:`ISO 8601<metadata_date_time>` time at which this Biosample
  // record was updated.
  string updated = 7;

  // The individual from which this Biosample was derived.
  string individual_id = 8;

  // Map of additional information about the Biosample.
  Attributes attributes = 10;

  // External identifiers representing this Biosample. They are treated as
  // different representations of the same record, not as records that stand
  // in some other relation to the record at hand.
  repeated ExternalIdentifier external_identifiers = 11;

  // Age, at the time of collection, of the individual from which this
  // Biosample was derived. The Age object can encode the age either as an
  // ISO8601 duration or time interval (preferred), or as an ontology term
  // object.
  // Example:
  //   "individual_age_at_collection": {
  //     "age": "P12Y0M",
  //     "age_class": {
  //       "term": "Juvenile onset",
  //       "term_id": "HP:0003621"
  //     }
  //   },
  Age individual_age_at_collection = 12;

  // Address, coded as a GeoLocation, where the Biosample was collected.
  GeoLocation location = 13;
}
// An Age can carry two encodings: an age value in ISO8601 with arbitrary
// granularity (considered the default), and an "age class" expressed as a
// qualitative ontology term.
// When a quantitative value is available it should be used and takes
// precedence over the age class; assigning a class should then be done at
// the user/API level.
message Age {
  // The :ref:`ISO 8601<metadata_date_time>` age, written as an ISO8601
  // duration or time interval. With a time interval no additional anchor is
  // needed (i.e. DOB and age can be expressed together as a start-anchored
  // time interval, e.g. 1967-11-21/P40Y10M05D).
  // TODO: Anonymous reference time attribute/pointer?
  string age = 1;

  // An age class, e.g. corresponding to the use of "age of onset" in HPO.
  // HPO is recommended, for example the subclasses of "Onset":
  //   http://purl.obolibrary.org/obo/HP_0003674
  // Example:
  //   age_class : { term_id : "HP:0003596", term : "Middle age onset" }
  OntologyTerm age_class = 2;
}
// BioCharacteristic is a prototype wrapper object for a single instance of a
// phenotype, disease ... which may be described through one or several
// ontology terms.
message BioCharacteristic {
  // Free-text description of the specific disease diagnosis or phenotype,
  // which is then characterized by zero or more OntologyTerm objects.
  // Keep the description concise and leave out data points that are better
  // expressed through specific attributes elsewhere in the schema.
  // Example (for a single disease item):
  //   "squamous cell carcinoma, base of tongue, stage 2"
  string description = 1;

  // List of zero (discouraged) or more OntologyTerm objects covering the
  // characteristic (e.g. disease diagnosis, phenotype) reported here.
  // Example (for a single diagnosis
  // "squamous cell carcinoma, base of tongue"):
  //
  //   term_id: "DOID:0050865",
  //   term: "tongue squamous cell carcinoma",
  //
  //   term_id: "UBERON:0006919",
  //   term: "tongue squamous epithelium",
  //
  //   term_id: "UBERON:0010033",
  //   term: "posterior part of tongue",
  //
  repeated OntologyTerm ontology_terms = 2;

  // Negated ontology terms describe features which are explicitly not part
  // of the BioCharacteristic.
  // Example: For a phenotype
  //
  //   description: "Bilateral ventricle anomalies (but not hypertrophy)"
  //
  // ... one could use the ontology terms
  //
  //   term_id: "HP:0001711"
  //   term: "Abnormality of the left ventricle"
  //
  //   term_id: "HP:0001707"
  //   term: "Abnormality of the right ventricle"
  //
  // ... and add to the negated ontology terms
  //
  //   term_id: "HP:0001714"
  //   term: "Ventricular hypertrophy"
  //
  repeated OntologyTerm negated_ontology_terms = 3;

  // Logical scope of this BioCharacteristic. Typical examples could be
  // "phenotype", "disease", "observation", "source".
  // TODO: This may be modified into an enumeration or expressed through
  // an OntologyTerm.
  string scope = 4;
}