In [1]:
# Authenticate first: https://cloud.google.com/docs/authentication/getting-started

import json
from queryt.queryt import Dataset, Condition as C, Column
from google.cloud import bigquery


# Read the "schema" entry from queryt/schema.json. The context manager closes the
# file handle deterministically instead of leaving it open until garbage collection.
with open("queryt/schema.json", "r", encoding="utf-8") as schema_file:
    schema = json.load(schema_file)["schema"]

# BigQuery client; credentials must already be configured (see the
# authentication link at the top of this cell).
client = bigquery.Client()

# Query wrapper bound to the clinical table that every cell below queries.
dataset = Dataset(client=client, table="gdc-bq-sample.gdc_metadata.r26_clinical")

In [2]:
# List the column paths available on the table; nested fields appear in
# dotted form (e.g. 'demographic.race').
available_columns = dataset.columns()
available_columns

['updated_datetime',
 'created_datetime',
 'state',
 'sample_ids',
 'submitter_sample_ids',
 'days_to_lost_to_followup',
 'case_id',
 'primary_site',
 'disease_type',
 'submitter_id',
 'lost_to_followup',
 'index_date',
 'days_to_consent',
 'consent_type',
 'demographic',
 'demographic.updated_datetime',
 'demographic.created_datetime',
 'demographic.submitter_id',
 'demographic.cause_of_death',
 'demographic.race',
 'demographic.demographic_id',
 'demographic.days_to_birth',
 'demographic.gender',
 'demographic.weeks_gestation_at_birth',
 'demographic.state',
 'demographic.vital_status',
 'demographic.year_of_birth',
 'demographic.premature_at_birth',
 'demographic.ethnicity',
 'demographic.year_of_death',
 'demographic.age_at_index',
 'demographic.days_to_death',
 'demographic.age_is_obfuscated',
 'demographic.occupation_duration_years',
 'demographic.cause_of_death_source',
 'demographic.country_of_residence_at_enrollment',
 'diagnoses',
 'diagnoses.ann_arbor_extranodal_involvement'

Rows reference the CDA MVP Query list: https://docs.google.com/spreadsheets/d/1VY_zL7NibJa2lpY5YWwYnPlxfKT_wjZn9I4EI1RRUCU/edit#gid=807012355

Row 16

Select data from the TCGA-OV project for donors over age 50 with Stage IIIC cancer. Open question: do we need to specify the cancer type?

In [3]:
# Row 16: TCGA-OV donors, aged 50 or more, with FIGO stage "Stage IIIC".
# NOTE(review): the task statement says "over age 50"; '>=' also admits donors
# aged exactly 50 — confirm which bound is intended.
age_filter = C('demographic.age_at_index', '>=', 50)
project_filter = C('project.project_id', '=', 'TCGA-OV')
stage_filter = C('diagnoses.figo_stage', '=', 'Stage IIIC')

# Conjunction of the three filters, nested left to right.
c = (
    age_filter
    .And(project_filter)
    .And(stage_filter)
)

# Echo the generated SQL before running it so the query can be inspected.
sql_text = dataset.sql(c)
print(sql_text)

rows = dataset.query(c)
rows.to_dataframe()

SELECT * FROM gdc-bq-sample.gdc_metadata.r26_clinical, UNNEST(project) AS _project, UNNEST(diagnoses) AS _diagnoses, UNNEST(demographic) AS _demographic WHERE (((_demographic.age_at_index >= 50) AND (_project.project_id = 'TCGA-OV')) AND (_diagnoses.figo_stage = 'Stage IIIC'))


Unnamed: 0,updated_datetime,created_datetime,state,sample_ids,submitter_sample_ids,days_to_lost_to_followup,case_id,primary_site,disease_type,submitter_id,...,year_of_birth,premature_at_birth,ethnicity,year_of_death,age_at_index,days_to_death,age_is_obfuscated,occupation_duration_years,cause_of_death_source,country_of_residence_at_enrollment
0,2019-08-06T14:40:06.221317-05:00,,released,"a1ec9279-c1a6-4e58-97ed-9ec1f36187c5, d2959ea3...","TCGA-04-1331-01A, TCGA-04-1331-10A",,6d10d4ee-6331-4bba-93bc-a7b64cc0b22a,Ovary,"Cystic, Mucinous and Serous Neoplasms",TCGA-04-1331,...,1926,,not hispanic or latino,2007.0,78,1336.0,,,,
1,2019-08-06T14:40:06.221317-05:00,,released,"b8243412-b7b4-4a7b-b823-4b33488ff429, 64197180...","TCGA-04-1332-01A, TCGA-04-1332-10A",,b46263ab-c3ca-4fda-a895-74c7e6e6fe22,Ovary,"Cystic, Mucinous and Serous Neoplasms",TCGA-04-1332,...,1932,,not hispanic or latino,2005.0,70,1247.0,,,,
2,2019-08-06T14:40:06.221317-05:00,,released,"85c1ca53-caa5-4a53-abc5-4c2bbfbdec6c, d8ab03a2...","TCGA-04-1337-01A, TCGA-04-1337-11A",,d1e974e7-dd68-40cc-ad06-2b57d964e5a1,Ovary,"Cystic, Mucinous and Serous Neoplasms",TCGA-04-1337,...,1927,,not hispanic or latino,2005.0,78,61.0,,,,
3,2019-08-06T14:40:06.221317-05:00,,released,"1055ea29-d254-48c4-8d7f-28320d631adf, 7b90d4ab...","TCGA-04-1338-11A, TCGA-04-1338-01A",,31872f6a-d225-4f91-b38d-4505d19e406c,Ovary,"Cystic, Mucinous and Serous Neoplasms",TCGA-04-1338,...,1927,,not hispanic or latino,,78,,,,,
4,2019-08-06T14:40:06.221317-05:00,,released,"19e73c0b-dcbe-47b5-9358-c497af4e570b, ea3b7dfd...","TCGA-04-1346-11A, TCGA-04-1346-01A",,25a0a9e6-4f5b-45d8-8f34-abfd31d5ff1b,Ovary,"Cystic, Mucinous and Serous Neoplasms",TCGA-04-1346,...,1930,,not hispanic or latino,,73,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,2019-08-06T14:42:15.255957-05:00,,released,"486a0b8b-e837-4b59-9677-7f4cb91e0799, 57eba0e7...","TCGA-61-2610-11A, TCGA-61-2610-01A, TCGA-61-26...",,1098441d-fb10-4590-a302-0a7cc2332798,Ovary,"Cystic, Mucinous and Serous Neoplasms",TCGA-61-2610,...,1937,,not reported,2002.0,61,1579.0,,,,
332,2019-08-06T14:42:15.255957-05:00,,released,"71380c4a-6782-485f-882d-26ba8b3308ee, 13cba33b...","TCGA-61-2612-11A, TCGA-61-2612-01A",,c040df8a-9d6d-43bd-911a-17a7b4e1ec74,Ovary,"Cystic, Mucinous and Serous Neoplasms",TCGA-61-2612,...,1939,,not reported,2002.0,63,193.0,,,,
333,2019-08-06T14:42:15.255957-05:00,,released,"19d1b6aa-bf64-4749-8a7a-a3f3a020cbc3, 14b03385...","TCGA-61-2613-01A, TCGA-61-2613-11A",,c88502e9-0bc6-47c3-adc4-d98b63c7797b,Ovary,"Cystic, Mucinous and Serous Neoplasms",TCGA-61-2613,...,1925,,not reported,2000.0,73,945.0,,,,
334,2019-08-06T14:42:15.255957-05:00,,released,"c8cd2d09-541f-4daa-a71a-34242b997f4b, 5631f6eb...","TCGA-61-2614-01A, TCGA-61-2614-10A",,09643c0d-efdb-46b4-94df-018bd6134a88,Ovary,"Cystic, Mucinous and Serous Neoplasms",TCGA-61-2614,...,1936,,not reported,2007.0,71,262.0,,,,


Row 17

Select data from female donors from the TCGA-HNSC project from tissue samples from the tongue and exposure to both smoking and alcohol

In [4]:
# Row 17, filters 1 and 2: restrict to the TCGA-HNSC project and to female donors.
# The names c1/c2 are consumed by the combined query further down.
c1 = C(
    'project.project_id',
    '=',
    'TCGA-HNSC',
)
c2 = C(
    'demographic.gender',
    '=',
    'female',
)

In [5]:
# Enumerate the distinct tissue/organ-of-origin values to find the exact
# label used for tongue samples.
tissue_column = Column("diagnoses.tissue_or_organ_of_origin")
dataset.unique_values_for(tissue_column)

['Abdomen, NOS',
 'Adrenal gland, NOS',
 'Ampulla of Vater',
 'Anterior floor of mouth',
 'Anterior mediastinum',
 'Anterior wall of bladder',
 'Anus, NOS',
 'Aortic body and other paraganglia',
 'Appendix',
 'Ascending colon',
 'Autonomic nervous system, NOS',
 'Base of tongue, NOS',
 'Biliary tract, NOS',
 'Bladder neck',
 'Bladder, NOS',
 'Body of pancreas',
 'Body of stomach',
 'Bone marrow',
 'Bone, NOS',
 'Bones of skull and face and associated joints',
 'Border of tongue',
 'Brain stem',
 'Brain, NOS',
 'Breast, NOS',
 'Cardia, NOS',
 'Cecum',
 'Cerebellum, NOS',
 'Cerebrum',
 'Cervix uteri',
 'Cheek mucosa',
 'Choroid',
 'Ciliary body',
 'Colon, NOS',
 'Conjunctiva',
 'Connective, subcutaneous and other soft tissues of abdomen',
 'Connective, subcutaneous and other soft tissues of head, face, and neck',
 'Connective, subcutaneous and other soft tissues of lower limb and hip',
 'Connective, subcutaneous and other soft tissues of pelvis',
 'Connective, subcutaneous and other soft

In [6]:
# Row 17, filter 3: tissue of origin must be exactly 'Tongue, NOS'.
c3 = C(
    'diagnoses.tissue_or_organ_of_origin',
    '=',
    'Tongue, NOS',
)

In [7]:
# Enumerate the distinct alcohol-history values before filtering on them.
alcohol_column = Column('exposures.alcohol_history')
dataset.unique_values_for(alcohol_column)

[None, 'No', 'Not Reported', 'Yes']

In [8]:
# Row 17, filter 4: alcohol history recorded as 'Yes'.
c4 = C(
    'exposures.alcohol_history',
    '=',
    'Yes',
)

In [9]:
# Enumerate the distinct tobacco-smoking-status values before filtering on them.
smoking_column = Column('exposures.tobacco_smoking_status')
dataset.unique_values_for(smoking_column)

[None, 1, 2, 3, 4, 5, 7]

In [10]:
# Row 17, filter 5: keep rows where a smoking status is recorded at all
# (None is rendered as "IS NOT NULL" in the generated SQL).
# NOTE(review): any non-null status code passes this filter, so it does not by
# itself separate smokers from non-smokers — confirm what the codes mean.
c5 = C(
    'exposures.tobacco_smoking_status',
    'IS NOT',
    None,
)

In [11]:
# Row 17: AND together the five filters defined in the preceding cells
# (project, gender, tissue, alcohol history, smoking status), nested left to right.
c = (
    c1
    .And(c2)
    .And(c3)
    .And(c4)
    .And(c5)
)

# Echo the generated SQL before running it so the query can be inspected.
sql_text = dataset.sql(c)
print(sql_text)

rows = dataset.query(c)
rows.to_dataframe()

SELECT * FROM gdc-bq-sample.gdc_metadata.r26_clinical, UNNEST(project) AS _project, UNNEST(exposures) AS _exposures, UNNEST(diagnoses) AS _diagnoses, UNNEST(demographic) AS _demographic WHERE (((((_project.project_id = 'TCGA-HNSC') AND (_demographic.gender = 'female')) AND (_diagnoses.tissue_or_organ_of_origin = 'Tongue, NOS')) AND (_exposures.alcohol_history = 'Yes')) AND (_exposures.tobacco_smoking_status IS NOT NULL))


Unnamed: 0,updated_datetime,created_datetime,state,sample_ids,submitter_sample_ids,days_to_lost_to_followup,case_id,primary_site,disease_type,submitter_id,...,year_of_birth,premature_at_birth,ethnicity,year_of_death,age_at_index,days_to_death,age_is_obfuscated,occupation_duration_years,cause_of_death_source,country_of_residence_at_enrollment


Apparently GDC does not contain any data matching this combination of conditions (the query above returns no rows). To show that the query mechanism works, we can relax the smoking status criterion.

Select data from female donors from the TCGA-HNSC project from tissue samples from the tongue and exposure to alcohol

In [12]:
# Relaxed Row 17 query: the same project, gender, tissue and alcohol filters as
# before, restated here, with the smoking-status filter left out.
hnsc_project = C('project.project_id', '=', 'TCGA-HNSC')
female_donor = C('demographic.gender', '=', 'female')
tongue_origin = C('diagnoses.tissue_or_organ_of_origin', '=', 'Tongue, NOS')
alcohol_yes = C('exposures.alcohol_history', '=', 'Yes')

# Conjunction of the four filters, nested left to right.
c = (
    hnsc_project
    .And(female_donor)
    .And(tongue_origin)
    .And(alcohol_yes)
)

# Echo the generated SQL before running it so the query can be inspected.
sql_text = dataset.sql(c)
print(sql_text)

rows = dataset.query(c)
rows.to_dataframe()

SELECT * FROM gdc-bq-sample.gdc_metadata.r26_clinical, UNNEST(project) AS _project, UNNEST(exposures) AS _exposures, UNNEST(diagnoses) AS _diagnoses, UNNEST(demographic) AS _demographic WHERE ((((_project.project_id = 'TCGA-HNSC') AND (_demographic.gender = 'female')) AND (_diagnoses.tissue_or_organ_of_origin = 'Tongue, NOS')) AND (_exposures.alcohol_history = 'Yes'))


Unnamed: 0,updated_datetime,created_datetime,state,sample_ids,submitter_sample_ids,days_to_lost_to_followup,case_id,primary_site,disease_type,submitter_id,...,year_of_birth,premature_at_birth,ethnicity,year_of_death,age_at_index,days_to_death,age_is_obfuscated,occupation_duration_years,cause_of_death_source,country_of_residence_at_enrollment
0,2019-08-06T14:25:14.243346-05:00,,released,"c4dd75fb-a4f3-47b1-aeb8-1fcc6164bdbd, 327bbca7...","TCGA-BA-4077-10A, TCGA-BA-4077-01Z, TCGA-BA-40...",,4bfbce2b-9d0b-4e8a-950f-fd8e0ba3e05a,Other and unspecified parts of tongue,Squamous Cell Neoplasms,TCGA-BA-4077,...,1958,,not hispanic or latino,2006.0,45,1134.0,,,,
1,2019-08-06T14:25:14.243346-05:00,,released,"2ab035a1-3f39-49f8-90ae-a58b8504b98e, 0d0ef61c...","TCGA-BA-A6DB-01A, TCGA-BA-A6DB-10A, TCGA-BA-A6...",,207c8a26-1a2d-47d9-8db8-a25a7fd8655a,Other and unspecified parts of tongue,Squamous Cell Neoplasms,TCGA-BA-A6DB,...,1988,,not hispanic or latino,,24,,,,,
2,2019-08-06T14:25:14.243346-05:00,,released,"db2d1e7c-76c4-41a3-92d8-d07ad4463a9a, eec2369a...","TCGA-BA-A6DE-10A, TCGA-BA-A6DE-01Z, TCGA-BA-A6...",,116ad004-929d-4c8d-8eee-883d132e0fe5,Other and unspecified parts of tongue,Squamous Cell Neoplasms,TCGA-BA-A6DE,...,1943,,not hispanic or latino,,70,,,,,
3,2019-08-06T14:25:14.243346-05:00,,released,"2840e28d-196f-40d3-89c0-69d083b8f8bb, 655073c2...","TCGA-BB-7863-01A, TCGA-BB-7863-10A, TCGA-BB-78...",,31202ace-6480-46db-8e0d-b4c64149e326,Other and unspecified parts of tongue,Squamous Cell Neoplasms,TCGA-BB-7863,...,1968,,hispanic or latino,,43,,,,,
4,2019-08-06T14:25:25.511101-05:00,,released,"206fa54c-7fb3-442a-9a50-09c5cea8a6c7, f0ef7803...","TCGA-CN-4736-10A, TCGA-CN-4736-01A, TCGA-CN-47...",,0fb3e5ff-54f0-43c5-9322-541bb7825e7f,Other and unspecified parts of tongue,Squamous Cell Neoplasms,TCGA-CN-4736,...,1940,,not hispanic or latino,2011.0,70,395.0,,,,
5,2019-08-06T14:25:25.511101-05:00,,released,"580d4b59-8fb5-49bc-8030-56524652ddc3, 952cf6ee...","TCGA-CN-4742-01Z, TCGA-CN-4742-01A, TCGA-CN-47...",,444e5bfc-d4e5-4b0d-b96d-88ecea8873e9,Other and unspecified parts of tongue,Squamous Cell Neoplasms,TCGA-CN-4742,...,1960,,not hispanic or latino,2009.0,48,397.0,,,,
6,2019-08-06T14:25:25.511101-05:00,,released,"da2fe372-d082-40b6-81af-f895050af58d, 92b0b8b0...","TCGA-CN-5367-01A, TCGA-CN-5367-01Z, TCGA-CN-53...",,61d0709c-d148-413b-a7b5-c0c82ed2f32f,Other and unspecified parts of tongue,Squamous Cell Neoplasms,TCGA-CN-5367,...,1946,,not hispanic or latino,2006.0,60,352.0,,,,
7,2019-08-06T14:25:39.854271-05:00,,released,"663610ea-beda-4364-8601-8a3672391897, 78565a47...","TCGA-CN-A640-10A, TCGA-CN-A640-01A, TCGA-CN-A6...",,ed46cc12-2edd-4c95-9814-54b5b6682fcd,Other and unspecified parts of tongue,Squamous Cell Neoplasms,TCGA-CN-A640,...,1972,,not hispanic or latino,2012.0,40,134.0,,,,
8,2019-08-06T14:25:39.854271-05:00,,released,"a10f3b24-ff67-48fb-9eb3-afb10b3d36fb, 2aad293c...","TCGA-CQ-6219-01Z, TCGA-CQ-6219-10A, TCGA-CQ-62...",,0ca3d25a-22e9-4e40-834b-65f402535005,Other and unspecified parts of tongue,Squamous Cell Neoplasms,TCGA-CQ-6219,...,1957,,not hispanic or latino,2008.0,50,479.0,,,,
9,2019-08-06T14:25:53.026261-05:00,,released,"f3f57fc3-e8d6-452e-bead-edbe25e4bad8, e38f9f90...","TCGA-CQ-A4CE-10A, TCGA-CQ-A4CE-01A, TCGA-CQ-A4...",,e1527704-8a97-445f-a5ee-357920cb28f7,Other and unspecified parts of tongue,Squamous Cell Neoplasms,TCGA-CQ-A4CE,...,1935,,not reported,,76,,,,,


Row 13

"Find sequencing data from samples where the donor is Asian"

**Problem identified**
1. Current table does not have links to sequencing data

In [13]:
# Enumerate the distinct race values to find the label used for Asian donors.
race_column = Column("demographic.race")
dataset.unique_values_for(race_column)

['Unknown',
 'american indian or alaska native',
 'asian',
 'black or african american',
 'native hawaiian or other pacific islander',
 'not allowed to collect',
 'not reported',
 'other',
 'unknown',
 'white']

In [14]:
# Row 13: donors whose recorded race is 'asian'.
asian_donor = C('demographic.race', '=', 'asian')

# Echo the generated SQL before running it so the query can be inspected.
sql_text = dataset.sql(asian_donor)
print(sql_text)

rows = dataset.query(asian_donor)

# Display only the first DataFrame yielded by the result iterable.
frame_iter = rows.to_dataframe_iterable()
first_frame = next(frame_iter)
first_frame

SELECT * FROM gdc-bq-sample.gdc_metadata.r26_clinical, UNNEST(demographic) AS _demographic WHERE (_demographic.race = 'asian')


Unnamed: 0,updated_datetime,created_datetime,state,sample_ids,submitter_sample_ids,days_to_lost_to_followup,case_id,primary_site,disease_type,submitter_id,...,year_of_birth,premature_at_birth,ethnicity,year_of_death,age_at_index,days_to_death,age_is_obfuscated,occupation_duration_years,cause_of_death_source,country_of_residence_at_enrollment
0,2020-01-09T17:27:25.659673-06:00,2019-08-15T08:16:17.871087-05:00,released,8aec87a0-5e3a-4f77-b4f6-7fb1c402f0fb,PT-RU72-01,,954d88e0-e578-41dd-8a49-21ae84abf81b,Brain,Not Applicable,GTEX-RU72-0011-R10A-SM-HAKXS,...,,,Unknown,,59.0,0.0,,,,
1,2019-10-24T07:59:21.887408-05:00,2017-01-25T15:29:16.160843-06:00,released,"2417f1af-62b4-4806-bd2f-ffbe294add21, 0ca45495...","9751077c-61c1-49df-9b6f-db7d6e, 08296da7-70ea-...",,82ba31a6-dd05-4f6f-88d9-8f961822d732,Other and unspecified female genital organs,"Cystic, Mucinous and Serous Neoplasms",04OV012,...,,,not hispanic or latino,,,,,,,
2,2019-10-24T07:59:21.887408-05:00,2017-01-25T15:29:16.160843-06:00,released,"5ef944a2-7bdb-4ca5-aa78-9ef98aa1437f, d1dafab7...","1c7e6865-4a6b-41ee-8e3b-456330, bd266bdd-12e4-...",,8569f0ba-0f77-4a53-af49-127604bec479,Other and unspecified female genital organs,"Cystic, Mucinous and Serous Neoplasms",04OV063,...,,,not hispanic or latino,,,,,,,
3,2020-01-09T17:27:25.659673-06:00,2018-10-15T09:04:42.282297-05:00,released,"ac8664aa-6fe4-429a-8223-ea6920f9fe0f, 7ed3d7a2...","C3L-02984-03, C3L-02984-02, C3L-02984-04, C3L-...",,5718a50d-332c-4882-8736-2ca8989946dd,Brain,Gliomas,C3L-02984,...,1983.0,,not hispanic or latino,,,,,,,
4,2019-10-25T13:56:23.173959-05:00,2018-02-20T16:11:27.193958-06:00,released,2b1baf92-6bf1-4114-a619-c4bcc863d8b6,DLBCL11295-sample,,e198a237-2675-4e8e-a903-d76b66da8a61,Unknown,Mature B-Cell Lymphomas,CTSP-B02I,...,1931.0,,not hispanic or latino,,27426.0,,False,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2735,2019-11-19T10:19:29.140531-06:00,2019-05-31T13:49:42.092784-05:00,released,b50a75fc-b07f-4455-86f6-f482595771d9,GENIE-MSK-P-0019027-T01-IM6,,b6bd5d7f-5787-4de1-97f0-2ca7ca6eff2c,Unknown,Adenomas and Adenocarcinomas,GENIE-MSK-P-0019027,...,,,not hispanic or latino,,14244.0,,,,,
2736,2019-11-19T10:19:29.140531-06:00,2019-05-31T14:19:52.635329-05:00,released,3fa11281-febb-438b-aced-37cf69a82719,GENIE-MSK-P-0019406-T01-IM6,,651fab13-f400-449d-980a-a24da27b5e4a,Unknown,Adenomas and Adenocarcinomas,GENIE-MSK-P-0019406,...,,,not hispanic or latino,,14610.0,,,,,
2737,2019-11-19T10:19:29.140531-06:00,2019-05-31T13:49:42.092784-05:00,released,d1017128-aed9-4b7c-9b28-1432416cdeb3,GENIE-MSK-P-0019655-T01-IM6,,b31e557e-3b96-493c-908e-c2f68be68ba8,Unknown,Adenomas and Adenocarcinomas,GENIE-MSK-P-0019655,...,,,not hispanic or latino,,15340.0,,,,,
2738,2019-11-19T10:19:29.140531-06:00,2019-05-31T14:06:49.866017-05:00,released,461f6858-d98e-44ea-9497-91fc30c87897,GENIE-MSK-P-0019972-T01-IM6,,e5a756fa-8fa9-4dde-bb05-f5f7d48d695d,Unknown,"Epithelial Neoplasms, NOS",GENIE-MSK-P-0019972,...,,,not hispanic or latino,,21915.0,,,,,



Row 8
"Find data across all species with a diagnosis of 'spontaneous mammary carcinoma'"

**Problem identified**
What do we do with repeated column names? In this case it is "disease_type"

In [15]:
# NOTE(review): left disabled. Unlike the other unique_values_for calls in this
# notebook, this one passes a bare string rather than a Column(...); presumably
# it is parked because of the repeated "disease_type" column name described
# above — confirm before re-enabling.
# dataset.unique_values_for("disease_type")

In [16]:
# Equality filter on the nested diagnoses.disease_type field.
# NOTE(review): the value used here is 'Acinar Cell Neoplasms', not the
# "spontaneous mammary carcinoma" named in the Row 8 request — confirm intent.
c1 = C(
    'diagnoses.disease_type',
    '=',
    'Acinar Cell Neoplasms',
)
# Not executed; kept for reference (see the problem noted for this row):
# c1.as_df(columns, dataset)  # see problems identified

Row 11

"Researcher would like to identify all lung samples in the repository with the following characteristics:

- Sample from primary tumor tissue (not cell line)
- Disease = triple negative breast cancer
- Subjects – under the age of 50 years – male or female"

**Problem identified**

1. User issue. It is not clear how to determine the sample type.
2. User issue. Could not find "triple negative breast cancer" among the disease type values.


In [17]:
# Row 11 (partial): only the "under the age of 50" criterion is expressed here;
# the sample-type and disease criteria are not applied (see the problems above).
c1 = C(
    'demographic.age_at_index',
    '<',
    50,
)

# Echo the generated SQL before running it so the query can be inspected.
sql_text = dataset.sql(c1)
print(sql_text)

rows = dataset.query(c1)

# Display only the first DataFrame yielded by the result iterable.
frame_iter = rows.to_dataframe_iterable()
first_frame = next(frame_iter)
first_frame

SELECT * FROM gdc-bq-sample.gdc_metadata.r26_clinical, UNNEST(demographic) AS _demographic WHERE (_demographic.age_at_index < 50)


Unnamed: 0,updated_datetime,created_datetime,state,sample_ids,submitter_sample_ids,days_to_lost_to_followup,case_id,primary_site,disease_type,submitter_id,...,year_of_birth,premature_at_birth,ethnicity,year_of_death,age_at_index,days_to_death,age_is_obfuscated,occupation_duration_years,cause_of_death_source,country_of_residence_at_enrollment
0,2020-01-09T17:27:25.659673-06:00,2019-08-15T08:16:17.871087-05:00,released,4dac9fd4-c871-4cb5-840f-c545c1dff457,PT-P44H-01,,20bda057-349d-46f7-aeb5-ccf011cb107f,Brain,Not Applicable,GTEX-P44H-0011-R10A-SM-HAKXX,...,,,not reported,,43,0.0,,,,
1,2019-08-06T14:35:37.527864-05:00,,released,"49d06d41-58d6-439d-9d5a-69382a5a64ee, 79586ed4...","TCGA-05-4420-01A, TCGA-05-4420-01Z, TCGA-05-44...",,3d2aa654-1b5f-4eb4-a1c2-af31f5760069,Bronchus and lung,Adenomas and Adenocarcinomas,TCGA-05-4420,...,1967.0,,not reported,,41,,,,,
2,2019-08-06T14:23:39.894683-05:00,,released,"b8187475-69da-4a02-a544-3435344daab8, 482c6c5a...","TCGA-06-A5U0-01A, TCGA-06-A5U0-10A",,603d2b61-b636-4054-ac95-ab66dabb3b48,Brain,Gliomas,TCGA-06-A5U0,...,1991.0,,not hispanic or latino,,21,,,,,
3,2019-08-06T14:23:39.894683-05:00,,released,"5398667c-8d52-4ba0-80e0-e8376dc8c87d, 0f7f7100...","TCGA-06-A7TL-01A, TCGA-06-A7TL-10A",,0ca72ebd-ff33-45b8-a97c-3f1435603d71,Brain,Gliomas,TCGA-06-A7TL,...,1983.0,,not hispanic or latino,,30,,,,,
4,2019-08-06T14:22:07.752619-05:00,,released,"9961f6a4-796e-4970-841c-a7577d31dd3e, b0a70954...","TCGA-2H-A9GH-01Z, TCGA-2H-A9GH-01A, TCGA-2H-A9...",,606dc5b8-7625-42a6-a936-504ef25623a4,Esophagus,Adenomas and Adenocarcinomas,TCGA-2H-A9GH,...,1954.0,,not reported,2000.0,44,951.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2043,2019-08-06T14:51:19.968305-05:00,,released,"f15cadb0-3a5f-4844-b1af-12355802bf77, f450325c...","TCGA-2G-AAHP-01Z, TCGA-2G-AAHP-05Z, TCGA-2G-AA...",,d7d793f6-eaea-4dd3-8237-c2c02b042340,Testis,Germ Cell Neoplasms,TCGA-2G-AAHP,...,1968.0,,not hispanic or latino,,31,,,,,
2044,2019-08-06T14:51:19.968305-05:00,,released,"1724aa95-91f0-44c7-8f89-d5045f404677, d8022e3a...","TCGA-2G-AAHT-10A, TCGA-2G-AAHT-01A, TCGA-2G-AA...",,905c998d-4219-46ef-9906-8e92d7416794,Testis,Germ Cell Neoplasms,TCGA-2G-AAHT,...,1981.0,,not hispanic or latino,,32,,,,,
2045,2019-08-06T14:51:19.968305-05:00,,released,"2220880d-0b51-441d-a6c6-65094af16d4b, ac420efc...","TCGA-2G-AAKD-10A, TCGA-2G-AAKD-01Z, TCGA-2G-AA...",,9c947fba-e570-4a4b-9d16-9496dae1ee2e,Testis,Germ Cell Neoplasms,TCGA-2G-AAKD,...,1979.0,,not hispanic or latino,,18,,,,,
2046,2019-08-06T14:51:19.968305-05:00,,released,"3a919db9-6c8f-43e7-b2c5-f36b3175c9d4, b70453ae...","TCGA-2G-AAKG-01A, TCGA-2G-AAKG-05Z, TCGA-2G-AA...",,43cc3c60-f0a6-4b44-bd7b-8ec4da71991f,Testis,Germ Cell Neoplasms,TCGA-2G-AAKG,...,1974.0,,not hispanic or latino,,22,,,,,
