# **Filtering TCGA-BRCA Metadata from the GDC API**

This notebook produces the AT_FOR family of data artifacts  

- TCGA: The Cancer Genome Atlas  
- BRCA: Breast Invasive Carcinoma  
- GDC: Genomic Data Commons

# Import Libraries and Configurations

In [1]:
import os
import sys
from ast import literal_eval

import numpy as np
import pandas as pd

# Add project root to Python's path
sys.path.append(os.path.abspath(os.path.join('..')))

from config import (
    BRCA_PREPROCESSING_PARAMETERS,
    BRCA_PROCESSED_FILES_PATHS,
    BRCA_RAW_FILES_PATHS,
)

# Load the Metadata from the GDC API

In [2]:
# Load the TCGA-BRCA cases and files metadata, one DataFrame per raw CSV file
# (the paths come from the project configuration)
df_api_cases, df_files = (
    pd.read_csv(BRCA_RAW_FILES_PATHS[key]) for key in ('cases', 'files')
)

In [3]:
# Display the DataFrame of TCGA-BRCA cases metadata (as the last expression
# of the cell, the DataFrame is rendered by the notebook rather than printed)
df_api_cases

Unnamed: 0,case_id,submitter_id,disease_type
0,b1d44c81-747d-471f-9093-aeb262a17975,TCGA-Z7-A8R6,"Epithelial Neoplasms, NOS"
1,b205bba0-1870-4458-9088-8817e20389fe,TCGA-A8-A09A,Ductal and Lobular Neoplasms
2,b205c89f-af62-4186-acad-ed23d243fa98,TCGA-A2-A0YL,Ductal and Lobular Neoplasms
3,b26d41cd-393b-4cd4-8925-a9488f7de576,TCGA-C8-A1HE,Ductal and Lobular Neoplasms
4,b2aac45b-2073-4c7a-adb9-769a4fdcc111,TCGA-E9-A1NH,Ductal and Lobular Neoplasms
...,...,...,...
1093,af5453a9-cf1f-40de-aec4-0e0710908fb7,TCGA-A8-A0AD,Ductal and Lobular Neoplasms
1094,af97e043-88cb-4f99-8f8b-9bcbcccdf842,TCGA-EW-A2FR,Ductal and Lobular Neoplasms
1095,b0700958-5f90-4546-b35f-635cd506889b,TCGA-PE-A5DD,Ductal and Lobular Neoplasms
1096,b0bcb829-cee0-4247-bb75-093a5bea89ee,TCGA-E2-A105,Ductal and Lobular Neoplasms


In [4]:
# Display the DataFrame of TCGA-BRCA files metadata (as the last expression
# of the cell, the DataFrame is rendered by the notebook rather than printed)
df_files

Unnamed: 0,file_id,case_id,access,experimental_strategy,data_type,data_format,samples
0,75a8669a-692c-468c-8469-d2e61f6d37d8,cc348a26-ee11-47a4-8b51-de922967e175,open,Methylation Array,Masked Intensities,IDAT,"[{'sample_type': 'Primary Tumor', 'tissue_type..."
1,7abd94ba-570e-43d4-9360-ad2d8399c960,f2bbfa9d-9a9d-4f46-9fde-378e4c44e2ad,controlled,WGS,Structural Rearrangement,BEDPE,"[{'sample_type': 'Blood Derived Normal', 'tiss..."
2,97ef35dc-9c03-49ce-8103-5e8c55be8805,f2bbfa9d-9a9d-4f46-9fde-378e4c44e2ad,controlled,RNA-Seq,Transcript Fusion,TSV,"[{'sample_type': 'Solid Tissue Normal', 'tissu..."
3,0518551d-4df2-4124-b68d-494200c5586b,a6edb6ca-ae9f-4da7-8ebe-92d83d2987fb,open,WXS,Masked Somatic Mutation,MAF,"[{'sample_type': 'Primary Tumor', 'tissue_type..."
4,2044bb65-05cf-4ef7-9543-3b7c5c2ff4d5,ec0ab947-9341-4fff-bda4-fdfb9434d508,open,Methylation Array,Masked Intensities,IDAT,"[{'sample_type': 'Primary Tumor', 'tissue_type..."
...,...,...,...,...,...,...,...
91917,c680a261-9fdf-48dd-9e87-274881d700c8,da70cf7e-0e61-4c72-b4c5-c408569d11b8,open,miRNA-Seq,miRNA Expression Quantification,TXT,"[{'sample_type': 'Primary Tumor', 'tissue_type..."
91918,ad76f12b-1710-4e83-8cf2-21c1b6863d8e,d9627184-b972-4e00-8c11-b0b946ac357a,controlled,WGS,Aligned Reads,BAM,"[{'sample_type': 'Blood Derived Normal', 'tiss..."
91919,42eacf87-e962-4b33-859e-29944271e335,d9627184-b972-4e00-8c11-b0b946ac357a,open,Genotyping Array,Masked Copy Number Segment,TXT,"[{'sample_type': 'Blood Derived Normal', 'tiss..."
91920,b49a041c-2f46-412f-b10d-b0dcb40737cb,d9627184-b972-4e00-8c11-b0b946ac357a,open,,Biospecimen Supplement,BCR SSF XML,


# Load and Flag the TCGA-BRCA Paper Data
The Cancer Genome Atlas Network. Comprehensive molecular portraits of human breast tumours. Nature 490, 61–70 (2012). https://doi.org/10.1038/nature11412.

In [5]:
# Load 'Supplementary Table 1' of the TCGA-BRCA paper, skipping the first
# row of the spreadsheet
df_paper_cases = pd.read_excel(
    BRCA_RAW_FILES_PATHS['paper'],
    sheet_name='SuppTable1',
    skiprows=1,
)

# Standardize the column names: spaces become underscores, all lowercase
columns = {
    name: name.replace(' ', '_').lower()
    for name in df_paper_cases.columns
}
df_paper_cases = df_paper_cases.rename(columns=columns)

# Flag (1/0) the cases of interest, i.e., those whose PAM50 classification
# is one of the configured molecular subtypes of breast cancer
molecular_subtypes = BRCA_PREPROCESSING_PARAMETERS['molecular_subtypes']
has_subtype_of_interest = df_paper_cases['pam50_mrna'].isin(molecular_subtypes)
df_paper_cases['is_case_of_interest'] = has_subtype_of_interest.astype(int)

# Persist the flagged cases as a CSV file (without the index column)
df_paper_cases.to_csv(BRCA_PROCESSED_FILES_PATHS['paper'], index=False)

In [6]:
# Display the DataFrame of flagged cases data from the TCGA-BRCA paper
# (rendered by the notebook as the last expression of the cell)
df_paper_cases

Unnamed: 0,complete_tcga_id,gender,age_at_initial_pathologic_diagnosis,er_status,pr_status,her2_final_status,tumor,tumor--t1_coded,node,node-coded,...,sigclust_unsupervised_mrna,sigclust_intrinsic_mrna,mirna_clusters,methylation_clusters,rppa_clusters,cn_clusters,integrated_clusters_(with_pam50),integrated_clusters_(no_exp),integrated_clusters_(unsup_exp),is_case_of_interest
0,TCGA-A2-A0T2,FEMALE,66.0,Negative,Negative,Negative,T3,T_Other,N3,Positive,...,0.0,-13.0,3.0,5.0,Basal,3.0,2.0,2.0,2.0,1
1,TCGA-A2-A04P,FEMALE,36.0,Negative,Negative,Negative,T2,T_Other,N3,Positive,...,0.0,-13.0,5.0,5.0,Basal,1.0,2.0,2.0,2.0,1
2,TCGA-A1-A0SK,FEMALE,54.0,Negative,Negative,Negative,T2,T_Other,N0,Negative,...,-6.0,-13.0,5.0,5.0,Basal,1.0,2.0,2.0,2.0,1
3,TCGA-A2-A0CM,FEMALE,40.0,Negative,Negative,Negative,T2,T_Other,N0,Negative,...,-12.0,-13.0,4.0,4.0,Basal,4.0,2.0,1.0,1.0,1
4,TCGA-AR-A1AR,FEMALE,50.0,Negative,Negative,Negative,T1,T1,N2,Positive,...,0.0,-13.0,5.0,5.0,,1.0,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
820,TCGA-AC-A2FF,,,,,,,,,,...,,,,1.0,,1.0,,,,0
821,TCGA-AC-A2FB,,,,,,,,,,...,,,,1.0,,3.0,,,,0
822,TCGA-AC-A2FG,,,,,,,,,,...,,,,1.0,,2.0,,,,0
823,TCGA-GI-A2C8,,,,,,,,,,...,,,,3.0,,3.0,,,,0


# Filter the Cases and Files of Interest

## Potential Cases of Interest

In [7]:
# Keep only the paper cases flagged as of interest, along with their
# PAM50 molecular subtype
df_paper_cases_of_interest = df_paper_cases.loc[
    df_paper_cases['is_case_of_interest'] == 1,
    ['complete_tcga_id', 'pam50_mrna'],
]

# Attach the molecular subtype to every API case; the left join keeps the
# cases without a match in the paper, whose missing subtype becomes ''
df_cases = (
    df_api_cases
    .merge(
        right=df_paper_cases_of_interest,
        left_on='submitter_id',
        right_on='complete_tcga_id',
        how='left',
    )
    .loc[:, ['case_id', 'submitter_id', 'disease_type', 'pam50_mrna']]
    .fillna(value={'pam50_mrna': ''})
)

# Flag (1/0) the cases of interest, i.e., those having both a configured
# disease type and a configured molecular subtype of breast cancer
disease_types = BRCA_PREPROCESSING_PARAMETERS['disease_types']
molecular_subtypes = BRCA_PREPROCESSING_PARAMETERS['molecular_subtypes']
df_cases['is_case_of_interest'] = (
    df_cases['disease_type'].isin(disease_types)
    & df_cases['pam50_mrna'].isin(molecular_subtypes)
).astype(int)

In [8]:
# Display the DataFrame of potential flagged cases
# (rendered by the notebook as the last expression of the cell)
df_cases

Unnamed: 0,case_id,submitter_id,disease_type,pam50_mrna,is_case_of_interest
0,b1d44c81-747d-471f-9093-aeb262a17975,TCGA-Z7-A8R6,"Epithelial Neoplasms, NOS",,0
1,b205bba0-1870-4458-9088-8817e20389fe,TCGA-A8-A09A,Ductal and Lobular Neoplasms,Luminal A,1
2,b205c89f-af62-4186-acad-ed23d243fa98,TCGA-A2-A0YL,Ductal and Lobular Neoplasms,Luminal A,1
3,b26d41cd-393b-4cd4-8925-a9488f7de576,TCGA-C8-A1HE,Ductal and Lobular Neoplasms,,0
4,b2aac45b-2073-4c7a-adb9-769a4fdcc111,TCGA-E9-A1NH,Ductal and Lobular Neoplasms,,0
...,...,...,...,...,...
1093,af5453a9-cf1f-40de-aec4-0e0710908fb7,TCGA-A8-A0AD,Ductal and Lobular Neoplasms,Luminal A,1
1094,af97e043-88cb-4f99-8f8b-9bcbcccdf842,TCGA-EW-A2FR,Ductal and Lobular Neoplasms,,0
1095,b0700958-5f90-4546-b35f-635cd506889b,TCGA-PE-A5DD,Ductal and Lobular Neoplasms,,0
1096,b0bcb829-cee0-4247-bb75-093a5bea89ee,TCGA-E2-A105,Ductal and Lobular Neoplasms,Luminal A,1


## Potential Files of Interest

In [9]:
# Retrieve the flag of interest of the cases
df_files = (
    df_files
    .merge(
        right=df_cases,
        left_on='case_id',
        right_on='case_id',
        how='inner',
    )
    .drop(columns=['submitter_id', 'disease_type', 'pam50_mrna'])
)

# Filter the files based on data type and case, then explode the lists with
# dictionaries containing samples metadata (one row per file and sample).
# - assign() returns a new DataFrame, so the filtered slice is never written
#   in place (the former `.loc[:, 'samples'] = ...` on the query() result is
#   a chained assignment that triggers pandas' SettingWithCopyWarning).
# - The index is reset once, right after explode(), because explode()
#   repeats the index label of a file for each of its samples.
data_types = BRCA_PREPROCESSING_PARAMETERS['data_types']
df_files_of_interest = (
    df_files
    .query('(data_type == @data_types) and (is_case_of_interest == 1)')
    .assign(samples=lambda df: df['samples'].apply(literal_eval))
    .explode('samples')
    .reset_index(drop=True)
)

# Expand every sample dictionary into columns. A plain list is normalized so
# that the result always carries a fresh 0..n-1 index, matching the reset
# index above; the column-wise concat therefore pairs rows by position and
# does not depend on how json_normalize treats the index of a Series input
df_files_of_interest = pd.concat(
    objs=[
        df_files_of_interest,
        pd.json_normalize(df_files_of_interest['samples'].tolist()),
    ],
    axis='columns',
)

# Rearrange the DataFrame columns
df_files_of_interest = df_files_of_interest \
    .drop(columns=[
        'access',
        'experimental_strategy',
        'data_type',
        'data_format',
        'samples',
        'is_case_of_interest',
    ])

# Flag the files of interest, i.e., those related to a case
# of interest, specific data type, and specific sample type
sample_types = BRCA_PREPROCESSING_PARAMETERS['sample_types']
df_files_of_interest['is_file_of_interest'] = np.where(
    df_files_of_interest['sample_type'].isin(sample_types), 1, 0
)

# Extend the flag column to all files; files that were filtered out above
# have no match in the left join and get the flag 0
df_files = (
    df_files
    .merge(
        right=df_files_of_interest,
        left_on=['file_id', 'case_id'],
        right_on=['file_id', 'case_id'],
        how='left',
    )
    .drop(columns=['is_case_of_interest'])
    .fillna(value={'is_file_of_interest': 0})
    .astype({'is_file_of_interest': 'int'})
)

In [10]:
# Display the DataFrame of potential flagged files
# (rendered by the notebook as the last expression of the cell)
df_files

Unnamed: 0,file_id,case_id,access,experimental_strategy,data_type,data_format,samples,sample_type,tissue_type,is_file_of_interest
0,75a8669a-692c-468c-8469-d2e61f6d37d8,cc348a26-ee11-47a4-8b51-de922967e175,open,Methylation Array,Masked Intensities,IDAT,"[{'sample_type': 'Primary Tumor', 'tissue_type...",,,0
1,7abd94ba-570e-43d4-9360-ad2d8399c960,f2bbfa9d-9a9d-4f46-9fde-378e4c44e2ad,controlled,WGS,Structural Rearrangement,BEDPE,"[{'sample_type': 'Blood Derived Normal', 'tiss...",,,0
2,97ef35dc-9c03-49ce-8103-5e8c55be8805,f2bbfa9d-9a9d-4f46-9fde-378e4c44e2ad,controlled,RNA-Seq,Transcript Fusion,TSV,"[{'sample_type': 'Solid Tissue Normal', 'tissu...",,,0
3,0518551d-4df2-4124-b68d-494200c5586b,a6edb6ca-ae9f-4da7-8ebe-92d83d2987fb,open,WXS,Masked Somatic Mutation,MAF,"[{'sample_type': 'Primary Tumor', 'tissue_type...",,,0
4,2044bb65-05cf-4ef7-9543-3b7c5c2ff4d5,ec0ab947-9341-4fff-bda4-fdfb9434d508,open,Methylation Array,Masked Intensities,IDAT,"[{'sample_type': 'Primary Tumor', 'tissue_type...",,,0
...,...,...,...,...,...,...,...,...,...,...
91917,c680a261-9fdf-48dd-9e87-274881d700c8,da70cf7e-0e61-4c72-b4c5-c408569d11b8,open,miRNA-Seq,miRNA Expression Quantification,TXT,"[{'sample_type': 'Primary Tumor', 'tissue_type...",,,0
91918,ad76f12b-1710-4e83-8cf2-21c1b6863d8e,d9627184-b972-4e00-8c11-b0b946ac357a,controlled,WGS,Aligned Reads,BAM,"[{'sample_type': 'Blood Derived Normal', 'tiss...",,,0
91919,42eacf87-e962-4b33-859e-29944271e335,d9627184-b972-4e00-8c11-b0b946ac357a,open,Genotyping Array,Masked Copy Number Segment,TXT,"[{'sample_type': 'Blood Derived Normal', 'tiss...",,,0
91920,b49a041c-2f46-412f-b10d-b0dcb40737cb,d9627184-b972-4e00-8c11-b0b946ac357a,open,,Biospecimen Supplement,BCR SSF XML,,,,0


## Counting Files per Case

In [11]:
# Filter the potential files of interest and create an acronym for the file
# type, i.e., experimental strategy and tissue type joined by an underscore.
# assign() returns a new DataFrame, which avoids adding a column in place to
# a slice of df_files (a chained assignment that triggers pandas'
# SettingWithCopyWarning)
df_files_of_interest = (
    df_files
    .query('is_file_of_interest == 1')
    [['file_id', 'case_id', 'experimental_strategy', 'tissue_type']]
    .assign(
        file_type=lambda df: (
            df['experimental_strategy'] + '_' + df['tissue_type']
        )
    )
)

# Copy the columns needed for counting and create one 1/0 flag per file
# type, plus a constant column that counts every file once
df_file_count = (
    df_files_of_interest[['case_id', 'file_id', 'file_type']]
    .assign(
        tt_mir=lambda df: (df['file_type'] == 'miRNA-Seq_Tumor').astype(int),
        tt_mrna=lambda df: (df['file_type'] == 'RNA-Seq_Tumor').astype(int),
        nt_mir=lambda df: (df['file_type'] == 'miRNA-Seq_Normal').astype(int),
        nt_mrna=lambda df: (df['file_type'] == 'RNA-Seq_Normal').astype(int),
        total_files=1,
    )
)

# Count the total number of files of each type per case
df_file_count_agg = (
    df_file_count
    .groupby('case_id')
    .agg(
        tumor_mir_files=pd.NamedAgg(column='tt_mir', aggfunc='sum'),
        tumor_mrna_files=pd.NamedAgg(column='tt_mrna', aggfunc='sum'),
        normal_mir_files=pd.NamedAgg(column='nt_mir', aggfunc='sum'),
        normal_mrna_files=pd.NamedAgg(column='nt_mrna', aggfunc='sum'),
        total_files=pd.NamedAgg(column='total_files', aggfunc='sum'),
    )
    .sort_values(by=['total_files'], ascending=False)
    .reset_index()
)

# Add the file count to the cases DataFrame; the left join keeps the cases
# without any file of interest, whose counts are left missing (NaN)
df_cases = df_cases \
    .merge(
        right=df_file_count_agg,
        left_on='case_id',
        right_on='case_id',
        how='left'
    )

In [12]:
# Display the DataFrame of potential flagged cases with file count
# (rendered by the notebook as the last expression of the cell)
df_cases

Unnamed: 0,case_id,submitter_id,disease_type,pam50_mrna,is_case_of_interest,tumor_mir_files,tumor_mrna_files,normal_mir_files,normal_mrna_files,total_files
0,b1d44c81-747d-471f-9093-aeb262a17975,TCGA-Z7-A8R6,"Epithelial Neoplasms, NOS",,0,,,,,
1,b205bba0-1870-4458-9088-8817e20389fe,TCGA-A8-A09A,Ductal and Lobular Neoplasms,Luminal A,1,1.0,1.0,0.0,0.0,2.0
2,b205c89f-af62-4186-acad-ed23d243fa98,TCGA-A2-A0YL,Ductal and Lobular Neoplasms,Luminal A,1,1.0,1.0,0.0,0.0,2.0
3,b26d41cd-393b-4cd4-8925-a9488f7de576,TCGA-C8-A1HE,Ductal and Lobular Neoplasms,,0,,,,,
4,b2aac45b-2073-4c7a-adb9-769a4fdcc111,TCGA-E9-A1NH,Ductal and Lobular Neoplasms,,0,,,,,
...,...,...,...,...,...,...,...,...,...,...
1093,af5453a9-cf1f-40de-aec4-0e0710908fb7,TCGA-A8-A0AD,Ductal and Lobular Neoplasms,Luminal A,1,1.0,1.0,0.0,0.0,2.0
1094,af97e043-88cb-4f99-8f8b-9bcbcccdf842,TCGA-EW-A2FR,Ductal and Lobular Neoplasms,,0,,,,,
1095,b0700958-5f90-4546-b35f-635cd506889b,TCGA-PE-A5DD,Ductal and Lobular Neoplasms,,0,,,,,
1096,b0bcb829-cee0-4247-bb75-093a5bea89ee,TCGA-E2-A105,Ductal and Lobular Neoplasms,Luminal A,1,1.0,1.0,0.0,0.0,2.0


## Flagged Cases DataFrame

In [13]:
df_cases = (
    df_cases
    .assign(
        # Flag (1/0) cases with files related to tumor tissue analysis:
        # a case of interest with exactly one tumor miRNA file and exactly
        # one tumor mRNA file
        has_tumor_files_of_interest=lambda df: (
            (df['is_case_of_interest'] == 1)
            & (df['tumor_mir_files'] == 1)
            & (df['tumor_mrna_files'] == 1)
        ).astype(int),
        # Flag (1/0) cases with files related to normal tissue analysis:
        # same rule, applied to the normal tissue file counts
        has_normal_files_of_interest=lambda df: (
            (df['is_case_of_interest'] == 1)
            & (df['normal_mir_files'] == 1)
            & (df['normal_mrna_files'] == 1)
        ).astype(int),
    )
    # Discard the file counts and the preliminary case flag
    .drop(columns=[
        'tumor_mir_files',
        'tumor_mrna_files',
        'normal_mir_files',
        'normal_mrna_files',
        'total_files',
        'is_case_of_interest',
    ])
    # Rebuild the case flag as the last column: a case is of interest when
    # it has tumor files or normal files of interest
    .assign(
        is_case_of_interest=lambda df: (
            (df['has_tumor_files_of_interest'] == 1)
            | (df['has_normal_files_of_interest'] == 1)
        ).astype(int)
    )
)

# Persist the cases metadata as a CSV file (without the index column)
df_cases.to_csv(BRCA_PROCESSED_FILES_PATHS['cases'], index=False)

In [14]:
# Display the DataFrame of flagged cases metadata
# (rendered by the notebook as the last expression of the cell)
df_cases

Unnamed: 0,case_id,submitter_id,disease_type,pam50_mrna,has_tumor_files_of_interest,has_normal_files_of_interest,is_case_of_interest
0,b1d44c81-747d-471f-9093-aeb262a17975,TCGA-Z7-A8R6,"Epithelial Neoplasms, NOS",,0,0,0
1,b205bba0-1870-4458-9088-8817e20389fe,TCGA-A8-A09A,Ductal and Lobular Neoplasms,Luminal A,1,0,1
2,b205c89f-af62-4186-acad-ed23d243fa98,TCGA-A2-A0YL,Ductal and Lobular Neoplasms,Luminal A,1,0,1
3,b26d41cd-393b-4cd4-8925-a9488f7de576,TCGA-C8-A1HE,Ductal and Lobular Neoplasms,,0,0,0
4,b2aac45b-2073-4c7a-adb9-769a4fdcc111,TCGA-E9-A1NH,Ductal and Lobular Neoplasms,,0,0,0
...,...,...,...,...,...,...,...
1093,af5453a9-cf1f-40de-aec4-0e0710908fb7,TCGA-A8-A0AD,Ductal and Lobular Neoplasms,Luminal A,1,0,1
1094,af97e043-88cb-4f99-8f8b-9bcbcccdf842,TCGA-EW-A2FR,Ductal and Lobular Neoplasms,,0,0,0
1095,b0700958-5f90-4546-b35f-635cd506889b,TCGA-PE-A5DD,Ductal and Lobular Neoplasms,,0,0,0
1096,b0bcb829-cee0-4247-bb75-093a5bea89ee,TCGA-E2-A105,Ductal and Lobular Neoplasms,Luminal A,1,0,1


## Flagged Files DataFrame

In [15]:
df_files = (
    df_files
    # Bring the case-level flags into each file row
    .merge(right=df_cases, on='case_id', how='inner')
    .drop(columns=['submitter_id', 'disease_type', 'pam50_mrna'])
    .assign(
        # Flag (1/0) the files related to tumor tissue analysis: a potential
        # file of interest, from tumor tissue, whose case has tumor files
        # of interest
        is_tumor_file_of_interest=lambda df: (
            (df['is_file_of_interest'] == 1)
            & (df['has_tumor_files_of_interest'] == 1)
            & (df['tissue_type'] == 'Tumor')
        ).astype(int),
        # Flag (1/0) the files related to normal tissue analysis: same
        # rule, applied to normal tissue
        is_normal_file_of_interest=lambda df: (
            (df['is_file_of_interest'] == 1)
            & (df['has_normal_files_of_interest'] == 1)
            & (df['tissue_type'] == 'Normal')
        ).astype(int),
    )
    # Discard the sample columns, the case flags and the preliminary
    # file flag
    .drop(columns=[
        'sample_type',
        'tissue_type',
        'is_file_of_interest',
        'has_tumor_files_of_interest',
        'has_normal_files_of_interest',
        'is_case_of_interest',
    ])
    # Rebuild the file flag as the last column: a file is of interest when
    # it is a tumor file or a normal file of interest
    .assign(
        is_file_of_interest=lambda df: (
            (df['is_tumor_file_of_interest'] == 1)
            | (df['is_normal_file_of_interest'] == 1)
        ).astype(int)
    )
)

# Persist the files metadata as a CSV file (without the index column)
df_files.to_csv(BRCA_PROCESSED_FILES_PATHS['files'], index=False)

In [16]:
# Display the DataFrame of flagged files metadata
# (rendered by the notebook as the last expression of the cell)
df_files

Unnamed: 0,file_id,case_id,access,experimental_strategy,data_type,data_format,samples,is_tumor_file_of_interest,is_normal_file_of_interest,is_file_of_interest
0,75a8669a-692c-468c-8469-d2e61f6d37d8,cc348a26-ee11-47a4-8b51-de922967e175,open,Methylation Array,Masked Intensities,IDAT,"[{'sample_type': 'Primary Tumor', 'tissue_type...",0,0,0
1,7abd94ba-570e-43d4-9360-ad2d8399c960,f2bbfa9d-9a9d-4f46-9fde-378e4c44e2ad,controlled,WGS,Structural Rearrangement,BEDPE,"[{'sample_type': 'Blood Derived Normal', 'tiss...",0,0,0
2,97ef35dc-9c03-49ce-8103-5e8c55be8805,f2bbfa9d-9a9d-4f46-9fde-378e4c44e2ad,controlled,RNA-Seq,Transcript Fusion,TSV,"[{'sample_type': 'Solid Tissue Normal', 'tissu...",0,0,0
3,0518551d-4df2-4124-b68d-494200c5586b,a6edb6ca-ae9f-4da7-8ebe-92d83d2987fb,open,WXS,Masked Somatic Mutation,MAF,"[{'sample_type': 'Primary Tumor', 'tissue_type...",0,0,0
4,2044bb65-05cf-4ef7-9543-3b7c5c2ff4d5,ec0ab947-9341-4fff-bda4-fdfb9434d508,open,Methylation Array,Masked Intensities,IDAT,"[{'sample_type': 'Primary Tumor', 'tissue_type...",0,0,0
...,...,...,...,...,...,...,...,...,...,...
91917,c680a261-9fdf-48dd-9e87-274881d700c8,da70cf7e-0e61-4c72-b4c5-c408569d11b8,open,miRNA-Seq,miRNA Expression Quantification,TXT,"[{'sample_type': 'Primary Tumor', 'tissue_type...",0,0,0
91918,ad76f12b-1710-4e83-8cf2-21c1b6863d8e,d9627184-b972-4e00-8c11-b0b946ac357a,controlled,WGS,Aligned Reads,BAM,"[{'sample_type': 'Blood Derived Normal', 'tiss...",0,0,0
91919,42eacf87-e962-4b33-859e-29944271e335,d9627184-b972-4e00-8c11-b0b946ac357a,open,Genotyping Array,Masked Copy Number Segment,TXT,"[{'sample_type': 'Blood Derived Normal', 'tiss...",0,0,0
91920,b49a041c-2f46-412f-b10d-b0dcb40737cb,d9627184-b972-4e00-8c11-b0b946ac357a,open,,Biospecimen Supplement,BCR SSF XML,,0,0,0
