In [1]:
from bravado.client import SwaggerClient

# Build a bravado client from the cBioPortal API description. All three
# validation switches (requests, responses, the spec itself) are turned off.
cbioportal = SwaggerClient.from_url(
    'https://www.cbioportal.org/api/v2/api-docs',
    config={
        "validate_requests": False,
        "validate_responses": False,
        "validate_swagger_spec": False,
    },
)
print(cbioportal)

SwaggerClient(https://www.cbioportal.org/api)


In [2]:
import pandas as pd

In [3]:
# Expose every name listed by dir(cbioportal) under a lower-case alias in
# which spaces are replaced by underscores.
for a in dir(cbioportal):
    setattr(cbioportal, a.replace(' ', '_').lower(), getattr(cbioportal, a))

In [4]:
# Cancer type identifier; studies whose cancerTypeId equals this value are selected below.
cancerTypeId = "cllsll"

In [5]:
# cll = cbioportal.Cancer_Types.getCancerTypeUsingGET(cancerTypeId).result()
# print(cll)

In [6]:
# List the operations available on the Studies resource.
dir(cbioportal.Studies)

['fetchStudiesUsingPOST',
 'getAllStudiesUsingGET',
 'getStudyUsingGET',
 'getTagsForMultipleStudiesUsingPOST',
 'getTagsUsingGET']

In [7]:
# Fetch every study, then keep only those whose cancerTypeId matches the
# identifier chosen above.
all_studies = cbioportal.Studies.getAllStudiesUsingGET().result()
cll_studies = [
    candidate
    for candidate in all_studies
    if candidate.cancerTypeId == cancerTypeId
]

In [8]:
# Display the studies selected for this cancer type.
cll_studies

[CancerStudy(allSampleCount=105, cancerType=None, cancerTypeId='cllsll', citation='Quesada et al. Nature Genetics 2011', cnaSampleCount=None, completeSampleCount=None, description='Whole-exome sequencing of 105 Chronic Lymphocytic Leukemia samples.', groups='PUBLIC', importDate='2024-12-04 11:30:41', massSpectrometrySampleCount=None, methylationHm27SampleCount=None, miRnaSampleCount=None, mrnaMicroarraySampleCount=None, mrnaRnaSeqSampleCount=None, mrnaRnaSeqV2SampleCount=None, name='Chronic lymphocytic leukemia (ICGC, Nature Genetics 2011)', pmid='22158541', publicStudy=True, readPermission=True, referenceGenome='hg19', rppaSampleCount=None, sequencedSampleCount=None, status=0, studyId='cllsll_icgc_2011', treatmentCount=None),
 CancerStudy(allSampleCount=160, cancerType=None, cancerTypeId='cllsll', citation='Landau et al. Cell 2013', cnaSampleCount=None, completeSampleCount=None, description='Whole-exome sequencing of 160 CLL tumor/normal pairs.', groups='', importDate='2024-12-05 13:2

In [9]:
# Sum allSampleCount over the selected studies, printing one summary line per study.
total_samples = 0
for study in cll_studies:
    total_samples += study.allSampleCount
    # Study id, free-text description, and sample count of this study.
    print(study.studyId, study.description, study.allSampleCount)
print("Total samples:", total_samples)

cllsll_icgc_2011 Whole-exome sequencing of 105 Chronic Lymphocytic Leukemia samples. 105
lcll_broad_2013 Whole-exome sequencing of 160 CLL tumor/normal pairs. 160
cll_iuopa_2015 Mutation data from whole-genome or whole-exome sequencing of 428 CLL, 54 MBL, and 24 SLL tumor/normal pairs. 506
cll_broad_2015 Whole exome sequencing of 537 chronic lymphocytic leukemia tumor/normal pairs. 537
cll_broad_2022 Whole genome and whole exome sequencing of 1,154 samples from 1,148 patients with Chronic Lymphocytic Leukemia and monoclonal B cell lymphocytosis and their matched normals. 1154
Total samples: 2462


In [10]:
def write_df_to_csv(filename, df):
    """Write ``df`` to ``filename`` as CSV (index omitted) and report the path.

    Parameters
    ----------
    filename : str or os.PathLike or file-like
        Destination handed to ``DataFrame.to_csv``. When it is a path, any
        missing parent directories are created first so that exporting into a
        fresh output folder does not fail.
    df : pandas.DataFrame
        Table to write.
    """
    import os  # function-scope import keeps this cell self-contained

    # Only path-like targets have a parent directory; buffers are passed through.
    if isinstance(filename, (str, os.PathLike)):
        parent_dir = os.path.dirname(filename)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    df.to_csv(filename, index=False)
    print(f"Data written to {filename}")

## Clinical and mutation data

In [11]:
def _models_to_frame(models, nested_attr=None):
    """Build a DataFrame with one row per API model object.

    Every name reported by ``dir(model)`` becomes a column. When
    ``nested_attr`` is given, the attributes of ``model.<nested_attr>`` are
    merged into the same row; on a name clash the nested value wins.
    An empty ``models`` sequence yields an empty DataFrame.
    """
    records = []
    for model in models:
        record = {name: getattr(model, name) for name in dir(model)}
        if nested_attr is not None:
            nested = getattr(model, nested_attr)
            record.update({name: getattr(nested, name) for name in dir(nested)})
        records.append(record)
    return pd.DataFrame(records)


def get_data_for_study(studyId):
    """Download clinical and mutation data for one study as DataFrames.

    Parameters
    ----------
    studyId : str
        Study identifier. The mutation request uses the molecular profile
        ``f'{studyId}_mutations'`` and the sample list ``f'{studyId}_all'``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(cpdf, csdf, mdf)``:
        ``cpdf`` holds one row per PATIENT-level clinical data record,
        ``csdf`` one row per SAMPLE-level clinical data record (both without
        the ``clinicalAttribute`` column), and ``mdf`` one row per mutation
        with the fields of its ``gene`` object flattened in and the ``gene``,
        ``uniquePatientKey`` and ``uniqueSampleKey`` columns removed.

    Notes
    -----
    If a request returns no records, the corresponding frame is empty (and has
    no columns) instead of the column drop raising ``KeyError``.
    """

    def fetch_clinical(clinical_data_type):
        # Same request for both levels; only clinicalDataType differs.
        return cbioportal.Clinical_Data.getAllClinicalDataInStudyUsingGET(
            studyId=studyId,
            projection="DETAILED",
            clinicalDataType=clinical_data_type,
        ).result()

    clinical_data_sample = fetch_clinical("SAMPLE")
    clinical_data_patient = fetch_clinical("PATIENT")

    # errors='ignore' only matters for empty results, where the frame has no
    # columns at all and a strict drop would raise.
    csdf = _models_to_frame(clinical_data_sample).drop(
        columns=['clinicalAttribute'], errors='ignore'
    )
    cpdf = _models_to_frame(clinical_data_patient).drop(
        columns=['clinicalAttribute'], errors='ignore'
    )

    mutations = cbioportal.Mutations.getMutationsInMolecularProfileBySampleListIdUsingGET(
        molecularProfileId=f'{studyId}_mutations',
        sampleListId=f'{studyId}_all',
        projection='DETAILED'
    ).result()

    # One row per mutation; the nested gene object's fields become columns.
    mdf = _models_to_frame(mutations, nested_attr='gene').drop(
        columns=['gene', 'uniquePatientKey', 'uniqueSampleKey'], errors='ignore'
    )

    return cpdf, csdf, mdf


In [12]:
def process_patient_sample_data(cpdf, csdf):
    """Combine long-format clinical records into one wide row per sample.

    Parameters
    ----------
    cpdf : pandas.DataFrame
        Patient-level records with columns ``patientId``,
        ``clinicalAttributeId`` and ``value``.
    csdf : pandas.DataFrame
        Sample-level records with columns ``patientId``, ``sampleId``,
        ``studyId``, ``clinicalAttributeId`` and ``value``.

    Returns
    -------
    pandas.DataFrame
        ``patientId``, ``sampleId``, ``studyId``, then one column per
        sample-level attribute id, then one column per patient-level
        attribute id (left-joined on ``patientId``).
    """
    # Long -> wide: each clinicalAttributeId becomes its own column.
    patient_wide = cpdf.pivot(index='patientId', columns='clinicalAttributeId', values='value')
    patient_wide = patient_wide.reset_index()

    sample_keys = ['patientId', 'sampleId', 'studyId']
    sample_wide = csdf.pivot(index=sample_keys, columns='clinicalAttributeId', values='value')
    sample_wide = sample_wide.reset_index()

    # Keep every sample row; attach that patient's attributes where present.
    return sample_wide.merge(patient_wide, on='patientId', how='left')

In [None]:
def process_study(studyId):
    """Fetch one study, preview its tables, and export them as CSV files.

    The clinical table (built by ``process_patient_sample_data``) and the
    mutation table are each printed (shape and columns) and displayed
    (first rows), stripped of columns that contain only nulls, and written to
    ``../../data/cbioportal/{studyId}_clinical_data.csv`` and
    ``../../data/cbioportal/{studyId}_mutations.csv``.
    """
    patient_long, sample_long, mutation_table = get_data_for_study(studyId)
    clinical_table = process_patient_sample_data(patient_long, sample_long)

    # Preview happens before the all-null columns are removed.
    for table in (clinical_table, mutation_table):
        print(table.shape, table.columns)
        display(table.head())

    # Drop columns where all values are null.
    clinical_table = clinical_table.dropna(axis=1, how='all')
    mutation_table = mutation_table.dropna(axis=1, how='all')

    write_df_to_csv(f'../../data/cbioportal/{studyId}_clinical_data.csv', clinical_table)
    write_df_to_csv(f'../../data/cbioportal/{studyId}_mutations.csv', mutation_table)
    

In [14]:
def driver():
    """Run ``process_study`` for each entry of ``cll_studies``, printing its id first."""
    for study_id in (entry.studyId for entry in cll_studies):
        print(study_id)
        process_study(study_id)
        

In [15]:
# Process every study in cll_studies (preview and CSV export for each).
driver()

cllsll_icgc_2011
(105, 10) Index(['patientId', 'sampleId', 'studyId', 'CANCER_TYPE',
       'CANCER_TYPE_DETAILED', 'MUTATION_COUNT', 'ONCOTREE_CODE',
       'SOMATIC_STATUS', 'TMB_NONSYNONYMOUS', 'SAMPLE_COUNT'],
      dtype='object', name='clinicalAttributeId')


clinicalAttributeId,patientId,sampleId,studyId,CANCER_TYPE,CANCER_TYPE_DETAILED,MUTATION_COUNT,ONCOTREE_CODE,SOMATIC_STATUS,TMB_NONSYNONYMOUS,SAMPLE_COUNT
0,CLL_100,CLL_100,cllsll_icgc_2011,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,6,CLLSLL,Matched,0.2,1
1,CLL_110,CLL_110,cllsll_icgc_2011,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,10,CLLSLL,Matched,0.333333333333,1
2,CLL_117,CLL_117,cllsll_icgc_2011,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,11,CLLSLL,Matched,0.366666666667,1
3,CLL_124,CLL_124,cllsll_icgc_2011,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,11,CLLSLL,Matched,0.366666666667,1
4,CLL_13,CLL_13,cllsll_icgc_2011,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,13,CLLSLL,Matched,0.433333333333,1


(1067, 35) Index(['alleleSpecificCopyNumber', 'aminoAcidChange', 'center', 'chr',
       'driverFilter', 'driverFilterAnnotation', 'driverTiersFilter',
       'driverTiersFilterAnnotation', 'endPosition', 'entrezGeneId', 'keyword',
       'molecularProfileId', 'mutationStatus', 'mutationType',
       'namespaceColumns', 'ncbiBuild', 'normalAltCount', 'normalRefCount',
       'patientId', 'proteinChange', 'proteinPosEnd', 'proteinPosStart',
       'referenceAllele', 'refseqMrnaId', 'sampleId', 'startPosition',
       'studyId', 'tumorAltCount', 'tumorRefCount', 'validationStatus',
       'variantAllele', 'variantType', 'geneticEntityId', 'hugoGeneSymbol',
       'type'],
      dtype='object')


Unnamed: 0,alleleSpecificCopyNumber,aminoAcidChange,center,chr,driverFilter,driverFilterAnnotation,driverTiersFilter,driverTiersFilterAnnotation,endPosition,entrezGeneId,...,startPosition,studyId,tumorAltCount,tumorRefCount,validationStatus,variantAllele,variantType,geneticEntityId,hugoGeneSymbol,type
0,,,Oviedo,12,,,,,9021782,144568,...,9021782,cllsll_icgc_2011,,,,G,SNP,,A2ML1,protein-coding
1,,,Oviedo,1,,,,,169391327,57821,...,169391327,cllsll_icgc_2011,,,,T,SNP,,CCDC181,protein-coding
2,,,Oviedo,14,,,,,95660226,79789,...,95660226,cllsll_icgc_2011,,,,T,SNP,,CLMN,protein-coding
3,,,Oviedo,17,,,,,36002206,11056,...,36002206,cllsll_icgc_2011,,,,A,SNP,,DDX52,protein-coding
4,,,Oviedo,3,,,,,38141882,9940,...,38141882,cllsll_icgc_2011,,,,A,SNP,,DLEC1,protein-coding


Data written to ../../data/cllsll_icgc_2011_clinical_data.csv
Data written to ../../data/cllsll_icgc_2011_mutations.csv
lcll_broad_2013
(160, 10) Index(['patientId', 'sampleId', 'studyId', 'CANCER_TYPE',
       'CANCER_TYPE_DETAILED', 'MUTATION_COUNT', 'ONCOTREE_CODE',
       'SOMATIC_STATUS', 'TMB_NONSYNONYMOUS', 'SAMPLE_COUNT'],
      dtype='object', name='clinicalAttributeId')


clinicalAttributeId,patientId,sampleId,studyId,CANCER_TYPE,CANCER_TYPE_DETAILED,MUTATION_COUNT,ONCOTREE_CODE,SOMATIC_STATUS,TMB_NONSYNONYMOUS,SAMPLE_COUNT
0,CLL001,CLL001,lcll_broad_2013,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,13,CLLSLL,Matched,0.433333333333,1
1,CLL003,CLL003,lcll_broad_2013,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,14,CLLSLL,Matched,0.466666666667,1
2,CLL004,CLL004,lcll_broad_2013,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,12,CLLSLL,Matched,0.4,1
3,CLL005,CLL005,lcll_broad_2013,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,19,CLLSLL,Matched,0.633333333333,1
4,CLL006,CLL006,lcll_broad_2013,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,29,CLLSLL,Matched,0.966666666667,1


(2312, 35) Index(['alleleSpecificCopyNumber', 'aminoAcidChange', 'center', 'chr',
       'driverFilter', 'driverFilterAnnotation', 'driverTiersFilter',
       'driverTiersFilterAnnotation', 'endPosition', 'entrezGeneId', 'keyword',
       'molecularProfileId', 'mutationStatus', 'mutationType',
       'namespaceColumns', 'ncbiBuild', 'normalAltCount', 'normalRefCount',
       'patientId', 'proteinChange', 'proteinPosEnd', 'proteinPosStart',
       'referenceAllele', 'refseqMrnaId', 'sampleId', 'startPosition',
       'studyId', 'tumorAltCount', 'tumorRefCount', 'validationStatus',
       'variantAllele', 'variantType', 'geneticEntityId', 'hugoGeneSymbol',
       'type'],
      dtype='object')


Unnamed: 0,alleleSpecificCopyNumber,aminoAcidChange,center,chr,driverFilter,driverFilterAnnotation,driverTiersFilter,driverTiersFilterAnnotation,endPosition,entrezGeneId,...,startPosition,studyId,tumorAltCount,tumorRefCount,validationStatus,variantAllele,variantType,geneticEntityId,hugoGeneSymbol,type
0,,,broad.mit.edu,3,,,,,38182641,4615,...,38182641,lcll_broad_2013,,,,C,SNP,,MYD88,protein-coding
1,,,broad.mit.edu,12,,,,,86373542,25834,...,86373542,lcll_broad_2013,,,,A,SNP,,MGAT4C,protein-coding
2,,,broad.mit.edu,12,,,,,76741541,79738,...,76741541,lcll_broad_2013,,,,C,SNP,,BBS10,protein-coding
3,,,broad.mit.edu,2,,,,,56419944,114800,...,56419944,lcll_broad_2013,,,,G,SNP,,CCDC85A,protein-coding
4,,,broad.mit.edu,3,,,,,52812956,3697,...,52812956,lcll_broad_2013,,,,T,SNP,,ITIH1,protein-coding


Data written to ../../data/lcll_broad_2013_clinical_data.csv
Data written to ../../data/lcll_broad_2013_mutations.csv
cll_iuopa_2015
(506, 16) Index(['patientId', 'sampleId', 'studyId', 'CANCER_TYPE',
       'CANCER_TYPE_DETAILED', 'EXOME_CAPTURE', 'IGHV_GENE', 'IGHV_IDENTITY',
       'IGHV_MUTATED', 'MUTATION_COUNT', 'ONCOTREE_CODE', 'PLATFORM',
       'SOMATIC_STATUS', 'TMB_NONSYNONYMOUS', 'EPIGENETIC', 'SAMPLE_COUNT'],
      dtype='object', name='clinicalAttributeId')


clinicalAttributeId,patientId,sampleId,studyId,CANCER_TYPE,CANCER_TYPE_DETAILED,EXOME_CAPTURE,IGHV_GENE,IGHV_IDENTITY,IGHV_MUTATED,MUTATION_COUNT,ONCOTREE_CODE,PLATFORM,SOMATIC_STATUS,TMB_NONSYNONYMOUS,EPIGENETIC,SAMPLE_COUNT
0,cll_iuopa_2015_1,cll_iuopa_2015_1,cll_iuopa_2015,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,SureSelect Human All Exon,IGHV1-3,100.0,UNMUT,12,CLLSLL,"WES,WGS",Matched,0.4,Naive-like CLL,1
1,cll_iuopa_2015_10,cll_iuopa_2015_10,cll_iuopa_2015,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,SureSelect XT Human All Exon 50 Mb,IGHV3-21,99.6,UNMUT,23,CLLSLL,"WES,WGS",Matched,0.766666666667,Intermediate CLL,1
2,cll_iuopa_2015_100,cll_iuopa_2015_100,cll_iuopa_2015,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,SureSelect XT Human All Exon 50 Mb,IGHV1-69,100.0,UNMUT,7,CLLSLL,WES,Matched,0.233333333333,Naive-like CLL,1
3,cll_iuopa_2015_1047,cll_iuopa_2015_1047,cll_iuopa_2015,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,SureSelect XT2 Human All Exon v4+UTRs,IGHV4-59,97.19,MUT,8,CLLSLL,WES,Matched,0.266666666667,Memory-like CLL,1
4,cll_iuopa_2015_1050,cll_iuopa_2015_1050,cll_iuopa_2015,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,SureSelect XT2 Human All Exon v4+UTRs,IGHV1-69,100.0,UNMUT,13,CLLSLL,WES,Matched,0.433333333333,Naive-like CLL,1


(7134, 35) Index(['alleleSpecificCopyNumber', 'aminoAcidChange', 'center', 'chr',
       'driverFilter', 'driverFilterAnnotation', 'driverTiersFilter',
       'driverTiersFilterAnnotation', 'endPosition', 'entrezGeneId', 'keyword',
       'molecularProfileId', 'mutationStatus', 'mutationType',
       'namespaceColumns', 'ncbiBuild', 'normalAltCount', 'normalRefCount',
       'patientId', 'proteinChange', 'proteinPosEnd', 'proteinPosStart',
       'referenceAllele', 'refseqMrnaId', 'sampleId', 'startPosition',
       'studyId', 'tumorAltCount', 'tumorRefCount', 'validationStatus',
       'variantAllele', 'variantType', 'geneticEntityId', 'hugoGeneSymbol',
       'type'],
      dtype='object')


Unnamed: 0,alleleSpecificCopyNumber,aminoAcidChange,center,chr,driverFilter,driverFilterAnnotation,driverTiersFilter,driverTiersFilterAnnotation,endPosition,entrezGeneId,...,startPosition,studyId,tumorAltCount,tumorRefCount,validationStatus,variantAllele,variantType,geneticEntityId,hugoGeneSymbol,type
0,,,www.unioviedo.es/IUOPA/,19,,,,,54646887,4849,...,54646887,cll_iuopa_2015,,,,A,SNP,,CNOT3,protein-coding
1,,,www.unioviedo.es/IUOPA/,3,,,,,78700896,6091,...,78700896,cll_iuopa_2015,,,,A,SNP,,ROBO1,protein-coding
2,,,www.unioviedo.es/IUOPA/,7,,,,,20778606,340273,...,20778606,cll_iuopa_2015,,,,T,SNP,,ABCB5,protein-coding
3,,,www.unioviedo.es/IUOPA/,6,,,,,71665885,135152,...,71665885,cll_iuopa_2015,,,,A,SNP,,B3GAT2,protein-coding
4,,,www.unioviedo.es/IUOPA/,19,,,,,41383838,1549,...,41383838,cll_iuopa_2015,,,,A,SNP,,CYP2A7,protein-coding


Data written to ../../data/cll_iuopa_2015_clinical_data.csv
Data written to ../../data/cll_iuopa_2015_mutations.csv
cll_broad_2015
(537, 14) Index(['patientId', 'sampleId', 'studyId', 'CANCER_TYPE',
       'CANCER_TYPE_DETAILED', 'CAPTURE_BAIT_SET', 'IGHV_GENE',
       'MUTATION_COUNT', 'ONCOTREE_CODE', 'SOMATIC_STATUS',
       'TMB_NONSYNONYMOUS', 'COHORT', 'PRIOR_TREATMENT', 'SAMPLE_COUNT'],
      dtype='object', name='clinicalAttributeId')


clinicalAttributeId,patientId,sampleId,studyId,CANCER_TYPE,CANCER_TYPE_DETAILED,CAPTURE_BAIT_SET,IGHV_GENE,MUTATION_COUNT,ONCOTREE_CODE,SOMATIC_STATUS,TMB_NONSYNONYMOUS,COHORT,PRIOR_TREATMENT,SAMPLE_COUNT
0,CLL-GCLL-0001,CLL-GCLL-0001-Tumor-SM-41JLZ,cll_broad_2015,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,whole_exome_agilent_1.1_refseq_plus_3_boosters,mutated,8,CLLSLL,Matched,0.266666666667,GCLLSG_CLL8,treatment naive,1
1,CLL-GCLL-0002,CLL-GCLL-0002-Tumor-SM-41JM1,cll_broad_2015,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,whole_exome_agilent_1.1_refseq_plus_3_boosters,mutated,12,CLLSLL,Matched,0.4,GCLLSG_CLL8,treatment naive,1
2,CLL-GCLL-0003,CLL-GCLL-0003-Tumor-SM-41JM2,cll_broad_2015,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,whole_exome_agilent_1.1_refseq_plus_3_boosters,mutated,14,CLLSLL,Matched,0.466666666667,GCLLSG_CLL8,treatment naive,1
3,CLL-GCLL-0004,CLL-GCLL-0004-Tumor-SM-41JM3,cll_broad_2015,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,whole_exome_agilent_1.1_refseq_plus_3_boosters,mutated,9,CLLSLL,Matched,0.3,GCLLSG_CLL8,treatment naive,1
4,CLL-GCLL-0006,CLL-GCLL-0006-Tumor-SM-41JM5,cll_broad_2015,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,whole_exome_agilent_1.1_refseq_plus_3_boosters,mutated,13,CLLSLL,Matched,0.4,GCLLSG_CLL8,treatment naive,1


(9487, 35) Index(['alleleSpecificCopyNumber', 'aminoAcidChange', 'center', 'chr',
       'driverFilter', 'driverFilterAnnotation', 'driverTiersFilter',
       'driverTiersFilterAnnotation', 'endPosition', 'entrezGeneId', 'keyword',
       'molecularProfileId', 'mutationStatus', 'mutationType',
       'namespaceColumns', 'ncbiBuild', 'normalAltCount', 'normalRefCount',
       'patientId', 'proteinChange', 'proteinPosEnd', 'proteinPosStart',
       'referenceAllele', 'refseqMrnaId', 'sampleId', 'startPosition',
       'studyId', 'tumorAltCount', 'tumorRefCount', 'validationStatus',
       'variantAllele', 'variantType', 'geneticEntityId', 'hugoGeneSymbol',
       'type'],
      dtype='object')


Unnamed: 0,alleleSpecificCopyNumber,aminoAcidChange,center,chr,driverFilter,driverFilterAnnotation,driverTiersFilter,driverTiersFilterAnnotation,endPosition,entrezGeneId,...,startPosition,studyId,tumorAltCount,tumorRefCount,validationStatus,variantAllele,variantType,geneticEntityId,hugoGeneSymbol,type
0,,,broad.mit.edu,6,,,,,129802568,3908,...,129802568,cll_broad_2015,63,97,,A,SNP,,LAMA2,protein-coding
1,,,broad.mit.edu,1,,,,,120438518,11085,...,120438518,cll_broad_2015,4,116,,A,SNP,,ADAM30,protein-coding
2,,,broad.mit.edu,20,,,,,4229250,146,...,4229250,cll_broad_2015,14,24,,A,SNP,,ADRA1D,protein-coding
3,,,broad.mit.edu,2,,,,,196799323,56171,...,196799323,cll_broad_2015,78,105,,C,SNP,,DNAH7,protein-coding
4,,,broad.mit.edu,1,,,,,228612633,8290,...,228612633,cll_broad_2015,43,34,,A,SNP,,H3-4,protein-coding


Data written to ../../data/cll_broad_2015_clinical_data.csv
Data written to ../../data/cll_broad_2015_mutations.csv
cll_broad_2022
(1154, 43) Index(['patientId', 'sampleId', 'studyId', 'ARRAY450K', 'CANCER_TYPE',
       'CANCER_TYPE_DETAILED', 'CENTER', 'CLL_EPITYPE', 'DISEASE_TYPE',
       'EC_DISCOVERY', 'EC_EXTENSION', 'IGLV3_21_R110', 'M-CLL_WES',
       'MUTATION_COUNT', 'NORMAL_MEAN_COVERAGE', 'ONCOTREE_CODE',
       'RNA_SEQUENCING', 'RRBS', 'SEQUENCING_PLATFORM', 'SOMATIC_STATUS',
       'TMB_NONSYNONYMOUS', 'TUMOR_MEAN_COVERAGE', 'TUMOR_MOLECULAR_SUBTYPE',
       'TUMOR_SAMPLE_PLOIDY', 'TUMOR_SAMPLE_PURITY', 'U-CLL_WES', 'U1_STATUS',
       'AGE', 'AGE_SAMPLING', 'COHORT', 'DEATH_DAYS', 'EXPRESSION_CLUSTER',
       'FFS_MONTHS', 'FFS_STATUS', 'IGHV_IDENTITY_PERCENTAGE',
       'IGHV_MUTATION_STATUS', 'OS_MONTHS', 'OS_STATUS',
       'PRIOR_TREATMENT_CATEGORY', 'SAMPLE_COUNT', 'SEX',
       'TREATMENT_AFTER_SAMPLING', 'TREATMENT_STATUS'],
      dtype='object', name='clinicalAtt

clinicalAttributeId,patientId,sampleId,studyId,ARRAY450K,CANCER_TYPE,CANCER_TYPE_DETAILED,CENTER,CLL_EPITYPE,DISEASE_TYPE,EC_DISCOVERY,...,FFS_STATUS,IGHV_IDENTITY_PERCENTAGE,IGHV_MUTATION_STATUS,OS_MONTHS,OS_STATUS,PRIOR_TREATMENT_CATEGORY,SAMPLE_COUNT,SEX,TREATMENT_AFTER_SAMPLING,TREATMENT_STATUS
0,P-CRC-0001,CRC-0001,cll_broad_2022,No,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,Broad,n-CLL,CLL,Yes,...,1:Failure,100.0,unmutated,147.19,1:DECEASED,Untreated,1,Female,Chemo + Ab,Pre-treatment
1,P-CRC-0002,CRC-0002,cll_broad_2022,No,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,Broad,n-CLL,CLL,Yes,...,1:Failure,100.0,unmutated,154.49,1:DECEASED,Untreated,1,Male,Chemo + Ab,Pre-treatment
2,P-CRC-0003,CRC-0003,cll_broad_2022,No,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,Broad,n-CLL,CLL,Yes,...,1:Failure,100.0,unmutated,51.25,1:DECEASED,Untreated,1,Female,Chemo + Ab,Pre-treatment
3,P-CRC-0004,CRC-0004,cll_broad_2022,No,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,Broad,m-CLL,CLL,Yes,...,1:Failure,92.28,mutated,183.55,0:LIVING,Untreated,1,Male,Chemo + Ab,Pre-treatment
4,P-CRC-0005,CRC-0005,cll_broad_2022,No,Mature B-Cell Neoplasms,Chronic Lymphocytic Leukemia/Small Lymphocytic...,Broad,n-CLL,CLL,Yes,...,1:Failure,100.0,unmutated,164.35,0:LIVING,Untreated,1,Male,Chemo + Ab,Pre-treatment


(30229, 35) Index(['alleleSpecificCopyNumber', 'aminoAcidChange', 'center', 'chr',
       'driverFilter', 'driverFilterAnnotation', 'driverTiersFilter',
       'driverTiersFilterAnnotation', 'endPosition', 'entrezGeneId', 'keyword',
       'molecularProfileId', 'mutationStatus', 'mutationType',
       'namespaceColumns', 'ncbiBuild', 'normalAltCount', 'normalRefCount',
       'patientId', 'proteinChange', 'proteinPosEnd', 'proteinPosStart',
       'referenceAllele', 'refseqMrnaId', 'sampleId', 'startPosition',
       'studyId', 'tumorAltCount', 'tumorRefCount', 'validationStatus',
       'variantAllele', 'variantType', 'geneticEntityId', 'hugoGeneSymbol',
       'type'],
      dtype='object')


Unnamed: 0,alleleSpecificCopyNumber,aminoAcidChange,center,chr,driverFilter,driverFilterAnnotation,driverTiersFilter,driverTiersFilterAnnotation,endPosition,entrezGeneId,...,startPosition,studyId,tumorAltCount,tumorRefCount,validationStatus,variantAllele,variantType,geneticEntityId,hugoGeneSymbol,type
0,,,Broad,2,,,,,98928394,200403,...,98928394,cll_broad_2022,3,18,,A,SNP,,VWA3B,protein-coding
1,,,Broad,2,,,,,198267491,23451,...,198267491,cll_broad_2022,35,27,,G,SNP,,SF3B1,protein-coding
2,,,Broad,23,,,,,83128311,1538,...,83128311,cll_broad_2022,4,42,,C,SNP,,CYLC1,protein-coding
3,,,Broad,12,,,,,15654580,5800,...,15654580,cll_broad_2022,38,35,,A,SNP,,PTPRO,protein-coding
4,,,Broad,8,,,,,139164553,51059,...,139164553,cll_broad_2022,3,49,,A,SNP,,FAM135B,protein-coding


Data written to ../../data/cll_broad_2022_clinical_data.csv
Data written to ../../data/cll_broad_2022_mutations.csv


In [16]:
# Fetch the clinical attribute definitions of the cll_broad_2022 study
# (DETAILED projection); the next cell iterates over this result.
x = cbioportal.Clinical_Attributes.getAllClinicalAttributesInStudyUsingGET(
    studyId="cll_broad_2022",
    projection="DETAILED"
).result()

In [17]:
# Print each clinical attribute's id next to its description.
for i in x:
    # if not i.patientAttribute:  # disabled filter; presumably limits output to sample-level attributes (TODO confirm)
    print(i.clinicalAttributeId, i.description)

AGE Age at diagnosis
AGE_SAMPLING Age at sampling
ARRAY450K Sample included in 450K methylation array 
CANCER_TYPE Cancer Type
CANCER_TYPE_DETAILED Cancer Type Detailed
CENTER Sequencing center
CLL_EPITYPE CLL epitype: naive-like (n-CLL), intermediate (i-CLL) and memory-like (m-CLL)
COHORT Cohort
DEATH_DAYS Death (days) from the date of sampling
DISEASE_TYPE Disease type
EC_DISCOVERY Sample included in expression cluster discovery set (1=yes, 0=no)
EC_EXTENSION Sample included in expression cluster extension set (1=yes, 0=no)
EXPRESSION_CLUSTER Expression cluster
FFS_MONTHS Failure-free survival (months)
FFS_STATUS Failure-free survival status
IGHV_IDENTITY_PERCENTAGE Immunoglobulin genes mutation identity percentage
IGHV_MUTATION_STATUS Immunoglobulin genes mutation status
IGLV3_21_R110 IGLV3-21 R110 mutation status
M-CLL_WES Sample included in M-CLL WES genetic candidate driver discovery cohort (1=yes, 0=no)
MUTATION_COUNT Mutation Count
NORMAL_MEAN_COVERAGE Normal Mean Coverage (WES

In [18]:
# Re-fetch the mutations of a single study at notebook level so that an
# individual record can be inspected in the next cell.
studyId = "cll_broad_2022"
mutations = cbioportal.Mutations.getMutationsInMolecularProfileBySampleListIdUsingGET(
    molecularProfileId=f'{studyId}_mutations',
    sampleListId=f'{studyId}_all',
    projection='DETAILED',
).result()

In [19]:
# Show the first mutation record to see which fields a record carries.
mutations[0]

Mutation(alleleSpecificCopyNumber=None, aminoAcidChange=None, center='Broad', chr='2', driverFilter=None, driverFilterAnnotation=None, driverTiersFilter=None, driverTiersFilterAnnotation=None, endPosition=98928394, entrezGeneId=200403, gene=Gene(entrezGeneId=200403, geneticEntityId=None, hugoGeneSymbol='VWA3B', type='protein-coding'), keyword='VWA3B A1212 missense', molecularProfileId='cll_broad_2022_mutations', mutationStatus='NA', mutationType='Missense_Mutation', namespaceColumns=None, ncbiBuild='GRCh37', normalAltCount=0, normalRefCount=12, patientId='P-CRC-0001', proteinChange='A1212T', proteinPosEnd=1212, proteinPosStart=1212, referenceAllele='G', refseqMrnaId='NM_144992.4', sampleId='CRC-0001', startPosition=98928394, studyId='cll_broad_2022', tumorAltCount=3, tumorRefCount=18, uniquePatientKey='UC1DUkMtMDAwMTpjbGxfYnJvYWRfMjAyMg', uniqueSampleKey='Q1JDLTAwMDE6Y2xsX2Jyb2FkXzIwMjI', validationStatus='NA', variantAllele='A', variantType='SNP')