In [1]:
import requests
import json
from io import StringIO
import pandas as pd
import numpy as np
import re
import os
from pandas.errors import EmptyDataError 

In [2]:
def save_files(output_path='TCGA_masterfile.txt', size=20000, timeout=300):
    """Query the GDC ``files`` endpoint for methylation TXT files and save the table.

    The request keeps only files whose data format is ``TXT``, whose data
    category is ``DNA Methylation`` and whose platform is
    ``Illumina Human Methylation 450``. For each match it asks for the file
    name plus a handful of case/sample annotation fields, returned as TSV.

    Args:
        output_path: Path the tab-separated table is written to.
        size: Maximum number of records requested from the API. Matches
            beyond this count are not part of the response, so raise it if
            the returned table has exactly ``size`` rows.
        timeout: Seconds to wait on the server before giving up; ``None``
            waits indefinitely.

    Returns:
        The response parsed into a ``pandas.DataFrame``, or ``0`` when the
        response body is empty (kept as-is for existing callers).

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    files_endpt = "https://api.gdc.cancer.gov/files"

    # Fields we want in the dataframe
    fields = ",".join([
        "file_name",
        "cases.submitter_id",
        "cases.samples.sample_type",
        "cases.disease_type",
        "cases.project.project_id",
        "cases.primary_site",
    ])

    # Each entry becomes one "in" clause; all clauses are AND-ed together.
    criteria = {
        "files.data_format": ["TXT"],
        "files.data_category": ["DNA Methylation"],
        "files.platform": ["Illumina Human Methylation 450"],
    }
    filters = {
        "op": "and",
        "content": [
            {"op": "in", "content": {"field": field, "value": values}}
            for field, values in criteria.items()
        ],
    }

    # A POST is used, so the filter parameters can be passed directly as a dict.
    params = {
        "filters": filters,
        "fields": fields,
        "format": "TSV",
        "size": str(size),
    }

    # The parameters are passed to 'json' rather than 'params' in this case.
    response = requests.post(
        files_endpt,
        headers={"Content-Type": "application/json"},
        json=params,
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of parsing the error body as TSV
    # and writing it to disk as if it were data.
    response.raise_for_status()
    resp = response.content.decode("utf-8")  # Decode the response to a string

    # Make, save and return the dataframe
    try:
        resp_df = pd.read_csv(StringIO(resp), sep='\t')
    except EmptyDataError:
        print('No tissue')
        return 0

    resp_df.to_csv(output_path, sep='\t', index=False)
    return resp_df

In [3]:
# Run the GDC query; `res` is the parsed DataFrame, or 0 if the response was empty.
res = save_files()

In [4]:
# Show the value returned by save_files() as this cell's output.
res

Unnamed: 0,cases.0.disease_type,cases.0.primary_site,cases.0.project.project_id,cases.0.samples.0.sample_type,cases.0.submitter_id,file_name,id
0,Adenomas and Adenocarcinomas,Thyroid gland,TCGA-THCA,Primary Tumor,TCGA-ET-A3DT,be986e1f-ac8c-46ac-bb44-cd1d7115d40d.methylati...,0c7d8ee6-d194-4456-aacc-b9811aeed07b
1,Adenomas and Adenocarcinomas,Thyroid gland,TCGA-THCA,Primary Tumor,TCGA-ET-A39I,4fae9936-03c0-433a-a733-13813116caea.methylati...,2ab73bad-203d-49af-9aef-3c2a504957d8
2,Adenomas and Adenocarcinomas,Thyroid gland,TCGA-THCA,Primary Tumor,TCGA-FY-A76V,9ee0c762-ab8f-44f4-8c1e-b2c34d3437e2.methylati...,3ae2d457-b6df-4d22-980a-8fa4937f199c
3,Adenomas and Adenocarcinomas,Thyroid gland,TCGA-THCA,Primary Tumor,TCGA-DJ-A2PP,051db48e-d2ad-4efd-978c-65ff3f3206f8.methylati...,1a009fe8-32ed-46dd-ae19-b0a65e294df4
4,Adenomas and Adenocarcinomas,Thyroid gland,TCGA-THCA,Primary Tumor,TCGA-EM-A2CU,76dc1b21-5eec-430f-96f2-60d90f72353d.methylati...,a156eadf-9f66-479a-b2e3-abc5fdb7865f
...,...,...,...,...,...,...,...
10587,Adenomas and Adenocarcinomas,Thyroid gland,TCGA-THCA,Solid Tissue Normal,TCGA-EL-A3T0,6c577e51-5833-441a-89c1-dbc9ec27769a.methylati...,4027d183-b14c-42ed-aa4f-1cec1a36859a
10588,Adenomas and Adenocarcinomas,Thyroid gland,TCGA-THCA,Primary Tumor,TCGA-DJ-A2PN,252554ce-1dcd-43de-9405-8ae96d6a1a3f.methylati...,82148510-5714-4a1c-91e7-e7424bb0a770
10589,Adenomas and Adenocarcinomas,Thyroid gland,TCGA-THCA,Primary Tumor,TCGA-ET-A25P,6f28dec4-1eca-477f-9f42-8aa488a7a838.methylati...,80f4227c-012d-4ad5-9731-5bee8ded6222
10590,Adenomas and Adenocarcinomas,Thyroid gland,TCGA-THCA,Primary Tumor,TCGA-EL-A4JZ,8e9d3671-6173-4afd-8413-254a98f74eae.methylati...,3fbdbf64-043d-41af-9eaf-974c87db26c3
