In [None]:
# Run parameters for this report: API credential, data-selection filter and API endpoint.
# NOTE(review): the API token is hardcoded in the notebook source; consider injecting it at
# run time (environment variable / parameterized execution) and rotating this value.
token='14bf79bf-c244-4da8-b052-51cbf5546e'
# JSON filter string handed to Peta.set_data_restriction_from_json_string and
# pypeta.filter_description in the cells below.
# NOTE(review): the key "exadEnd" sits next to "exacStart" and looks like a typo for
# "exacEnd" — confirm against the API before changing it.
json_str='{"studyIds": ["mixed_tianhua_DX0503_2020",  "tianhua_system_DX0503"], "pageSize": 999999, "pageIndex": 1, "attributesRangeFilters": [], "attributesEqualFilters": [], "mutationFilter": {"hugoGeneSymbols": [], "exacStart": 0, "exadEnd": 1, "vabundStart": 0, "vabundEnd": 1, "variantSource": [], "variantType": [], "variantClass": [], "sequencer": [], "sequencerSource": [], "searchStr": ""}}'
# Base URL passed to the Peta client as `host`.
host='https://peta.bgi.com/api'

In [None]:
import pandas as pd
from pypeta import Peta
import pypeta
import plotly.express as px
import json
import numpy as np
from IPython.display import display

In [None]:
# Fetch the clinical table (`cli`) and the mutation table (`mut`) for the selected studies.
# If anything here fails, `cli` / `mut` may stay undefined; the later cells wrap their own
# work in try/except and report that the calculation is unsupported.
try:
    peta = Peta(token=token, host=host)
    peta.set_data_restriction_from_json_string(json_str)
    cli = peta.fetch_clinical_data()
    mut = peta.fetch_mutation_data()
except Exception:
    # Catch Exception instead of using a bare `except:` so KeyboardInterrupt / SystemExit
    # can still stop a long-running fetch.
    print('Failed to fetch data.')

# 肿瘤个体化诊疗基因检测统计

## 样本选取条件

In [None]:
# Print the description that pypeta derives from the selection filter.
try:
    print(pypeta.filter_description(json_str))
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')

## 送检样本量

In [None]:
# Report the number of rows in the clinical table.
try:
    print(f'样本总量为{len(cli)}例。')
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')

In [None]:
# Pie chart of sample counts per cancer type; also defines `reserved_cancer_type`,
# which the per-cancer sections further down use to select cancer types.
try:
    # Counts table from pypeta; the code below reads labels from its first column and
    # plots its '类别' / '数量' columns.
    vc = pypeta.restrict_series_value_counts_to_designated_records(cli.CANCER_TYPE.value_counts())

    # Keep every label except a trailing 'Others' row, if there is one.
    cancer_type_labels = vc.iloc[:, 0]
    if vc.iat[-1, 0] == 'Others':
        cancer_type_labels = cancer_type_labels.iloc[:-1]
    reserved_cancer_type = list(cancer_type_labels)

    print('分癌种的样本量为：')
    fig = px.pie(vc, values='数量', names='类别')
    fig.update_traces(texttemplate="%{label}: %{value} <br>%{percent}")
    fig.update_layout(showlegend=False)
    fig.show()
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')

## 药物检测阳性率

In [None]:
# Decide whether the dataset supports drug positive-rate statistics:
# it does when the clinical table has a GENETIC_TEST_RESULT column.
support_for_drug_sensitivity = False  # default stays in place if the check below fails
try:
    support_for_drug_sensitivity = 'GENETIC_TEST_RESULT' in cli.columns
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')

In [None]:
# Overall positive rate of the genetic test result, printed only when the dataset supports it.
try:
    if support_for_drug_sensitivity:
        # The message below reads pr[0] as the total, pr[1] as the valid count and
        # pr[2] as the rate (formatted as a percentage).
        pr = pypeta.positive_rate(cli.GENETIC_TEST_RESULT, ['阳性'])

        print(f'总例数为{pr[0]}，其中有效{pr[1]}例，阳性率为{pr[2]:8.2%}。')
    else:
        print("数据不支持该选项。")
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')

In [None]:
# Bar chart of the positive rate per reserved cancer type, sorted ascending.
try:
    # Element [2] of pypeta.positive_rate(...) is the rate; the overall-rate cell formats
    # the same element with a '%' format spec, so it is treated as a fraction here.
    positive_rate_by_cancer = (
        cli['GENETIC_TEST_RESULT']
        .groupby(cli['CANCER_TYPE'])
        .apply(lambda results: pypeta.positive_rate(results, ['阳性'])[2])
        .loc[reserved_cancer_type]
        .sort_values()
    )
    positive_rate_df = pd.DataFrame(positive_rate_by_cancer).reset_index()

    print('各个癌种的药物阳性率为：')
    fig = px.bar(
        positive_rate_df,
        x='CANCER_TYPE',
        y='GENETIC_TEST_RESULT',
        text='GENETIC_TEST_RESULT',
        labels={
            'CANCER_TYPE': '癌种',
            'GENETIC_TEST_RESULT': '药物检出阳性率',
        },
    )
    # Plotly text templates take d3-format specifiers, not printf ones:
    # '.2%' renders a fraction as a percentage with two decimals.
    fig.update_traces(texttemplate='%{text:.2%}', textposition='outside')
    fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
    fig.show()
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')

## 基因检出率 

In [None]:
# Bar chart of the detection frequency for the first 20 genes returned by pypeta.
try:
    mut_freq_per_gene_df = pypeta.mut_freq_per_gene(mut).reset_index()
    mut_freq_per_gene_df.columns = pd.Index(['基因', '频率'])

    print('各基因的检出率为：')
    # Plot the local table. The original looked it up as an attribute of the pypeta
    # module, which raised and sent the cell into the except branch every time.
    fig = px.bar(mut_freq_per_gene_df[:20], x='基因', y='频率', text='频率')
    # d3-format '.2%' shows a fraction as a percentage with two decimals.
    # NOTE(review): assumes pypeta.mut_freq_per_gene returns fractions in [0, 1] — confirm.
    fig.update_traces(texttemplate='%{text:.2%}', textposition='outside')
    fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
    fig.show()
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')

## 基因突变类型检出率

In [None]:
# Placeholder for this section: only prints a fixed notice, no statistics are computed here.
print('暂未支持。')

## TMB分布 

In [None]:
# Build the TMB working table (`tmb_tmp_df`) and the per-cancer-type summary
# statistics (`tmb_describe_df`) that the following cells print and plot.
try:
    # Keep rows whose TMB is present and accepted by pypeta.is_float.
    tmb_tmp_df = cli[['CANCER_TYPE', 'TMB']].dropna(subset=['TMB'])
    tmb_tmp_df = tmb_tmp_df[tmb_tmp_df.TMB.map(pypeta.is_float)]

    # Rebind the column through assign(): `.loc[:, 'TMB'] = ...` writes into the existing
    # column and, on pandas >= 2.0, can leave it as object dtype, which makes describe()
    # return count/unique/top/freq instead of numeric statistics.
    tmb_tmp_df = tmb_tmp_df.assign(TMB=tmb_tmp_df.TMB.astype('float'))

    # One column per cancer type, one row per statistic.
    tmb_describe_df = tmb_tmp_df.TMB.groupby(tmb_tmp_df.CANCER_TYPE).describe().T

    # A reserved cancer type with no valid TMB record has no column here; skip it
    # instead of failing the whole cell with a KeyError.
    cancer_types_with_tmb = [
        cancer_type for cancer_type in reserved_cancer_type
        if cancer_type in tmb_describe_df.columns
    ]
    tmb_describe_df = tmb_describe_df[cancer_types_with_tmb].copy()

    # Extra column with the statistics over all samples.
    tmb_describe_df['全部'] = tmb_tmp_df.TMB.describe()
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')

In [None]:
# Report how many rows are in the TMB working table.
try:
    print(f'存在TMB记录的样本共{len(tmb_tmp_df)}例。')
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')

In [None]:
# Show the TMB summary table with one row per cancer type.
try:
    print('全部样本及样本量前20的分癌种样本TMB统计值如下：')
    pd.set_option('display.width', 1000)
    # Tip: a bare variable name inside a try block is not echoed as cell output, and
    # print() renders the table as unformatted text; IPython.display.display gives the
    # formatted table instead.
    display(tmb_describe_df.T)
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')



In [None]:
# Histogram of TMB over all samples.
try:
    # Keep values accepted by pypeta.is_float, then convert them to numbers so the
    # histogram bins them on a numeric axis (values that arrive as strings would
    # otherwise be plotted as categories). Non-numeric leftovers and NaN are dropped.
    valid_tmb = cli.TMB[cli.TMB.map(pypeta.is_float)]
    total_tmp_tdf = pd.to_numeric(valid_tmb, errors='coerce').dropna().reset_index()

    print('全部样本的TMB分布如下:')
    fig = px.histogram(
        total_tmp_tdf,
        x='TMB',
        labels={'x': 'TMB', 'y': '百分比'},
        histnorm='probability density',
        marginal="rug",
    )
    fig.show()
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')

In [None]:
# Violin plot of TMB per reserved cancer type.
try:
    # Work on a separate frame: filtering and renaming `tmb_tmp_df` itself would make
    # this cell fail on a second run, because the CANCER_TYPE column would be gone.
    tmb_violin_df = (
        tmb_tmp_df[tmb_tmp_df.CANCER_TYPE.isin(reserved_cancer_type)]
        .rename(columns={'CANCER_TYPE': '癌症类型'})
    )

    print('分癌种的TMB分布如下：')
    fig = px.violin(
        tmb_violin_df,
        x='癌症类型',
        y="TMB",
        box=True,      # draw a box plot inside the violin
        points='all',  # can also be 'outliers' or False
    )
    fig.show()
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')

## MSI分布

In [None]:
# Build the MSI working table (`msi_tmp_df`, read by the following cells) and report its size.
try:
    # Cancer type and MSI status for the rows that have an MSI status.
    msi_tmp_df = cli[['CANCER_TYPE', 'MSI_STATUS']].dropna(subset=['MSI_STATUS'])

    print(f'存在MSI记录的样本共{len(msi_tmp_df)}例。')
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')

In [None]:
# Pie chart of the MSI status distribution over all samples with an MSI record.
try:
    print('其分布如下：')

    # Name the label and count columns explicitly: the default column names produced by
    # value_counts().reset_index() differ between pandas 1.x ('index' / 'MSI_STATUS')
    # and pandas 2.x ('MSI_STATUS' / 'count').
    msi_status_counts = (
        msi_tmp_df.MSI_STATUS.value_counts()
        .rename_axis('MSI_STATUS')
        .reset_index(name='COUNT')
    )

    fig = px.pie(msi_status_counts, values='COUNT', names='MSI_STATUS')
    fig.update_traces(texttemplate="%{label}: %{value} <br>%{percent}")
    fig.update_layout(showlegend=False)
    fig.show()
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')

In [None]:
# Sunburst chart of MSI status counts nested under each reserved cancer type.
try:
    # The filtered frame stays in `msi_tmp_df` because the parallel-categories cell
    # below reads it from that name.
    msi_tmp_df = msi_tmp_df[msi_tmp_df.CANCER_TYPE.isin(reserved_cancer_type)]

    # One row per (cancer type, MSI status) pair with an explicit COUNT column.
    # groupby().size() avoids relying on the name of the groupby().value_counts()
    # result, which is 'MSI_STATUS' on pandas 1.x and 'count' on pandas 2.x.
    msi_counts_df = (
        msi_tmp_df.groupby(['CANCER_TYPE', 'MSI_STATUS'])
        .size()
        .reset_index(name='COUNT')
    )

    print('分癌种的MSI分布如下：')
    fig = px.sunburst(msi_counts_df, path=['CANCER_TYPE', 'MSI_STATUS'], values='COUNT')
    fig.update_traces(texttemplate="%{label}: %{value} ")
    fig.update_layout(showlegend=False)
    fig.show()
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')

In [None]:
# Parallel-categories diagram linking cancer type and MSI status.
try:
    # Rename on a separate frame for display: overwriting `msi_tmp_df.columns` would
    # break re-running this cell and the MSI cells above, which read the original names.
    msi_display_df = msi_tmp_df.rename(
        columns={'CANCER_TYPE': '癌症类型', 'MSI_STATUS': 'MSI状态'}
    )

    fig = px.parallel_categories(msi_display_df)
    fig.show()
except Exception:
    # Narrowed from a bare `except:` so kernel interrupts are not swallowed.
    print('Data selected don`t support this calculation.')