In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, auc, roc_curve
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt

import random

import warnings 
warnings.filterwarnings("ignore") 

In [2]:
def genData(df_tmp,df_health,runID_pos,disease,randomState=0):
    """Build a balanced case/control frame for one disease.

    Positives are the de-duplicated rows of ``df_tmp`` whose ``SampleID`` is in
    ``runID_pos`` and whose ``disease`` equals ``disease``; they get
    ``label = 1``. The same number of rows is drawn without replacement from
    ``df_health`` (``random`` seeded with ``randomState``) and gets
    ``label = 0``.

    Negatives are selected *by position*, so ``df_health`` does not need a
    0..n-1 index and is never modified.

    Parameters
    ----------
    df_tmp : DataFrame with at least ``SampleID`` and ``disease`` columns.
    df_health : DataFrame of candidate control rows.
    runID_pos : collection of ``SampleID`` values allowed as positives.
    disease : value matched against ``df_tmp['disease']``.
    randomState : seed passed to ``random.seed`` before sampling.

    Returns
    -------
    (data, df_health_new)
        ``data``: positives followed by the sampled negatives, re-indexed
        from 0. ``df_health_new``: the rows of ``df_health`` that were not
        sampled, re-indexed from 0.

    Raises
    ------
    ValueError
        If there are more positive rows than rows in ``df_health``.
    """
    pos_mask = df_tmp['SampleID'].isin(runID_pos) & (df_tmp['disease'] == disease)
    df_pos = df_tmp[pos_mask].drop_duplicates()
    df_pos['label'] = 1

    random.seed(randomState)
    if len(df_pos) > len(df_health):
        raise ValueError(
            "genData: %d positive rows but only %d healthy rows to sample from"
            % (len(df_pos), len(df_health))
        )
    # Positions (not index labels) of the healthy rows used as negatives.
    neg_idx = random.sample(range(len(df_health)), len(df_pos))
    # .copy() so adding the label never touches the caller's frame.
    df_neg = df_health.iloc[neg_idx].copy()
    df_neg['label'] = 0

    # Positional boolean mask of the healthy rows that were NOT sampled.
    keep = np.ones(len(df_health), dtype=bool)
    keep[neg_idx] = False
    df_health_new = df_health.iloc[keep].reset_index(drop=True)

    data = pd.concat([df_pos,df_neg]).reset_index(drop=True)
    return data,df_health_new



In [3]:
def Eval(true, prob):
    """Return ``(auc, f1)`` for binary labels and predicted probabilities.

    AUC is computed from the ROC curve of ``prob`` with 1 as the positive
    class. F1 is computed after thresholding ``prob`` at 0.5
    (``>= 0.5`` -> 1, otherwise 0).

    The thresholding is done on a new array, so the caller's ``prob`` is left
    untouched and plain Python lists are accepted as well.

    Parameters
    ----------
    true : array-like of binary ground-truth labels.
    prob : array-like of scores/probabilities for the positive class.

    Returns
    -------
    (auc_tmp, f1_tmp) : tuple of floats.
    """
    fpr, tpr, _ = roc_curve(true, prob, pos_label=1)
    auc_tmp = auc(fpr, tpr)

    # Do not write the hard labels back into `prob`: callers may still need
    # the original probabilities after evaluation.
    pred = (np.asarray(prob) >= 0.5).astype(int)
    f1_tmp = f1_score(true, pred)
    return auc_tmp, f1_tmp

In [4]:
def genData_test(df_tmp,df_health,runID_pos,disease,randomState=0):
    """Stack positive rows for ``disease`` on top of a given control frame.

    Positives are the de-duplicated rows of ``df_tmp`` whose ``SampleID`` is in
    ``runID_pos`` and whose ``disease`` equals ``disease``, labelled 1 and
    restricted to the columns of ``df_health``. If there are more positives
    than control rows, ``len(df_health)`` positives are drawn at random
    (seeded with ``randomState``); otherwise all positives are kept.

    Returns the positives followed by ``df_health``, re-indexed from 0.
    """
    is_case = df_tmp['SampleID'].isin(runID_pos) & (df_tmp['disease'] == disease)
    cases = df_tmp[is_case].drop_duplicates()
    cases['label'] = 1
    cases = cases.reset_index(drop=True)

    wanted_cols = df_health.columns
    if len(cases) <= len(df_health):
        case_part = cases[wanted_cols]
    else:
        # More cases than controls: down-sample the cases to the control count.
        random.seed(randomState)
        chosen = random.sample(range(len(cases)), len(df_health))
        case_part = cases.loc[chosen, wanted_cols]

    stacked = pd.concat([case_part, df_health])
    return stacked.reset_index(drop=True)

In [5]:
# Patient table: drop the saved index column, then name the two remaining
# columns (the assignment below requires exactly two columns to be left).
df_disease = pd.read_csv('patients_ID.csv',low_memory=False).drop(columns=['Unnamed: 0'])
df_disease.columns = ['SampleID','disease']

In [6]:
# Tab-separated abundance table, keyed by SampleID.
df = pd.read_csv('disease_health.genus_new.Abd',sep='\t',low_memory=False)

# Sample IDs present in both the abundance table and the patient table.
disease_SampleID = set(df['SampleID']).intersection(set(df_disease['SampleID']))

In [7]:
# Healthy-sample table; it is concatenated under the patient table below, so it
# presumably carries the same SampleID/disease columns - TODO confirm.
df_health = pd.read_csv('health.csv')

In [8]:
# Patients that have abundance data, followed by the healthy samples.
patients_with_abundance = df_disease.loc[df_disease['SampleID'].isin(disease_SampleID)]
df_all = pd.concat([patients_with_abundance, df_health]).reset_index(drop=True)

In [9]:
# Attach the abundance columns to every (SampleID, disease) row. A left join
# keeps rows whose SampleID has no abundance record; those rows get NaN in
# the abundance columns.
df_new = df_all.merge(df,how='left',on=['SampleID'])

In [10]:
# Keep only samples whose 'Unclassified' value is below 0.05
# (rows with NaN in that column fail the comparison and are dropped too).
df_new_s = df_new[df_new['Unclassified']<0.05].reset_index(drop=True)

In [11]:
# Healthy subset of the filtered table, re-indexed from 0 so that positional
# sampling in genData lines up with the index labels.
df_health_s = df_new_s[df_new_s['disease']=='health'].reset_index(drop=True)

In [12]:
df_health_s.shape

(3274, 2010)

In [13]:
# Feature columns: everything except the identifier, the disease tag and the
# 'Unclassified' column.
column_names = [x for x in df_new_s.columns if x not in ('SampleID','disease','Unclassified')]

In [14]:
# One row per SampleID holding the set of disease tags recorded for it.
sample_disease_pairs = df_new_s[['SampleID','disease']]
disease_sta = sample_disease_pairs.groupby('SampleID',as_index=False).agg(set)

In [15]:
disease_name = 'ibs'
another_name = 'autoimmune'

def isDisease(x):
    """Classify a sample from its collection of disease tags ``x``.

    Returns 'IBS' or 'Autoimmune' when that is the sample's only tag,
    'Multiple' when both tags are present (other tags may be present too),
    and 'Other' in every remaining case.
    """
    has_first = disease_name in x
    has_second = another_name in x
    if len(x) == 1:
        if has_first:
            return 'IBS'
        if has_second:
            return 'Autoimmune'
    if has_first and has_second:
        return 'Multiple'
    return 'Other'
    
# Tag every sample with its disease category.
disease_sta['which_disease'] = disease_sta['disease'].apply(isDisease)

In [16]:
sum(disease_sta['which_disease']=='Multiple')

385

In [17]:
# Balanced single-disease set: rows of samples categorised 'Autoimmune' whose
# disease tag equals another_name, plus an equal number of healthy rows drawn
# from df_health_s (default seed 0). df_health_new holds the unused healthy rows.
data_Single_Autoimmune, df_health_new = genData(df_new_s,df_health_s,
                                             set(disease_sta.loc[disease_sta['which_disease']=='Autoimmune','SampleID']),
                                             another_name)   

In [18]:
data_Single_Autoimmune.shape

(936, 2011)

In [19]:
data_Single_Autoimmune[data_Single_Autoimmune['label']==1].shape

(468, 2011)

In [20]:
data_Single_Autoimmune[data_Single_Autoimmune['label']==0].shape

(468, 2011)

In [21]:
# Multi-disease set: positives are the rows of samples categorised 'Multiple'
# whose disease tag equals disease_name; controls are exactly the label==0 rows
# already used in data_Single_Autoimmune, so both sets share the same controls.
data_Multiple = genData_test(df_new_s,data_Single_Autoimmune[data_Single_Autoimmune['label']==0],
                                 set(disease_sta.loc[disease_sta['which_disease']=='Multiple','SampleID']) ,disease_name)

In [22]:
data_Multiple.shape

(853, 2011)

In [23]:
data_Multiple[data_Multiple['label']==1].shape

(385, 2011)

In [24]:
data_Multiple[data_Multiple['label']==0].shape

(468, 2011)

In [25]:
from mvtpy import mvtest

In [26]:
# Tester object; the cells below call mv.test(feature, label) and read the 'Tn' entry.
mv = mvtest.mvtest()

In [27]:
# (genus, Tn) for every feature column: MV statistic between the feature and
# the label in the single-disease set.
res = [
    (each, mv.test(data_Single_Autoimmune[each], data_Single_Autoimmune['label'])['Tn'])
    for each in column_names
]

In [28]:
# Rank genera by their statistic, largest first.
res_Autoimmune = sorted(res, key=lambda pair: pair[1], reverse=True)

In [29]:
# Names of the genera whose statistic exceeds the 0.712 cut-off (ranking order kept).
res_Autoimmune_s = [genus for genus, tn in res_Autoimmune if tn > 0.712]

In [30]:
res_Autoimmune_s

['Cor_Eggerthella',
 'Ent_Tatumella',
 'Ent_Cronobacter',
 'Ent_Edwardsiella',
 'Tis_WAL_1855D',
 'Pre_Prevotella',
 'Mog_Eubacterium',
 'Bif_Bifidobacterium',
 'Ery_Erysipelatoclostridium',
 'Ent_Escherichia_Shigella',
 'Lac_Tyzzerella',
 'Ent_Enterobacteriaceae_Group',
 'Ent_Raoultella',
 'Pas_Haemophilus',
 'Ent_Enterobacter',
 'Phy_Phyllobacterium',
 'Pep_Romboutsia',
 'Rum_Papillibacter',
 'Ent_Cedecea',
 'Vei_Dialister',
 'Cor_Slackia',
 'Ent_Enterococcus',
 'Ent_Salmonella',
 'Ent_Leminorella',
 'Ent_Arsenophonus',
 'Cam_Campylobacter',
 'Chr_Christensenellaceae_Group',
 'Sta_Staphylococcus',
 'Por_Porphyromonas',
 'Cor_Collinsella',
 'Ent_Serratia',
 'Lac_Eisenbergiella',
 'Clo_Eubacterium',
 'Mor_Enhydrobacter']

In [30]:
# Same scoring as for the single-disease set, now on the multi-disease set.
# NOTE: this overwrites `res` from the earlier cell.
res = [
    (each, mv.test(data_Multiple[each], data_Multiple['label'])['Tn'])
    for each in column_names
]

In [31]:
# Rank genera by their statistic, largest first.
res_Multi = sorted(res, key=lambda pair: pair[1], reverse=True)

In [32]:
# Names of the genera whose statistic exceeds the 0.712 cut-off (ranking order kept).
res_Multi_s = [genus for genus, tn in res_Multi if tn > 0.712]

In [33]:
# Ranked single-disease scores as a two-column frame.
df_Autoimmune = pd.DataFrame(res_Autoimmune, columns=['genus', 'score_s'])

In [34]:
df_Autoimmune

Unnamed: 0,genus,score_s
0,Cor_Eggerthella,3.62
1,Ent_Tatumella,3.22
2,Ent_Cronobacter,2.73
3,Ent_Edwardsiella,2.55
4,Tis_WAL_1855D,2.40
...,...,...
2002,Met_LD28,0.00
2003,Rho_Planktomarina,0.00
2004,Rho_Methyloligella,0.00
2005,Phy_Phycisphaeraceae_Group,0.00


In [35]:
# Ranked multi-disease scores as a two-column frame.
df_Multi = pd.DataFrame(res_Multi, columns=['genus', 'score_m'])

In [36]:
# Put both scores side by side; 'genus' is the only column the frames share.
heatMap = df_Multi.merge(df_Autoimmune, on='genus')

In [37]:
# Union of the ten top-ranked significant genera from each analysis.
top_genera = set(res_Multi_s[:10]) | set(res_Autoimmune_s[:10])
heatMap_s = heatMap[heatMap['genus'].isin(top_genera)].reset_index(drop=True)

In [38]:
heatMap_s

Unnamed: 0,genus,score_m,score_s
0,Lac_Eisenbergiella,8.15,0.75
1,Cor_Eggerthella,5.66,3.62
2,Pre_Prevotella,5.18,2.23
3,Tis_WAL_1855D,4.33,2.4
4,Ent_Cronobacter,4.08,2.73
5,Lac_Hungatella,4.0,0.35
6,Mog_Eubacterium,3.99,2.11
7,Ent_Edwardsiella,3.93,2.55
8,Bif_Bifidobacterium,3.85,2.03
9,Ery_Erysipelatoclostridium,3.19,1.85


# 0.712 is the 99% quantile of the asymptotic null distribution of the MV test statistic

In [39]:
# Binarise the multi-disease score: 1 if above the 0.712 cut-off, else 0.
heatMap_s['score_m'] = (heatMap_s['score_m'] > 0.712).astype('int64')

In [40]:
# Binarise the single-disease score: 1 if above the 0.712 cut-off, else 0.
heatMap_s['score_s'] = (heatMap_s['score_s'] > 0.712).astype('int64')

In [41]:
heatMap_s

Unnamed: 0,genus,score_m,score_s
0,Lac_Eisenbergiella,1,1
1,Cor_Eggerthella,1,1
2,Pre_Prevotella,1,1
3,Tis_WAL_1855D,1,1
4,Ent_Cronobacter,1,1
5,Lac_Hungatella,1,0
6,Mog_Eubacterium,1,1
7,Ent_Edwardsiella,1,1
8,Bif_Bifidobacterium,1,1
9,Ery_Erysipelatoclostridium,1,1


# Microbial biomarkers obtained by the distribution-free independence test were similar between SD and MD

In [42]:
heatMap_s

Unnamed: 0,genus,score_m,score_s
0,Lac_Eisenbergiella,1,1
1,Cor_Eggerthella,1,1
2,Pre_Prevotella,1,1
3,Tis_WAL_1855D,1,1
4,Ent_Cronobacter,1,1
5,Lac_Hungatella,1,0
6,Mog_Eubacterium,1,1
7,Ent_Edwardsiella,1,1
8,Bif_Bifidobacterium,1,1
9,Ery_Erysipelatoclostridium,1,1


# Quartile

In [43]:
data_Single_Autoimmune.shape

(936, 2011)

In [44]:
data_Multiple.shape

(853, 2011)

In [45]:
list(heatMap_s['genus'])

['Lac_Eisenbergiella',
 'Cor_Eggerthella',
 'Pre_Prevotella',
 'Tis_WAL_1855D',
 'Ent_Cronobacter',
 'Lac_Hungatella',
 'Mog_Eubacterium',
 'Ent_Edwardsiella',
 'Bif_Bifidobacterium',
 'Ery_Erysipelatoclostridium',
 'Ent_Escherichia_Shigella',
 'Ent_Tatumella']

In [46]:
# Selected genera plus the label, from the single-disease set.
data_Single_tmp = data_Single_Autoimmune[[*heatMap_s['genus'], 'label']]

In [47]:
sum(data_Single_tmp['label']==1)

468

In [48]:
sum(data_Single_tmp['label']==0)

468

In [49]:
data_Single_tmp[data_Single_tmp['label']==1].min()

Lac_Eisenbergiella            0.0
Cor_Eggerthella               0.0
Pre_Prevotella                0.0
Tis_WAL_1855D                 0.0
Ent_Cronobacter               0.0
Lac_Hungatella                0.0
Mog_Eubacterium               0.0
Ent_Edwardsiella              0.0
Bif_Bifidobacterium           0.0
Ery_Erysipelatoclostridium    0.0
Ent_Escherichia_Shigella      0.0
Ent_Tatumella                 0.0
label                         1.0
dtype: float64

In [50]:
data_Single_tmp[data_Single_tmp['label']==1].quantile(0.25)

Lac_Eisenbergiella            0.000000
Cor_Eggerthella               0.000000
Pre_Prevotella                0.000156
Tis_WAL_1855D                 0.000000
Ent_Cronobacter               0.000000
Lac_Hungatella                0.000000
Mog_Eubacterium               0.000000
Ent_Edwardsiella              0.000000
Bif_Bifidobacterium           0.000024
Ery_Erysipelatoclostridium    0.000000
Ent_Escherichia_Shigella      0.002600
Ent_Tatumella                 0.000000
label                         1.000000
Name: 0.25, dtype: float64

In [51]:
data_Single_tmp[data_Single_tmp['label']==1].median()

Lac_Eisenbergiella            0.000000
Cor_Eggerthella               0.000000
Pre_Prevotella                0.000661
Tis_WAL_1855D                 0.000000
Ent_Cronobacter               0.000053
Lac_Hungatella                0.000000
Mog_Eubacterium               0.000000
Ent_Edwardsiella              0.000000
Bif_Bifidobacterium           0.000246
Ery_Erysipelatoclostridium    0.000050
Ent_Escherichia_Shigella      0.054706
Ent_Tatumella                 0.000000
label                         1.000000
dtype: float64

In [52]:
data_Single_tmp[data_Single_tmp['label']==1].quantile(0.75)

Lac_Eisenbergiella            0.000195
Cor_Eggerthella               0.000070
Pre_Prevotella                0.008365
Tis_WAL_1855D                 0.000096
Ent_Cronobacter               0.000274
Lac_Hungatella                0.000015
Mog_Eubacterium               0.000145
Ent_Edwardsiella              0.000249
Bif_Bifidobacterium           0.001590
Ery_Erysipelatoclostridium    0.000568
Ent_Escherichia_Shigella      0.462797
Ent_Tatumella                 0.000208
label                         1.000000
Name: 0.75, dtype: float64

In [53]:
data_Single_tmp[data_Single_tmp['label']==1].max()

Lac_Eisenbergiella            0.088529
Cor_Eggerthella               0.001856
Pre_Prevotella                0.560772
Tis_WAL_1855D                 0.063906
Ent_Cronobacter               0.038901
Lac_Hungatella                0.034345
Mog_Eubacterium               0.021408
Ent_Edwardsiella              0.001570
Bif_Bifidobacterium           0.219451
Ery_Erysipelatoclostridium    0.158302
Ent_Escherichia_Shigella      0.974768
Ent_Tatumella                 0.025229
label                         1.000000
dtype: float64

In [54]:
data_Single_tmp[data_Single_tmp['label']==0].min()

Lac_Eisenbergiella            0.0
Cor_Eggerthella               0.0
Pre_Prevotella                0.0
Tis_WAL_1855D                 0.0
Ent_Cronobacter               0.0
Lac_Hungatella                0.0
Mog_Eubacterium               0.0
Ent_Edwardsiella              0.0
Bif_Bifidobacterium           0.0
Ery_Erysipelatoclostridium    0.0
Ent_Escherichia_Shigella      0.0
Ent_Tatumella                 0.0
label                         0.0
dtype: float64

In [55]:
data_Single_tmp[data_Single_tmp['label']==0].quantile(0.25)

Lac_Eisenbergiella            0.000000
Cor_Eggerthella               0.000000
Pre_Prevotella                0.000314
Tis_WAL_1855D                 0.000000
Ent_Cronobacter               0.000000
Lac_Hungatella                0.000000
Mog_Eubacterium               0.000000
Ent_Edwardsiella              0.000000
Bif_Bifidobacterium           0.000089
Ery_Erysipelatoclostridium    0.000000
Ent_Escherichia_Shigella      0.001078
Ent_Tatumella                 0.000000
label                         0.000000
Name: 0.25, dtype: float64

In [56]:
data_Single_tmp[data_Single_tmp['label']==0].median()

Lac_Eisenbergiella            0.000000
Cor_Eggerthella               0.000000
Pre_Prevotella                0.001749
Tis_WAL_1855D                 0.000000
Ent_Cronobacter               0.000000
Lac_Hungatella                0.000000
Mog_Eubacterium               0.000000
Ent_Edwardsiella              0.000000
Bif_Bifidobacterium           0.000657
Ery_Erysipelatoclostridium    0.000000
Ent_Escherichia_Shigella      0.007026
Ent_Tatumella                 0.000000
label                         0.000000
dtype: float64

In [57]:
data_Single_tmp[data_Single_tmp['label']==0].quantile(0.75)

Lac_Eisenbergiella            0.000125
Cor_Eggerthella               0.000000
Pre_Prevotella                0.023660
Tis_WAL_1855D                 0.000196
Ent_Cronobacter               0.000186
Lac_Hungatella                0.000000
Mog_Eubacterium               0.000046
Ent_Edwardsiella              0.000154
Bif_Bifidobacterium           0.003247
Ery_Erysipelatoclostridium    0.000349
Ent_Escherichia_Shigella      0.329562
Ent_Tatumella                 0.000000
label                         0.000000
Name: 0.75, dtype: float64

In [58]:
data_Single_tmp[data_Single_tmp['label']==0].max()

Lac_Eisenbergiella            0.068194
Cor_Eggerthella               0.003580
Pre_Prevotella                0.839054
Tis_WAL_1855D                 0.239647
Ent_Cronobacter               0.012043
Lac_Hungatella                0.013627
Mog_Eubacterium               0.002858
Ent_Edwardsiella              0.001310
Bif_Bifidobacterium           0.858402
Ery_Erysipelatoclostridium    0.016636
Ent_Escherichia_Shigella      0.966805
Ent_Tatumella                 0.020897
label                         0.000000
dtype: float64

In [59]:
# Selected genera plus the label, from the multi-disease set.
data_Multiple_tmp = data_Multiple[[*heatMap_s['genus'], 'label']]

In [60]:
sum(data_Multiple_tmp['label']==1)

385

In [61]:
sum(data_Multiple_tmp['label']==0)

468

In [62]:
data_Multiple_tmp[data_Multiple_tmp['label']==1].min()

Lac_Eisenbergiella            0.0
Cor_Eggerthella               0.0
Pre_Prevotella                0.0
Tis_WAL_1855D                 0.0
Ent_Cronobacter               0.0
Lac_Hungatella                0.0
Mog_Eubacterium               0.0
Ent_Edwardsiella              0.0
Bif_Bifidobacterium           0.0
Ery_Erysipelatoclostridium    0.0
Ent_Escherichia_Shigella      0.0
Ent_Tatumella                 0.0
label                         1.0
dtype: float64

In [63]:
data_Multiple_tmp[data_Multiple_tmp['label']==1].quantile(0.25)

Lac_Eisenbergiella            0.000000
Cor_Eggerthella               0.000000
Pre_Prevotella                0.000105
Tis_WAL_1855D                 0.000000
Ent_Cronobacter               0.000000
Lac_Hungatella                0.000000
Mog_Eubacterium               0.000000
Ent_Edwardsiella              0.000000
Bif_Bifidobacterium           0.000015
Ery_Erysipelatoclostridium    0.000000
Ent_Escherichia_Shigella      0.002760
Ent_Tatumella                 0.000000
label                         1.000000
Name: 0.25, dtype: float64

In [64]:
data_Multiple_tmp[data_Multiple_tmp['label']==1].median()

Lac_Eisenbergiella            0.000093
Cor_Eggerthella               0.000000
Pre_Prevotella                0.000396
Tis_WAL_1855D                 0.000000
Ent_Cronobacter               0.000087
Lac_Hungatella                0.000000
Mog_Eubacterium               0.000000
Ent_Edwardsiella              0.000031
Bif_Bifidobacterium           0.000168
Ery_Erysipelatoclostridium    0.000092
Ent_Escherichia_Shigella      0.103443
Ent_Tatumella                 0.000000
label                         1.000000
dtype: float64

In [65]:
data_Multiple_tmp[data_Multiple_tmp['label']==1].quantile(0.75)

Lac_Eisenbergiella            0.000591
Cor_Eggerthella               0.000110
Pre_Prevotella                0.003625
Tis_WAL_1855D                 0.000044
Ent_Cronobacter               0.000287
Lac_Hungatella                0.000123
Mog_Eubacterium               0.000178
Ent_Edwardsiella              0.000267
Bif_Bifidobacterium           0.001150
Ery_Erysipelatoclostridium    0.000823
Ent_Escherichia_Shigella      0.498781
Ent_Tatumella                 0.000203
label                         1.000000
Name: 0.75, dtype: float64

In [66]:
data_Multiple_tmp[data_Multiple_tmp['label']==1].max()

Lac_Eisenbergiella            0.016541
Cor_Eggerthella               0.002021
Pre_Prevotella                0.436239
Tis_WAL_1855D                 0.092139
Ent_Cronobacter               0.013894
Lac_Hungatella                0.018164
Mog_Eubacterium               0.013451
Ent_Edwardsiella              0.001624
Bif_Bifidobacterium           0.528050
Ery_Erysipelatoclostridium    0.016438
Ent_Escherichia_Shigella      0.983706
Ent_Tatumella                 0.036923
label                         1.000000
dtype: float64

In [67]:
data_Multiple_tmp[data_Multiple_tmp['label']==0].min()

Lac_Eisenbergiella            0.0
Cor_Eggerthella               0.0
Pre_Prevotella                0.0
Tis_WAL_1855D                 0.0
Ent_Cronobacter               0.0
Lac_Hungatella                0.0
Mog_Eubacterium               0.0
Ent_Edwardsiella              0.0
Bif_Bifidobacterium           0.0
Ery_Erysipelatoclostridium    0.0
Ent_Escherichia_Shigella      0.0
Ent_Tatumella                 0.0
label                         0.0
dtype: float64

In [68]:
data_Multiple_tmp[data_Multiple_tmp['label']==0].quantile(0.25)

Lac_Eisenbergiella            0.000000
Cor_Eggerthella               0.000000
Pre_Prevotella                0.000314
Tis_WAL_1855D                 0.000000
Ent_Cronobacter               0.000000
Lac_Hungatella                0.000000
Mog_Eubacterium               0.000000
Ent_Edwardsiella              0.000000
Bif_Bifidobacterium           0.000089
Ery_Erysipelatoclostridium    0.000000
Ent_Escherichia_Shigella      0.001078
Ent_Tatumella                 0.000000
label                         0.000000
Name: 0.25, dtype: float64

In [69]:
data_Multiple_tmp[data_Multiple_tmp['label']==0].median()

Lac_Eisenbergiella            0.000000
Cor_Eggerthella               0.000000
Pre_Prevotella                0.001749
Tis_WAL_1855D                 0.000000
Ent_Cronobacter               0.000000
Lac_Hungatella                0.000000
Mog_Eubacterium               0.000000
Ent_Edwardsiella              0.000000
Bif_Bifidobacterium           0.000657
Ery_Erysipelatoclostridium    0.000000
Ent_Escherichia_Shigella      0.007026
Ent_Tatumella                 0.000000
label                         0.000000
dtype: float64

In [70]:
data_Multiple_tmp[data_Multiple_tmp['label']==0].quantile(0.75)

Lac_Eisenbergiella            0.000125
Cor_Eggerthella               0.000000
Pre_Prevotella                0.023660
Tis_WAL_1855D                 0.000196
Ent_Cronobacter               0.000186
Lac_Hungatella                0.000000
Mog_Eubacterium               0.000046
Ent_Edwardsiella              0.000154
Bif_Bifidobacterium           0.003247
Ery_Erysipelatoclostridium    0.000349
Ent_Escherichia_Shigella      0.329562
Ent_Tatumella                 0.000000
label                         0.000000
Name: 0.75, dtype: float64

In [71]:
data_Multiple_tmp[data_Multiple_tmp['label']==0].max()

Lac_Eisenbergiella            0.068194
Cor_Eggerthella               0.003580
Pre_Prevotella                0.839054
Tis_WAL_1855D                 0.239647
Ent_Cronobacter               0.012043
Lac_Hungatella                0.013627
Mog_Eubacterium               0.002858
Ent_Edwardsiella              0.001310
Bif_Bifidobacterium           0.858402
Ery_Erysipelatoclostridium    0.016636
Ent_Escherichia_Shigella      0.966805
Ent_Tatumella                 0.020897
label                         0.000000
dtype: float64

# First tree of lgb

In [72]:
# One classifier object with a fixed seed; it is fitted on the single-disease
# set first and later re-fitted on the multi-disease set, so after the second
# fit `clf` no longer holds the single-disease model.
clf = lgb.LGBMClassifier(learning_rate=0.02,max_depth=5,n_estimators=1000,random_state=2021,num_leaves=32,
                         n_jobs=2,subsample=0.8,subsample_freq=5,colsample_bytree=0.8)

In [73]:
# Features for the single-disease model: every feature column except those
# whose standard deviation is exactly 0 in this data set.
std_ = data_Single_Autoimmune[column_names].std()
# Build the zero-variance lookup once instead of once per column.
constant_cols = set(std_[std_==0].index)
feats = [x for x in data_Single_Autoimmune[column_names].columns if x not in constant_cols]

In [74]:
# Fit on the single-disease set using the non-constant features selected above.
clf.fit(data_Single_Autoimmune[feats],data_Single_Autoimmune['label'])

LGBMClassifier(colsample_bytree=0.8, learning_rate=0.02, max_depth=5,
               n_estimators=1000, n_jobs=2, num_leaves=32, random_state=2021,
               subsample=0.8, subsample_freq=5)

In [75]:
# Draw the first tree (tree_index=0) of the model fitted on the single-disease set.
graph = lgb.create_tree_digraph(clf, tree_index=0, name='Tree_Autoimmune_SD')
# NOTE(review): view=True presumably opens the rendered file in a local viewer,
# which may not work on a headless machine - confirm before running remotely.
graph.render(view=True)

'Tree_Autoimmune_SD.gv.pdf'

In [76]:
# Features for the multi-disease model: every feature column except those
# whose standard deviation is exactly 0 in this data set.
# NOTE: this overwrites `std_` and `feats` from the single-disease cells.
std_ = data_Multiple[column_names].std()
# Build the zero-variance lookup once instead of once per column.
constant_cols = set(std_[std_==0].index)
feats = [x for x in data_Multiple[column_names].columns if x not in constant_cols]

In [77]:
# Re-fit the same classifier object on the multi-disease set; this replaces
# the single-disease fit held in `clf`.
clf.fit(data_Multiple[feats],data_Multiple['label'])

LGBMClassifier(colsample_bytree=0.8, learning_rate=0.02, max_depth=5,
               n_estimators=1000, n_jobs=2, num_leaves=32, random_state=2021,
               subsample=0.8, subsample_freq=5)

In [78]:
# Draw the first tree (tree_index=0) of the model fitted on the multi-disease set.
graph = lgb.create_tree_digraph(clf, tree_index=0, name='Tree_Autoimmune_MD')
# NOTE(review): view=True presumably opens the rendered file in a local viewer,
# which may not work on a headless machine - confirm before running remotely.
graph.render(view=True)

'Tree_Autoimmune_MD.gv.pdf'