### Disbiome

In [1]:
# Download Link: https://disbiome.ugent.be:8080/experiment
# Download Date: 2025-03-21
# Download Version: unknown

import pandas as pd
import json
with open('experiment.txt') as f:
    # data type is str
    data = json.load(f)

experiment = pd.DataFrame(data,dtype=str)
experiment = experiment[['meddra_id','organism_ncbi_id']].drop_duplicates()
experiment['meddra_id'] = experiment['meddra_id'].str.split('.').str[0]
experiment['organism_ncbi_id'] = experiment['organism_ncbi_id'].str.split('.').str[0]
experiment.dropna(subset=['organism_ncbi_id'], inplace=True)
experiment.drop_duplicates()
experiment

Unnamed: 0,meddra_id,organism_ncbi_id
0,10080683,1591
1,10080683,885
3,10028245,84112
4,10028245,1308
5,10028245,853
...,...,...
10856,10063024,1506553
10857,10063024,142586
10858,10063024,34104
10863,10063028,1407607


ICD10 to MedDRA

In [2]:
# Download Link: 
# Download Date: 2025-03-21
# Download Version: unknown

import pandas as pd

icd10_meddra = pd.read_excel('ICD-10 to MedDRA 27.1 Map - January 2025.xlsx', sheet_name='Codes Only', dtype=str)
icd10_meddra.columns = icd10_meddra.iloc[0]
icd10_meddra = icd10_meddra[1:]
icd10_meddra = icd10_meddra[icd10_meddra['Map Attribute'] == 'Equivalent']
icd10_meddra = icd10_meddra[['ICD-10 Code 2019 International Core Version','Mapped MedDRA LLT Code\nv27.1']]
icd10_meddra.rename(columns={'ICD-10 Code 2019 International Core Version':'icd10','Mapped MedDRA LLT Code\nv27.1':'meddra_id'},inplace=True)
icd10_meddra.drop_duplicates(inplace=True)
icd10_meddra

Unnamed: 0,icd10,meddra_id
1,A00,10008631
2,A00.0,10080446
3,A00.1,10008633
4,A00.9,10008634
5,A01,10045275
...,...,...
11189,Z99.2,10012347
11190,Z99.3,10047920
11191,Z99.4,10053667
11192,Z99.8,10061102


In [3]:
meddra_unique = icd10_meddra.groupby('meddra_id').agg({'icd10':lambda x:';'.join(x)}).reset_index()
meddra_unique

Unnamed: 0,meddra_id,icd10
0,10000053,I71.3
1,10000054,I71.4
2,10000081,R10.4
3,10000088,O00.0
4,10000090,R19.3
...,...,...
5090,10089748,A08.3
5091,10090364,P59.1
5092,10090623,L40.1
5093,10090629,C84.7


SnomedCT to MedDRA

In [4]:
snomedct_meddra = pd.read_excel('SNOMED CT - MedDRA Mapping - 30 April 2024.xlsx', sheet_name='MedDRA to SNOMED CT Map', dtype=str)
snomedct_meddra = snomedct_meddra[['MedDRA Code','SNOMED CT Code']].drop_duplicates()
snomedct_meddra.rename(columns={'MedDRA Code':'meddra_id','SNOMED CT Code':'snomed_id'},inplace=True)
snomedct_meddra

Unnamed: 0,meddra_id,snomed_id
0,10000051,233985008
1,10000054,233985008
2,10000056,51197009
3,10000057,51197009
4,10000058,21522001
...,...,...
6774,10085306,782555009
6775,10085440,63741006
6776,10085456,410070006
6777,10085737,429356007


Final Disbiome

In [5]:
disbiome = pd.merge(experiment,meddra_unique,on='meddra_id',how='left')
disbiome = pd.merge(disbiome,snomedct_meddra,on='meddra_id',how='left')
disbiome.drop_duplicates(inplace=True)
disbiome

Unnamed: 0,meddra_id,organism_ncbi_id,icd10,snomed_id
0,10080683,1591,,
1,10080683,885,,
2,10028245,84112,G35,24700007
3,10028245,1308,G35,24700007
4,10028245,853,G35,24700007
...,...,...,...,...
8433,10063024,1506553,,
8434,10063024,142586,,
8435,10063024,34104,,
8436,10063028,1407607,,248279007


### HMDAD

In [25]:
# Download Link: http://www.cuilab.cn/files/dmi/data_download.txt
# Download Date: 2025-03-21
# Download Version: unknown

hmdad = pd.read_csv('data_download.txt', sep='\t', dtype=str)
hmdad = hmdad[['Disease', 'Microbe']].drop_duplicates()
hmdad

Unnamed: 0,Disease,Microbe
0,Colon cancer,Collinsella aerofaciens
1,Periodontal,Fusobacterium
2,Periodontal,Porphyromonas gingivalis
3,Periodontal,Prevotella
4,Periodontal,Treponema
...,...,...
478,Liver cirrhosis,Tannerella
479,Liver cirrhosis,Veillonella
480,Liver cirrhosis,Veillonella atypica
481,Liver cirrhosis,Veillonella dispar


### BioMedGraphica ID

In [8]:
import pandas as pd
import os
from pathlib import Path

current_working_dir = Path(os.getcwd()).resolve()
grandparent_dir = current_working_dir.parent.parent.parent
target_dir_microbiota = grandparent_dir / 'BioMedGraphica' / 'Entity' / 'Microbiota' / 'BioMedGraphica_Microbiota.csv'
target_dir_disease = grandparent_dir / 'BioMedGraphica' / 'Entity' / 'Disease' / 'BioMedGraphica_Disease.csv'
biomedgraphica_microbiota = pd.read_csv(target_dir_microbiota, dtype=str)
biomedgraphica_disease = pd.read_csv(target_dir_disease, dtype=str)

### Disbiome Mapping

ICD10 ID

In [9]:
icd10_individualid = biomedgraphica_disease[['ICD10_ID', 'BioMedGraphica_ID']]
icd10_individualid.dropna(subset=['ICD10_ID'], inplace=True)
icd10_individualid = icd10_individualid.assign(ICD10_ID=icd10_individualid['ICD10_ID'].str.split(';')).explode('ICD10_ID')
icd10_to_individualid = icd10_individualid.groupby('ICD10_ID')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()

disbiome['To_ID_ICD10'] = disbiome['icd10'].map(icd10_to_individualid)
disbiome

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  icd10_individualid.dropna(subset=['ICD10_ID'], inplace=True)


Unnamed: 0,meddra_id,organism_ncbi_id,icd10,snomed_id,To_ID_ICD10
0,10080683,1591,,,
1,10080683,885,,,
2,10028245,84112,G35,24700007,BMG_DS044895
3,10028245,1308,G35,24700007,BMG_DS044895
4,10028245,853,G35,24700007,BMG_DS044895
...,...,...,...,...,...
8433,10063024,1506553,,,
8434,10063024,142586,,,
8435,10063024,34104,,,
8436,10063028,1407607,,248279007,


SnomedCT ID

In [10]:
snomedct_individualid = biomedgraphica_disease[['SNOMEDCT_ID', 'BioMedGraphica_ID']]
snomedct_individualid.dropna(subset=['SNOMEDCT_ID'], inplace=True)
snomedct_individualid = snomedct_individualid.assign(SNOMEDCT_ID=snomedct_individualid['SNOMEDCT_ID'].str.split(';')).explode('SNOMEDCT_ID')
snomedct_to_individualid = snomedct_individualid.groupby('SNOMEDCT_ID')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()

disbiome['To_ID_SNOMEDCT'] = disbiome['snomed_id'].map(snomedct_to_individualid)
disbiome

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  snomedct_individualid.dropna(subset=['SNOMEDCT_ID'], inplace=True)


Unnamed: 0,meddra_id,organism_ncbi_id,icd10,snomed_id,To_ID_ICD10,To_ID_SNOMEDCT
0,10080683,1591,,,,
1,10080683,885,,,,
2,10028245,84112,G35,24700007,BMG_DS044895,
3,10028245,1308,G35,24700007,BMG_DS044895,
4,10028245,853,G35,24700007,BMG_DS044895,
...,...,...,...,...,...,...
8433,10063024,1506553,,,,
8434,10063024,142586,,,,
8435,10063024,34104,,,,
8436,10063028,1407607,,248279007,,


NCBI Taxonomy ID

In [11]:
ncbi_taxonid_individualid = biomedgraphica_microbiota[['NCBI_Taxonomy_ID', 'BioMedGraphica_ID']]
ncbi_taxonid_individualid.dropna(subset=['NCBI_Taxonomy_ID'], inplace=True)
ncbi_taxonid_individualid = ncbi_taxonid_individualid.assign(NCBI_Taxonomy_ID=ncbi_taxonid_individualid['NCBI_Taxonomy_ID'].str.split(';')).explode('NCBI_Taxonomy_ID')
ncbi_taxonid_to_individualid = ncbi_taxonid_individualid.groupby('NCBI_Taxonomy_ID')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()

disbiome['From_ID'] = disbiome['organism_ncbi_id'].map(ncbi_taxonid_to_individualid)
disbiome

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ncbi_taxonid_individualid.dropna(subset=['NCBI_Taxonomy_ID'], inplace=True)


Unnamed: 0,meddra_id,organism_ncbi_id,icd10,snomed_id,To_ID_ICD10,To_ID_SNOMEDCT,From_ID
0,10080683,1591,,,,,BMG_MC205932
1,10080683,885,,,,,BMG_MC598541
2,10028245,84112,G35,24700007,BMG_DS044895,,BMG_MC588851
3,10028245,1308,G35,24700007,BMG_DS044895,,BMG_MC099927
4,10028245,853,G35,24700007,BMG_DS044895,,BMG_MC589185
...,...,...,...,...,...,...,...
8433,10063024,1506553,,,,,BMG_MC178783
8434,10063024,142586,,,,,BMG_MC147967
8435,10063024,34104,,,,,BMG_MC457661
8436,10063028,1407607,,248279007,,,BMG_MC139842


In [12]:
def merge_string_columns(df, columns, merge_name, separator=';'):
    def merge_strings(row):
        combined = set()
        for column in columns:
            if pd.notnull(row[column]):
                combined.update(row[column].split(separator))
        return separator.join(combined)
    
    # Apply the function to each row and create a new column
    combined_column_name = merge_name
    df[combined_column_name] = df.apply(merge_strings, axis=1)
    df.drop(columns=columns, inplace=True)
    
    return df

disbiome_micro_disease = merge_string_columns(disbiome,['To_ID_ICD10', 'To_ID_SNOMEDCT'], 'To_ID')
disbiome_micro_disease = disbiome_micro_disease[['From_ID', 'To_ID']]
disbiome_micro_disease = disbiome_micro_disease.replace('',pd.NA)
disbiome_micro_disease

Unnamed: 0,From_ID,To_ID
0,BMG_MC205932,
1,BMG_MC598541,
2,BMG_MC588851,BMG_DS044895
3,BMG_MC099927,BMG_DS044895
4,BMG_MC589185,BMG_DS044895
...,...,...
8433,BMG_MC178783,
8434,BMG_MC147967,
8435,BMG_MC457661,
8436,BMG_MC139842,


Disbiome Final

In [13]:
disbiome_micro_disease['From_ID'].replace('', pd.NA, inplace=True)
disbiome_micro_disease['To_ID'].replace('', pd.NA, inplace=True)
disbiome_micro_disease.dropna(subset=['From_ID'], inplace=True)
disbiome_micro_disease.dropna(subset=['To_ID'], inplace=True)
disbiome_micro_disease.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3521 entries, 2 to 8419
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   From_ID  3521 non-null   object
 1   To_ID    3521 non-null   object
dtypes: object(2)
memory usage: 82.5+ KB


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  disbiome_micro_disease['From_ID'].replace('', pd.NA, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  disbiome_micro_disease['To_ID'].replace('', pd.NA, inplace=True)


In [14]:
disbiome_micro_disease['From_ID'] = disbiome_micro_disease['From_ID'].str.split(';')
disbiome_micro_disease['To_ID'] = disbiome_micro_disease['To_ID'].str.split(';')

disbiome_micro_disease = disbiome_micro_disease.explode('From_ID')
disbiome_micro_disease = disbiome_micro_disease.explode('To_ID')

disbiome_micro_disease['From_ID'] = disbiome_micro_disease['From_ID'].str.strip()
disbiome_micro_disease['To_ID'] = disbiome_micro_disease['To_ID'].str.strip()
disbiome_micro_disease.drop_duplicates(inplace=True)
disbiome_micro_disease

Unnamed: 0,From_ID,To_ID
2,BMG_MC588851,BMG_DS044895
3,BMG_MC099927,BMG_DS044895
4,BMG_MC589185,BMG_DS044895
5,BMG_MC553746,BMG_DS044895
6,BMG_MC474967,BMG_DS044895
...,...,...
8393,BMG_MC504562,BMG_DS018976
8393,BMG_MC504562,BMG_DS083657
8394,BMG_MC067867,BMG_DS018976
8394,BMG_MC067867,BMG_DS083657


### HMDAD Mapping

Microbiota Name

In [26]:
import re

def clean_text(s):
    if pd.isnull(s):
        return ''
    return re.sub(r'[^a-zA-Z0-9]', '', str(s)).lower()

In [27]:
micro = biomedgraphica_microbiota.copy()

micro['Name_clean'] = micro['NCBI_Taxonomy_Name'].apply(clean_text)
hmdad['Microbe_clean'] = hmdad['Microbe'].apply(clean_text)
hmdad_to_micro = pd.merge(hmdad, micro, left_on='Microbe_clean', right_on='Name_clean',how='left')
hmdad_to_micro = hmdad_to_micro[['Disease', 'Microbe','BioMedGraphica_ID','Microbe_clean']]
hmdad_to_micro = hmdad_to_micro.rename(columns={'BioMedGraphica_ID':'BMG_micro'})
hmdad_to_micro

Unnamed: 0,Disease,Microbe,BMG_micro,Microbe_clean
0,Colon cancer,Collinsella aerofaciens,BMG_MC578719,collinsellaaerofaciens
1,Periodontal,Fusobacterium,BMG_MC589030,fusobacterium
2,Periodontal,Porphyromonas gingivalis,BMG_MC588707,porphyromonasgingivalis
3,Periodontal,Prevotella,BMG_MC588735,prevotella
4,Periodontal,Treponema,BMG_MC198110,treponema
...,...,...,...,...
445,Liver cirrhosis,Tannerella,BMG_MC308262,tannerella
446,Liver cirrhosis,Veillonella,BMG_MC403050,veillonella
447,Liver cirrhosis,Veillonella atypica,BMG_MC475993,veillonellaatypica
448,Liver cirrhosis,Veillonella dispar,BMG_MC475994,veillonelladispar


Disease Name

In [17]:
import pandas as pd
import os
from pathlib import Path

current_working_dir = Path(os.getcwd()).resolve()
grandparent_dir = current_working_dir.parent.parent.parent
target_dir_disease = grandparent_dir / 'BioMedGraphica' / 'Entity' / 'Disease' / 'BioMedGraphica_Disease_GUI_Name.csv'
disease_gui = pd.read_csv(target_dir_disease, dtype=str)

In [28]:
def clean_label(s):
    return str(s).strip().lower()

label_to_group = {}

for _, row in disease_gui.iterrows():
    all_labels = str(row['Disease_Name_List'])
    group_labels = [clean_label(l) for l in all_labels.split(' | ') if l.strip()]
    for label in group_labels:
        label_to_group[label] = all_labels

hmdad_to_micro['matched_group'] = hmdad_to_micro['Disease'].apply(lambda x: label_to_group.get(clean_label(x), None))
hmdad_to_micro

Unnamed: 0,Disease,Microbe,BMG_micro,Microbe_clean,matched_group
0,Colon cancer,Collinsella aerofaciens,BMG_MC578719,collinsellaaerofaciens,colon cancer | malignant colon neoplasm
1,Periodontal,Fusobacterium,BMG_MC589030,fusobacterium,
2,Periodontal,Porphyromonas gingivalis,BMG_MC588707,porphyromonasgingivalis,
3,Periodontal,Prevotella,BMG_MC588735,prevotella,
4,Periodontal,Treponema,BMG_MC198110,treponema,
...,...,...,...,...,...
445,Liver cirrhosis,Tannerella,BMG_MC308262,tannerella,"Fibrosis, Liver | Liver Cirrhosis"
446,Liver cirrhosis,Veillonella,BMG_MC403050,veillonella,"Fibrosis, Liver | Liver Cirrhosis"
447,Liver cirrhosis,Veillonella atypica,BMG_MC475993,veillonellaatypica,"Fibrosis, Liver | Liver Cirrhosis"
448,Liver cirrhosis,Veillonella dispar,BMG_MC475994,veillonelladispar,"Fibrosis, Liver | Liver Cirrhosis"


In [29]:
hmdad_to_micro_v1 = hmdad_to_micro.merge(disease_gui,left_on='matched_group',right_on='Disease_Name_List',how='left')
hmdad_to_micro_v1 = hmdad_to_micro_v1.rename(columns={'BioMedGraphica_ID':'BMG_disease'})
hmdad_to_micro_v1 = hmdad_to_micro_v1[['Disease','Microbe','BMG_micro','BMG_disease']]
hmdad_to_micro_v1

Unnamed: 0,Disease,Microbe,BMG_micro,BMG_disease
0,Colon cancer,Collinsella aerofaciens,BMG_MC578719,BMG_DS000469
1,Periodontal,Fusobacterium,BMG_MC589030,BMG_DS080952
2,Periodontal,Fusobacterium,BMG_MC589030,BMG_DS080976
3,Periodontal,Fusobacterium,BMG_MC589030,BMG_DS080978
4,Periodontal,Fusobacterium,BMG_MC589030,BMG_DS081117
...,...,...,...,...
25789,Liver cirrhosis,Tannerella,BMG_MC308262,BMG_DS006709
25790,Liver cirrhosis,Veillonella,BMG_MC403050,BMG_DS006709
25791,Liver cirrhosis,Veillonella atypica,BMG_MC475993,BMG_DS006709
25792,Liver cirrhosis,Veillonella dispar,BMG_MC475994,BMG_DS006709


In [30]:
hmdad_final = hmdad_to_micro_v1[['BMG_micro','BMG_disease']]
hmdad_final = hmdad_final.rename(columns={'BMG_micro':'From_ID', 'BMG_disease':'To_ID'})
hmdad_final

Unnamed: 0,From_ID,To_ID
0,BMG_MC578719,BMG_DS000469
1,BMG_MC589030,BMG_DS080952
2,BMG_MC589030,BMG_DS080976
3,BMG_MC589030,BMG_DS080978
4,BMG_MC589030,BMG_DS081117
...,...,...
25789,BMG_MC308262,BMG_DS006709
25790,BMG_MC403050,BMG_DS006709
25791,BMG_MC475993,BMG_DS006709
25792,BMG_MC475994,BMG_DS006709


HMDAD Final

In [31]:
hmdad_final['From_ID'].replace('', pd.NA, inplace=True)
hmdad_final['To_ID'].replace('', pd.NA, inplace=True)
hmdad_final.dropna(subset=['From_ID'], inplace=True)
hmdad_final.dropna(subset=['To_ID'], inplace=True)
hmdad_final.info()

<class 'pandas.core.frame.DataFrame'>
Index: 21663 entries, 0 to 25793
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   From_ID  21663 non-null  object
 1   To_ID    21663 non-null  object
dtypes: object(2)
memory usage: 507.7+ KB


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  hmdad_final['From_ID'].replace('', pd.NA, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  hmdad_final['To_ID'].replace('', pd.NA, inplace=True)


In [32]:
hmdad_final['From_ID'] = hmdad_final['From_ID'].str.split(';')
hmdad_final['To_ID'] = hmdad_final['To_ID'].str.split(';')

hmdad_final = hmdad_final.explode('From_ID')
hmdad_final = hmdad_final.explode('To_ID')

hmdad_final['From_ID'] = hmdad_final['From_ID'].str.strip()
hmdad_final['To_ID'] = hmdad_final['To_ID'].str.strip()
hmdad_final.drop_duplicates(inplace=True)
hmdad_final

Unnamed: 0,From_ID,To_ID
0,BMG_MC578719,BMG_DS000469
1,BMG_MC589030,BMG_DS080952
2,BMG_MC589030,BMG_DS080976
3,BMG_MC589030,BMG_DS080978
4,BMG_MC589030,BMG_DS081117
...,...,...
25789,BMG_MC308262,BMG_DS006709
25790,BMG_MC403050,BMG_DS006709
25791,BMG_MC475993,BMG_DS006709
25792,BMG_MC475994,BMG_DS006709


### Microbiota-Disease Relation

In [34]:
disbiome_micro_disease['source1'] = 'Disbiome'
hmdad_final['source2'] = 'HMDAD'

microbiota_disease = pd.merge(disbiome_micro_disease, hmdad_final, how='outer')

In [35]:
def merge_string_columns(df, columns, merge_name, separator=';'):
    def merge_strings(row):
        combined = set()
        for column in columns:
            if pd.notnull(row[column]):
                combined.update(row[column].split(separator))
        return separator.join(combined)
    
    # Apply the function to each row and create a new column
    combined_column_name = merge_name
    df[combined_column_name] = df.apply(merge_strings, axis=1)
    df.drop(columns=columns, inplace=True)
    
    return df

microbiota_disease = merge_string_columns(microbiota_disease, ['source1', 'source2'], 'Source')
microbiota_disease

Unnamed: 0,From_ID,To_ID,Source
0,BMG_MC000001,BMG_DS001524,Disbiome
1,BMG_MC000001,BMG_DS003407,Disbiome
2,BMG_MC000208,BMG_DS061239,Disbiome
3,BMG_MC000208,BMG_DS061240,Disbiome
4,BMG_MC000208,BMG_DS061241,Disbiome
...,...,...,...
22366,BMG_MC619462,BMG_DS000640,Disbiome
22367,BMG_MC619462,BMG_DS029389,Disbiome
22368,BMG_MC619462,BMG_DS029390,Disbiome
22369,BMG_MC619462,BMG_DS044895,Disbiome


In [36]:
microbiota_disease['Type'] = 'Microbiota-Disease'

max_length = len(str(len(microbiota_disease)))
microbiota_disease['BioMedGraphica_ID'] = ['BMG_ED_MCDS' + str(i).zfill(max_length) for i in range(1, len(microbiota_disease) + 1)]
columns = ['BioMedGraphica_ID'] + [col for col in microbiota_disease.columns if col != 'BioMedGraphica_ID']  # re-order columns
microbiota_disease = microbiota_disease[columns]
microbiota_disease

Unnamed: 0,BioMedGraphica_ID,From_ID,To_ID,Source,Type
0,BMG_ED_MCDS00001,BMG_MC000001,BMG_DS001524,Disbiome,Microbiota-Disease
1,BMG_ED_MCDS00002,BMG_MC000001,BMG_DS003407,Disbiome,Microbiota-Disease
2,BMG_ED_MCDS00003,BMG_MC000208,BMG_DS061239,Disbiome,Microbiota-Disease
3,BMG_ED_MCDS00004,BMG_MC000208,BMG_DS061240,Disbiome,Microbiota-Disease
4,BMG_ED_MCDS00005,BMG_MC000208,BMG_DS061241,Disbiome,Microbiota-Disease
...,...,...,...,...,...
22366,BMG_ED_MCDS22367,BMG_MC619462,BMG_DS000640,Disbiome,Microbiota-Disease
22367,BMG_ED_MCDS22368,BMG_MC619462,BMG_DS029389,Disbiome,Microbiota-Disease
22368,BMG_ED_MCDS22369,BMG_MC619462,BMG_DS029390,Disbiome,Microbiota-Disease
22369,BMG_ED_MCDS22370,BMG_MC619462,BMG_DS044895,Disbiome,Microbiota-Disease


In [37]:
import os
from pathlib import Path

# get the current working directory
current_working_dir = Path(os.getcwd()).resolve()

# get the output directory
grandparent_dir = current_working_dir.parent.parent.parent

target_folder = grandparent_dir / 'BioMedGraphica' / 'Relation' / 'Microbiota-Disease'
if not target_folder.exists():
    target_folder.mkdir(parents=True)
    print(f"Folder {target_folder} has been created.")

output_file_path = target_folder / 'BioMedGraphica_Microbiota_Disease.csv'
microbiota_disease.to_csv(output_file_path, index=False)
print(f"Data has been saved to {output_file_path}")

Data has been saved to D:\RA\BMG\BioMedGraphica\Relation\Microbiota-Disease\BioMedGraphica_Microbiota_Disease.csv
