### Ensembl

In [39]:
import pandas as pd
from pybiomart import Server

# List all available attributes
def list_attributes():
    server = Server(host='http://www.ensembl.org')
    dataset = server.marts['ENSEMBL_MART_ENSEMBL'].datasets['hsapiens_gene_ensembl']
    attributes = dataset.list_attributes()
    return attributes

attributes = list_attributes()

def fetch_ensembl_data(attributes):
    server = Server(host='http://www.ensembl.org')
    #https://www.ensembl.org/biomart/martservice?type=datasets&mart=ENSEMBL_MART_ENSEMBL
    #this link shows that hsapiens_gene_ensembl is the GRCh38.p14
    dataset = server.marts['ENSEMBL_MART_ENSEMBL'].datasets['hsapiens_gene_ensembl']
    
    response = dataset.query(attributes)
    
    return response

attributes=['ensembl_transcript_id', 'ensembl_peptide_id']
data_ensembl = fetch_ensembl_data(attributes)
data_ensembl.to_csv('transcript_protein.csv', index=False)

In [1]:
# Download Link: API
# Download Date: 2025-03-21
# Download Version: 2025-03-21
import pandas as pd

df_ensembl_transcript_protein = pd.read_csv('transcript_protein.csv')
df_ensembl_transcript_protein

Unnamed: 0,Transcript stable ID,Protein stable ID
0,ENST00000387314,
1,ENST00000389680,
2,ENST00000387342,
3,ENST00000387347,
4,ENST00000386347,
...,...,...
412029,ENST00000831127,
412030,ENST00000466430,
412031,ENST00000477740,
412032,ENST00000471248,


### Uniprot

In [5]:
import requests
from io import StringIO

def fetch_uniprot_data(params):
    url = "https://rest.uniprot.org/uniprotkb/stream"

    response = requests.get(url, params=params)

    if response.ok:
        tsv_data = StringIO(response.text)
        df = pd.read_csv(tsv_data, sep='\t')
        return df
    else:
        print("Failed to fetch data:", response.status_code)
        print(response.text)
        return None
# change the parameters to fetch the data
# load protein information
params = {
        'fields': 'accession,xref_ensembl',
        'format': 'tsv',
        'query': '(model_organism:9606) AND (reviewed:true)',
        'sort': 'organism_name asc'
    }

df_uniprot = fetch_uniprot_data(params)
if df_uniprot is not None:
    print(df_uniprot)
else:
    print("No data retrieved.")
#save the data to a CSV file
df_uniprot.to_csv('uniprot_transcript_protein.csv', index=False)

            Entry                               Ensembl
0      A0A024R1R8                    ENST00000424496.3;
1      A0A024RBG1                    ENST00000322209.5;
2      A0A075B6H7  ENST00000390247.2;ENST00000633250.1;
3      A0A075B6H8                    ENST00000390278.3;
4      A0A075B6H9                    ENST00000390282.2;
...           ...                                   ...
20412      U3KPV4                    ENST00000442999.3;
20413      W5XKT8         ENST00000637797.2 [W5XKT8-1];
20414      W6CW81                    ENST00000696987.1;
20415      X6R8D5                    ENST00000372963.4;
20416      X6R8R1  ENST00000623662.4;ENST00000673006.3;

[20417 rows x 2 columns]


In [2]:
# Download Link: API
# Download Date: 2025-03-21
# Download Version: 2025-03-21

df_uniprot_t_p = pd.read_csv('uniprot_transcript_protein.csv', sep=',')
df_uniprot_t_p.dropna(subset=['Ensembl'], inplace=True)
df_uniprot_t_p

Unnamed: 0,Entry,Ensembl
0,A0A024R1R8,ENST00000424496.3;
1,A0A024RBG1,ENST00000322209.5;
2,A0A075B6H7,ENST00000390247.2;ENST00000633250.1;
3,A0A075B6H8,ENST00000390278.3;
4,A0A075B6H9,ENST00000390282.2;
...,...,...
20412,U3KPV4,ENST00000442999.3;
20413,W5XKT8,ENST00000637797.2 [W5XKT8-1];
20414,W6CW81,ENST00000696987.1;
20415,X6R8D5,ENST00000372963.4;


In [3]:
import re

df_uniprot_t_p['Ensembl'] = df_uniprot_t_p['Ensembl'].str.split(';')
df_uniprot_t_p = df_uniprot_t_p.explode('Ensembl')

df_uniprot_t_p['Ensembl'] = df_uniprot_t_p['Ensembl'].str.strip()
df_uniprot_t_p.dropna(subset=['Ensembl'])
df_uniprot_t_p = df_uniprot_t_p[df_uniprot_t_p['Ensembl'] != '']
df_uniprot_t_p['Ensembl'] = df_uniprot_t_p['Ensembl'].apply(lambda x: re.sub(r'\[.*?\]', '', x).strip())
df_uniprot_t_p

Unnamed: 0,Entry,Ensembl
0,A0A024R1R8,ENST00000424496.3
1,A0A024RBG1,ENST00000322209.5
2,A0A075B6H7,ENST00000390247.2
2,A0A075B6H7,ENST00000633250.1
3,A0A075B6H8,ENST00000390278.3
...,...,...
20413,W5XKT8,ENST00000637797.2
20414,W6CW81,ENST00000696987.1
20415,X6R8D5,ENST00000372963.4
20416,X6R8R1,ENST00000623662.4


### RefSeq

In [4]:
# Download Link: https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/RefSeqGene/LRG_RefSeqGene
# Download Date: 2025-03-21
# Download Version: 2025-03-21
df_refseq_t_p = pd.read_csv('LRG_RefSeqGene.txt', sep='\t')
df_refseq_t_p = df_refseq_t_p[['RNA', 'Protein']]
df_refseq_t_p.dropna(subset=['Protein'], inplace=True)
df_refseq_t_p

Unnamed: 0,RNA,Protein
0,NM_014576.4,NP_055391.2
1,NM_138932.3,NP_620310.1
2,NM_138933.3,NP_620311.1
3,NM_001198818.2,NP_001185747.1
4,NM_001198819.2,NP_001185748.1
...,...,...
33416,NM_020928.2,NP_065979.1
33417,NM_007057.4,NP_008988.2
33418,NM_032997.3,NP_127490.1
33419,NM_001005413.1,NP_001005413.1


### BioMedGraphica ID

In [5]:
import pandas as pd
import os
from pathlib import Path

current_working_dir = Path(os.getcwd()).resolve()
grandparent_dir = current_working_dir.parent.parent.parent
target_dir_protein = grandparent_dir / 'BioMedGraphica' / 'Entity' / 'Protein' / 'BioMedGraphica_Protein.csv'
target_dir_transcript = grandjson_dir = grandparent_dir / 'BioMedGraphica' / 'Entity' / 'Transcript' / 'BioMedGraphica_Transcript.csv'
biomedgraphica_protein = pd.read_csv(target_dir_protein, dtype=str)
biomedgraphica_transcript = pd.read_csv(target_dir_transcript, dtype=str)

### Ensembl Mapping

In [6]:
ensembl_trans = biomedgraphica_transcript[['Ensembl_Transcript_ID', 'BioMedGraphica_ID']]
ensembl_trans.dropna(subset = ['Ensembl_Transcript_ID'], inplace=True)
ensembl_trans = ensembl_trans.assign(Ensembl_Transcript_ID=biomedgraphica_transcript['Ensembl_Transcript_ID'].str.split(';')).explode('Ensembl_Transcript_ID')

ensembl_transcript_to_individualid = ensembl_trans.groupby('Ensembl_Transcript_ID')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ensembl_trans.dropna(subset = ['Ensembl_Transcript_ID'], inplace=True)


In [7]:
ensembl_protein = biomedgraphica_protein[['Ensembl_Protein_ID', 'BioMedGraphica_ID']]
ensembl_protein.dropna(subset = ['Ensembl_Protein_ID'], inplace=True)
ensembl_protein = ensembl_protein.assign(Ensembl_Protein_ID=biomedgraphica_protein['Ensembl_Protein_ID'].str.split(';')).explode('Ensembl_Protein_ID')

ensembl_protein_to_individualid = ensembl_protein.groupby('Ensembl_Protein_ID')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ensembl_protein.dropna(subset = ['Ensembl_Protein_ID'], inplace=True)


In [8]:
df_ensembl_transcript_protein['From_ID'] = df_ensembl_transcript_protein['Transcript stable ID'].map(ensembl_transcript_to_individualid)
df_ensembl_transcript_protein['To_ID'] = df_ensembl_transcript_protein['Protein stable ID'].map(ensembl_protein_to_individualid)

In [9]:
df_ensembl_t_p = df_ensembl_transcript_protein[['From_ID', 'To_ID']]
df_ensembl_t_p.dropna(subset=['From_ID'], inplace=True)
df_ensembl_t_p.dropna(subset=['To_ID'], inplace=True)
df_ensembl_t_p.info()

<class 'pandas.core.frame.DataFrame'>
Index: 123845 entries, 5 to 410293
Data columns (total 2 columns):
 #   Column   Non-Null Count   Dtype 
---  ------   --------------   ----- 
 0   From_ID  123845 non-null  object
 1   To_ID    123845 non-null  object
dtypes: object(2)
memory usage: 2.8+ MB


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ensembl_t_p.dropna(subset=['From_ID'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ensembl_t_p.dropna(subset=['To_ID'], inplace=True)


In [10]:
# Split and explode individualid_RSG
df_ensembl_t_p['From_ID'] = df_ensembl_t_p['From_ID'].str.split(';')
df_ensembl_t_p = df_ensembl_t_p.explode('From_ID').reset_index(drop=True)

# Split and explode individualid_RNA
df_ensembl_t_p['To_ID'] = df_ensembl_t_p['To_ID'].str.split(';')
df_ensembl_t_p = df_ensembl_t_p.explode('To_ID').reset_index(drop=True)

df_ensembl_t_p.drop_duplicates(inplace=True)
df_ensembl_t_p

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ensembl_t_p['From_ID'] = df_ensembl_t_p['From_ID'].str.split(';')


Unnamed: 0,From_ID,To_ID
0,BMG_TS017690,BMG_PT037861
1,BMG_TS017717,BMG_PT037862
2,BMG_TS017798,BMG_PT037567
3,BMG_TS017858,BMG_PT037568
4,BMG_TS017908,BMG_PT037868
...,...,...
123853,BMG_TS069452,BMG_PT039294
123854,BMG_TS034260,BMG_PT039294
123855,BMG_TS016488,BMG_PT039294
123856,BMG_TS024001,BMG_PT039294


### Uniprot Mapping

In [11]:
ensembl_trans = biomedgraphica_transcript[['Ensembl_Transcript_ID_Version', 'BioMedGraphica_ID']]
ensembl_trans.dropna(subset = ['Ensembl_Transcript_ID_Version'], inplace=True)
ensembl_trans = ensembl_trans.assign(Ensembl_Transcript_ID_Version=biomedgraphica_transcript['Ensembl_Transcript_ID_Version'].str.split(';')).explode('Ensembl_Transcript_ID_Version')

ensembl_transcript_to_individualid = ensembl_trans.groupby('Ensembl_Transcript_ID_Version')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ensembl_trans.dropna(subset = ['Ensembl_Transcript_ID_Version'], inplace=True)


In [12]:
uniprot_protein = biomedgraphica_protein[['Uniprot_ID', 'BioMedGraphica_ID']]
uniprot_protein.dropna(subset = ['Uniprot_ID'], inplace=True)
uniprot_protein = uniprot_protein.assign(Uniprot_ID=biomedgraphica_protein['Uniprot_ID'].str.split(';')).explode('Uniprot_ID')

uniprot_protein_to_individualid = uniprot_protein.groupby('Uniprot_ID')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  uniprot_protein.dropna(subset = ['Uniprot_ID'], inplace=True)


In [13]:
df_uniprot_t_p['From_ID'] = df_uniprot_t_p['Ensembl'].map(ensembl_transcript_to_individualid)
df_uniprot_t_p['To_ID'] = df_uniprot_t_p['Entry'].map(uniprot_protein_to_individualid)

In [14]:
df_uniprot_t_p.dropna(subset=['From_ID'], inplace=True)
df_uniprot_t_p.dropna(subset=['To_ID'], inplace=True)
df_uniprot_t_p.info()

<class 'pandas.core.frame.DataFrame'>
Index: 50765 entries, 0 to 20416
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Entry    50765 non-null  object
 1   Ensembl  50765 non-null  object
 2   From_ID  50765 non-null  object
 3   To_ID    50765 non-null  object
dtypes: object(4)
memory usage: 1.9+ MB


In [15]:
df_uniprot_t_p['From_ID'] = df_uniprot_t_p['From_ID'].str.split(';')
df_uniprot_t_p = df_uniprot_t_p.explode('From_ID').reset_index(drop=True)

df_uniprot_t_p['To_ID'] = df_uniprot_t_p['To_ID'].str.split(';')
df_uniprot_t_p = df_uniprot_t_p.explode('To_ID').reset_index(drop=True)

df_uniprot_t_p = df_uniprot_t_p[['From_ID', 'To_ID']]
df_uniprot_t_p.drop_duplicates(inplace=True)
df_uniprot_t_p

Unnamed: 0,From_ID,To_ID
0,BMG_TS047632,BMG_PT000170
1,BMG_TS010412,BMG_PT000774
2,BMG_TS029566,BMG_PT000945
3,BMG_TS206006,BMG_PT000945
4,BMG_TS029574,BMG_PT000946
...,...,...
50760,BMG_TS210078,BMG_PT100796
50761,BMG_TS265137,BMG_PT100799
50762,BMG_TS023262,BMG_PT100959
50763,BMG_TS197785,BMG_PT100961


### RefSeq Mapping

In [16]:
refseq_trans = biomedgraphica_transcript[['RefSeq_ID', 'BioMedGraphica_ID']]
refseq_trans.dropna(subset = ['RefSeq_ID'], inplace=True)
refseq_trans = refseq_trans.assign(RefSeq_ID=biomedgraphica_transcript['RefSeq_ID'].str.split(';')).explode('RefSeq_ID')

refseq_trans_to_individual = refseq_trans.groupby('RefSeq_ID')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  refseq_trans.dropna(subset = ['RefSeq_ID'], inplace=True)


In [17]:
refseq_protein = biomedgraphica_protein[['RefSeq_ID', 'BioMedGraphica_ID']]
refseq_protein.dropna(subset = ['RefSeq_ID'], inplace=True)
refseq_protein = refseq_protein.assign(RefSeq_ID=biomedgraphica_protein['RefSeq_ID'].str.split(';')).explode('RefSeq_ID')

refseq_protein_to_individual = refseq_protein.groupby('RefSeq_ID')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  refseq_protein.dropna(subset = ['RefSeq_ID'], inplace=True)


In [18]:
df_refseq_t_p['From_ID'] = df_refseq_t_p['RNA'].map(refseq_trans_to_individual)
df_refseq_t_p['To_ID'] = df_refseq_t_p['Protein'].map(refseq_protein_to_individual)

In [19]:
df_refseq_t_p.dropna(subset=['From_ID'], inplace=True)
df_refseq_t_p.dropna(subset=['To_ID'], inplace=True)
df_refseq_t_p.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6868 entries, 0 to 33420
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   RNA      6868 non-null   object
 1   Protein  6868 non-null   object
 2   From_ID  6868 non-null   object
 3   To_ID    6868 non-null   object
dtypes: object(4)
memory usage: 268.3+ KB


In [20]:
df_refseq_t_p = df_refseq_t_p[['From_ID', 'To_ID']]

df_refseq_t_p['From_ID'] = df_refseq_t_p['From_ID'].str.split(';')
df_refseq_t_p = df_refseq_t_p.explode('From_ID').reset_index(drop=True)

df_refseq_t_p['To_ID'] = df_refseq_t_p['To_ID'].str.split(';')
df_refseq_t_p = df_refseq_t_p.explode('To_ID').reset_index(drop=True)
df_refseq_t_p.drop_duplicates(inplace=True)
df_refseq_t_p

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_refseq_t_p['From_ID'] = df_refseq_t_p['From_ID'].str.split(';')


Unnamed: 0,From_ID,To_ID
0,BMG_TS023854,BMG_PT091743
1,BMG_TS009928,BMG_PT037614
2,BMG_TS009928,BMG_PT044515
3,BMG_TS009928,BMG_PT053181
4,BMG_TS009928,BMG_PT055260
...,...,...
35639,BMG_TS023820,BMG_PT036927
35640,BMG_TS023820,BMG_PT043276
35641,BMG_TS023820,BMG_PT086512
35642,BMG_TS016776,BMG_PT042438


### Transcript-Protein Relation

In [21]:
biomedgraphica_ensembl_transcript_protein = df_ensembl_t_p.copy()
biomedgraphica_ensembl_transcript_protein['source1'] = 'Ensembl'

biomedgraphica_uniprot_transcript_protein = df_uniprot_t_p.copy()
biomedgraphica_uniprot_transcript_protein['source2'] = 'Uniprot'

biomedgraphica_refseq_transcript_protein = df_refseq_t_p.copy()
biomedgraphica_refseq_transcript_protein['source3'] = 'RefSeq'

biomedgraphica_transcript_protein = pd.merge(biomedgraphica_ensembl_transcript_protein, biomedgraphica_uniprot_transcript_protein, how='outer', on=['From_ID', 'To_ID'])
biomedgraphica_transcript_protein = pd.merge(biomedgraphica_transcript_protein, biomedgraphica_refseq_transcript_protein, how='outer', on=['From_ID', 'To_ID'])
biomedgraphica_transcript_protein

Unnamed: 0,From_ID,To_ID,source1,source2,source3
0,BMG_TS000001,BMG_PT042398,Ensembl,Uniprot,
1,BMG_TS000002,BMG_PT039543,Ensembl,Uniprot,
2,BMG_TS000003,BMG_PT038917,Ensembl,Uniprot,
3,BMG_TS000004,BMG_PT042655,Ensembl,Uniprot,
4,BMG_TS000005,BMG_PT015283,,,RefSeq
...,...,...,...,...,...
152580,BMG_TS412170,BMG_PT173974,Ensembl,,
152581,BMG_TS412171,BMG_PT173975,Ensembl,,
152582,BMG_TS412172,BMG_PT173976,Ensembl,,
152583,BMG_TS412303,BMG_PT173977,Ensembl,,


In [22]:
def merge_string_columns(df, columns, merge_name, separator=';'):
    def merge_strings(row):
        combined = set()
        for column in columns:
            if pd.notnull(row[column]):
                combined.update(row[column].split(separator))
        return separator.join(combined)
    
    # Apply the function to each row and create a new column
    combined_column_name = merge_name
    df[combined_column_name] = df.apply(merge_strings, axis=1)
    df.drop(columns=columns, inplace=True)
    
    return df

biomedgraphica_transcript_protein = merge_string_columns(biomedgraphica_transcript_protein, ['source1', 'source2', 'source3'], 'Source')
biomedgraphica_transcript_protein

Unnamed: 0,From_ID,To_ID,Source
0,BMG_TS000001,BMG_PT042398,Uniprot;Ensembl
1,BMG_TS000002,BMG_PT039543,Uniprot;Ensembl
2,BMG_TS000003,BMG_PT038917,Uniprot;Ensembl
3,BMG_TS000004,BMG_PT042655,Uniprot;Ensembl
4,BMG_TS000005,BMG_PT015283,RefSeq
...,...,...,...
152580,BMG_TS412170,BMG_PT173974,Ensembl
152581,BMG_TS412171,BMG_PT173975,Ensembl
152582,BMG_TS412172,BMG_PT173976,Ensembl
152583,BMG_TS412303,BMG_PT173977,Ensembl


In [23]:
biomedgraphica_transcript_protein['Type'] = 'Transcript-Protein'
max_length = len(str(len(biomedgraphica_transcript_protein)))
biomedgraphica_transcript_protein['BioMedGraphica_ID'] = ['BMG_ED_TSPT' + str(i).zfill(max_length) for i in range(1, len(biomedgraphica_transcript_protein) + 1)]
columns = ['BioMedGraphica_ID'] + [col for col in biomedgraphica_transcript_protein.columns if col != 'BioMedGraphica_ID']  # re-order columns
biomedgraphica_transcript_protein = biomedgraphica_transcript_protein[columns]
biomedgraphica_transcript_protein

Unnamed: 0,BioMedGraphica_ID,From_ID,To_ID,Source,Type
0,BMG_ED_TSPT000001,BMG_TS000001,BMG_PT042398,Uniprot;Ensembl,Transcript-Protein
1,BMG_ED_TSPT000002,BMG_TS000002,BMG_PT039543,Uniprot;Ensembl,Transcript-Protein
2,BMG_ED_TSPT000003,BMG_TS000003,BMG_PT038917,Uniprot;Ensembl,Transcript-Protein
3,BMG_ED_TSPT000004,BMG_TS000004,BMG_PT042655,Uniprot;Ensembl,Transcript-Protein
4,BMG_ED_TSPT000005,BMG_TS000005,BMG_PT015283,RefSeq,Transcript-Protein
...,...,...,...,...,...
152580,BMG_ED_TSPT152581,BMG_TS412170,BMG_PT173974,Ensembl,Transcript-Protein
152581,BMG_ED_TSPT152582,BMG_TS412171,BMG_PT173975,Ensembl,Transcript-Protein
152582,BMG_ED_TSPT152583,BMG_TS412172,BMG_PT173976,Ensembl,Transcript-Protein
152583,BMG_ED_TSPT152584,BMG_TS412303,BMG_PT173977,Ensembl,Transcript-Protein


In [24]:
import os
from pathlib import Path

# get the current working directory
current_working_dir = Path(os.getcwd()).resolve()

# get the output directory
grandparent_dir = current_working_dir.parent.parent.parent

target_folder = grandparent_dir / 'BioMedGraphica' / 'Relation' / 'Transcript-Protein'
if not target_folder.exists():
    target_folder.mkdir(parents=True)
    print(f"Folder {target_folder} has been created.")

output_file_path = target_folder / 'BioMedGraphica_Transcript_Protein.csv'
biomedgraphica_transcript_protein.to_csv(output_file_path, index=False)
print(f"Data has been saved to {output_file_path}")

Data has been saved to D:\RA\BMG\BioMedGraphica\Relation\Transcript-Protein\BioMedGraphica_Transcript_Protein.csv
