In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from rdflib import Graph
from rdflib.namespace import RDF
import re
import sys

from rdflib.plugins.sparql.processor import SPARQLResult

def sparql_results_to_df(results: SPARQLResult) -> pd.DataFrame:
    """
    Export results from an rdflib SPARQL query into a `pandas.DataFrame`,
    using Python types. See https://github.com/RDFLib/rdflib/issues/1179.
    """
    return pd.DataFrame(
        data=([None if x is None else x.toPython() for x in row] for row in results),
        columns=[str(x) for x in results.vars],
    )

In [2]:
def adding_prefix(df_g):
    df_g.replace('http://www.project-improvit.de/Donor/', 'Donor:', regex=True, inplace=True)
    df_g.replace('http://www.project-improvit.de/vocab/', 'improvit:', regex=True, inplace=True)
    df_g.replace('http://www.project-improvit.de/Measurement/', 'Measurement:', regex=True, inplace=True)
    df_g.replace('http://www.project-improvit.de/FunctionalCure/', '', regex=True, inplace=True)
    df_g.replace('http://www.project-improvit.de/Parameter/', 'Parameter:', regex=True, inplace=True)
    df_g.replace('http://www.w3.org/2001/XMLSchema#decimal', '', regex=True, inplace=True)
    df_g.replace('>', '', regex=True, inplace=True)
    df_g.replace('<', '', regex=True, inplace=True)
    return df_g

In [3]:
g1 = Graph()
g1.parse("../../store_data/ImProVIT/All_Measurement_parameters_D0_HBsRE_v5_1.nt", format="nt")
len(g1)

32551

# Query to transform graph into a table

In [4]:
query = """
prefix improvit: <http://www.project-improvit.de/vocab/> 
select distinct ?donor
    where{
        ?donor rdf:type improvit:Donor .
    }
"""
qres = g1.query(query)
donor = sparql_results_to_df(qres)
donor = adding_prefix(donor)
display(donor.shape, donor.head(2))

(87, 1)

Unnamed: 0,donor
0,Donor:HBsRE_1
1,Donor:HBsRE_10


### Create two DataFrame of measures, one for measures with one protocol and another one for measures with different protocols

In [5]:
query = """
prefix donor: <http://www.project-improvit.de/Donor/>
prefix measurement: <http://www.project-improvit.de/Measurement/> 
prefix improvit: <http://www.project-improvit.de/vocab/> 
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

select distinct ?donor ?p (count(?measure) as ?num)
    where{
        ?donor rdf:type improvit:Donor .
        ?measure rdf:type improvit:Measurement .
        ?donor ?p ?measure .
        ?measure improvit:hasExpProtocolName ?protocol .
    }
    GROUP BY ?donor ?p
    ORDER BY DESC(?num)
"""

qres = g1.query(query)
q1 = sparql_results_to_df(qres)
q1 = adding_prefix(q1)
display(q1.head())

df = q1.loc[q1.num==1][['p']]
df.drop_duplicates(inplace=True)
# add Row to DataFrame
new_row = [{'p':'improvit:hasSex'}, {'p':'improvit:hasAge'}, {'p': 'improvit:hasTreatment'}]
df = df.append(new_row, ignore_index=True)

df_protocol = q1.loc[q1.num>1][['p']]
df_protocol.drop_duplicates(inplace=True)

display(df.shape, df.head(2), df_protocol.shape, df_protocol.head(2))

Unnamed: 0,donor,p,num
0,Donor:HBsRE_38,improvit:measurementLeadsToCD19neg_CD14neg_CD3...,12
1,Donor:HBsRE_11,improvit:measurementLeadsToCD19neg_CD14neg_CD3...,12
2,Donor:HBsRE_13,improvit:measurementLeadsToCD19neg_CD14neg_CD3...,12
3,Donor:HBsRE_12,improvit:measurementLeadsToCD19neg_CD14neg_CD3...,12
4,Donor:HBsRE_31,improvit:measurementLeadsToCD19neg_CD14neg_CD3...,12


(58, 1)

Unnamed: 0,p
0,improvit:measurementLeadsToAlbuminResults
1,improvit:measurementLeadsToALTResults


(8, 1)

Unnamed: 0,p
0,improvit:measurementLeadsToCD19neg_CD14neg_CD3...
35,improvit:measurementLeadsToCD19neg_CD14neg_CD3...


In [6]:
def dict_properties(df):
    prop_list = df.p.to_list()
    l_replace = [s.replace('improvit:measurementLeadsTo', '') for s in prop_list]
    l_replace = [s.replace('Results', '') for s in l_replace]
    l_replace = [s.replace('improvit:', '') for s in l_replace]

    dict_prop = dict(zip(prop_list, l_replace))
    return dict_prop

def generate_query_by_donor(donor, dict_prop):
    query_by_donor = []
    triple = """"""
    project = """"""

    for k in range(donor.shape[0]):
        triple = """"""
        project = """"""
        for key, value in dict_prop.items():
            name = value.replace('.', '')
            triple+="""OPTIONAL{ """ +donor.iloc[k].donor+ """ """ +key+ """ ?"""+name+""" .}\n"""
            project+=""" ?"""+name
        query_by_donor.append([triple, project])
    return query_by_donor

def extract_donor_description(g1, query_by_donor):
    df_donor = pd.DataFrame()
    for k in range(len(query_by_donor)):
        triple, project = query_by_donor[k][0], query_by_donor[k][1]
        query = """
        prefix improvit: <http://www.project-improvit.de/vocab/>
        prefix Donor: <http://www.project-improvit.de/Donor/>
        select distinct """ +project+"""
            where{ """+triple+"""
            }
        """
    #     print(query)
        qres = g1.query(query)
        df = sparql_results_to_df(qres)
        df = adding_prefix(df)
        df['donor'] = donor.iloc[k].donor
        df_donor = pd.concat([df_donor, df])

        if df.shape[0]==0:
            print(donor.iloc[k])
    df_donor.reset_index(drop=True, inplace=True)
    return df_donor

 
def is_float(string):
    # Compile a regular expression pattern to match valid float values
    pattern = r"^[-+]?[0-9]*\.?[0-9]+$"
     
    # Use re.match to check if the string matches the pattern
    # Returns a match object if there is a match, else None
    match = re.match(pattern, string)
     
    # Convert the match object to a boolean value
    # Returns True if there is a match, else False
    return bool(match)

In [7]:
dict_prop = dict_properties(df)
query_by_donor = generate_query_by_donor(donor, dict_prop)
# print(query_by_donor[2][0], query_by_donor[2][1])

In [8]:
df_donor = extract_donor_description(g1, query_by_donor)
display(df_donor.shape, df_donor.head(2))

(87, 59)

Unnamed: 0,Albumin,ALT,Anti_HCV,AST,CRP,Fibroscan,HBcrAg,HBeAg,HBsAg,HBV_DNA,...,CD3pos_CD8pos_CD14neg_CD19neg_HLA_DRpos,CD3pos_CD8pos_CD14neg_CD19neg_Ki67pos,CD3pos_CD8pos_CD14neg_CD19neg_PD1pos,CD3pos_CD8pos_CD14neg_CD19neg_PD1pos_KLRG1neg,CD3pos_CD8pos_CD14neg_CD19neg_PD1pos_KLRG1pos,CD3pos_gdTCRpos,hasSex,hasAge,hasTreatment,donor
0,Measurement:Albumin-0-Measurement_Of_Albumin_L...,Measurement:ALT-0-Measurement_Of_ALT_Level-Nor...,,Measurement:AST-0-Measurement_Of_AST_Level-Nor...,Measurement:CRP-0-Measurement_Of_CRP_Level-Nor...,,Measurement:HBcrAg-0-Measurement_Of_HBcrAg_Lev...,Measurement:HBeAg--280-Measurement_Of_HBeAg_Le...,Measurement:HBsAg-0-Measurement_Of_HBsAg_Level...,Measurement:HBV_DNA-0-Measurement_Of_HBV_DNA_L...,...,Measurement:CD3pos_CD8pos_CD14neg_CD19neg_HLA_...,Measurement:CD3pos_CD8pos_CD14neg_CD19neg_Ki67...,Measurement:CD3pos_CD8pos_CD14neg_CD19neg_PD1p...,Measurement:CD3pos_CD8pos_CD14neg_CD19neg_PD1p...,Measurement:CD3pos_CD8pos_CD14neg_CD19neg_PD1p...,,female,34,yes,Donor:HBsRE_1
1,Measurement:Albumin-0-Measurement_Of_Albumin_L...,Measurement:ALT-0-Measurement_Of_ALT_Level-Nor...,Measurement:Anti_HCV--189-Measurement_Of_Anti_...,Measurement:AST-0-Measurement_Of_AST_Level-Nor...,Measurement:CRP-0-Measurement_Of_CRP_Level-Nor...,,Measurement:HBcrAg-0-Measurement_Of_HBcrAg_Lev...,Measurement:HBeAg-0-Measurement_Of_HBeAg_Level...,Measurement:HBsAg-0-Measurement_Of_HBsAg_Level...,Measurement:HBV_DNA-0-Measurement_Of_HBV_DNA_L...,...,Measurement:CD3pos_CD8pos_CD14neg_CD19neg_HLA_...,Measurement:CD3pos_CD8pos_CD14neg_CD19neg_Ki67...,Measurement:CD3pos_CD8pos_CD14neg_CD19neg_PD1p...,Measurement:CD3pos_CD8pos_CD14neg_CD19neg_PD1p...,Measurement:CD3pos_CD8pos_CD14neg_CD19neg_PD1p...,,female,33,no,Donor:HBsRE_10


## Preprocess measures by different protocols

### Get all the protocols

In [9]:
text = ','.join([d for d in df_protocol.p.to_list()])
query = """
prefix donor: <http://www.project-improvit.de/Donor/>
prefix measurement: <http://www.project-improvit.de/Measurement/> 
prefix improvit: <http://www.project-improvit.de/vocab/> 
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

select distinct ?protocol
    where{
        ?donor rdf:type improvit:Donor .
        ?measure rdf:type improvit:Measurement .
        ?donor ?p ?measure .
        ?measure improvit:hasExpProtocolName ?protocol .
        FILTER(?p in ("""+text+"""))
    }    
"""

qres = g1.query(query)
q1 = sparql_results_to_df(qres)
q1 = adding_prefix(q1)
display(q1.shape, q1.head(2))

(12, 1)

Unnamed: 0,protocol
0,P10DC_ICS_Surface_OLP_T_cell_Panel
1,P10DC_ICS_Core_OLP_T_cell_Panel


In [10]:
def generate_query_by_measure(donor, dict_prop):
    query_by_measure = []
    for key, value in dict_prop.items():
        name = value.replace('.', '')
        triple="""OPTIONAL{ ?donor """ +key+ """ ?"""+name+""" .}\n"""
        project="""?donor ?"""+name
        query_by_measure.append([triple, project])
    return query_by_measure

In [11]:
dict_prop = dict_properties(df_protocol)
query_by_measure = generate_query_by_measure(donor, dict_prop)
print(query_by_measure[2][0], query_by_measure[2][1])

OPTIONAL{ ?donor improvit:measurementLeadsToCD19neg_CD14neg_CD3pos_CD4pos_Mibp1posResults ?CD19neg_CD14neg_CD3pos_CD4pos_Mibp1pos .}
 ?donor ?CD19neg_CD14neg_CD3pos_CD4pos_Mibp1pos


### Combining measures with protocols

In [12]:
col = []
for i in list(dict_prop.values()):
    for j in q1.protocol.to_list():
        col.append(i+'-'+j)

donor_protocol = pd.DataFrame(columns=col)
donor_protocol['donor'] = donor.donor.to_list()
display(donor_protocol.shape, donor_protocol.head(2))

(87, 97)

Unnamed: 0,CD19neg_CD14neg_CD3pos_CD4pos_IFNypos-P10DC_ICS_Surface_OLP_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD4pos_IFNypos-P10DC_ICS_Core_OLP_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD4pos_IFNypos-P10DC_ICS_Polymerase_OLP_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD4pos_IFNypos-P10DC_ICS_Total_OLP_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD4pos_IFNypos-P10DC_ICS_Surface_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD4pos_IFNypos-P10DC_ICS_Surface_OLP_PDL1_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD4pos_IFNypos-P10DC_ICS_Polymerase_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD4pos_IFNypos-P10DC_ICS_Polymerase_OLP_PDL1_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD4pos_IFNypos-P10DC_ICS_Core_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD4pos_IFNypos-P10DC_ICS_Total_OLP_MT_T_cell_Panel,...,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Total_OLP_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Surface_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Surface_OLP_PDL1_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Polymerase_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Polymerase_OLP_PDL1_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Core_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Total_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Core_OLP_PDL1_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Total_OLP_PDL1_T_cell_Panel,donor
0,,,,,,,,,,,...,,,,,,,,,,Donor:HBsRE_1
1,,,,,,,,,,,...,,,,,,,,,,Donor:HBsRE_10


In [13]:
for k in range(len(query_by_measure)):
    triple, project = query_by_measure[k][0], query_by_measure[k][1]
    c = project.replace('?donor ?', '')
    query = """
    prefix improvit: <http://www.project-improvit.de/vocab/>
    prefix Donor: <http://www.project-improvit.de/Donor/>
    select distinct """ +project+"""
        where{ """+triple+"""
            }
    """
#     print(query)
    qres = g1.query(query)
    df = sparql_results_to_df(qres)
    df = adding_prefix(df)
#     display(df)
    for index, row in df.iterrows():
        result = row[c].split('-')
        index = list(np.where(donor_protocol["donor"] == row.donor)[0])[0]
        donor_protocol.at[index, c+'-'+result[2]] = result[3]
#     display(donor_protocol)
donor_protocol = donor_protocol.dropna(axis=1, how='all')

In [14]:
result = pd.merge(df_donor, donor_protocol, on="donor")
display(result.shape, result.head(2))

(87, 155)

Unnamed: 0,Albumin,ALT,Anti_HCV,AST,CRP,Fibroscan,HBcrAg,HBeAg,HBsAg,HBV_DNA,...,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Polymerase_OLP_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Total_OLP_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Surface_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Surface_OLP_PDL1_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Polymerase_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Polymerase_OLP_PDL1_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Core_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Total_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Core_OLP_PDL1_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Total_OLP_PDL1_T_cell_Panel
0,Measurement:Albumin-0-Measurement_Of_Albumin_L...,Measurement:ALT-0-Measurement_Of_ALT_Level-Nor...,,Measurement:AST-0-Measurement_Of_AST_Level-Nor...,Measurement:CRP-0-Measurement_Of_CRP_Level-Nor...,,Measurement:HBcrAg-0-Measurement_Of_HBcrAg_Lev...,Measurement:HBeAg--280-Measurement_Of_HBeAg_Le...,Measurement:HBsAg-0-Measurement_Of_HBsAg_Level...,Measurement:HBV_DNA-0-Measurement_Of_HBV_DNA_L...,...,0.388,1.126,,,,,,,,
1,Measurement:Albumin-0-Measurement_Of_Albumin_L...,Measurement:ALT-0-Measurement_Of_ALT_Level-Nor...,Measurement:Anti_HCV--189-Measurement_Of_Anti_...,Measurement:AST-0-Measurement_Of_AST_Level-Nor...,Measurement:CRP-0-Measurement_Of_CRP_Level-Nor...,,Measurement:HBcrAg-0-Measurement_Of_HBcrAg_Lev...,Measurement:HBeAg-0-Measurement_Of_HBeAg_Level...,Measurement:HBsAg-0-Measurement_Of_HBsAg_Level...,Measurement:HBV_DNA-0-Measurement_Of_HBV_DNA_L...,...,0.055,0.351,,,,,,,,


In [None]:
# df_donor.drop(columns='donor', inplace=True)
result = df_donor
display(result.shape, result.head(2))

### Extracting the value from the parameter result

In [15]:
df_donor = result.copy()
for col in df_donor.columns:
    if col in ['hasAge', 'donor']:
        continue
    for i in range(df_donor.shape[0]):
        if pd.isna(df_donor[col][i]):
            continue
        str_value = df_donor[col][i].split('-')[-1]
        df_donor[col][i] = str_value
    if is_float(str_value):
        df_donor[col] = df_donor[col].astype(float)
        
        
#     df_donor[col] = df_donor[col].str.replace('ttt', '-1')
    
df_donor.replace('female', 1, inplace=True)
df_donor.replace('male', 0, inplace=True)
df_donor.replace('yes', 1, inplace=True)
df_donor.replace('no', 0, inplace=True)
df_donor['hasSex'] = df_donor['hasSex'].astype(int)
display(df_donor.shape, df_donor.head(2))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_donor[col][i] = str_value


(87, 155)

Unnamed: 0,Albumin,ALT,Anti_HCV,AST,CRP,Fibroscan,HBcrAg,HBeAg,HBsAg,HBV_DNA,...,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Polymerase_OLP_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Total_OLP_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Surface_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Surface_OLP_PDL1_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Polymerase_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Polymerase_OLP_PDL1_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Core_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Total_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Core_OLP_PDL1_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Total_OLP_PDL1_T_cell_Panel
0,Normal_Level,Normal_Level_In_Female,,Normal_Level_In_Female,Normal_Level,,Low_Intrahepatic_Viral_Replicative_Activity,Undetectable_Of_HBeAg,100_999_pos,High_Viral_Load_Of_HBV,...,0.388,1.126,,,,,,,,
1,Normal_Level,Normal_Level_In_Female,Undetectable_Antibodies_To_HCV_No_Infection,Normal_Level_In_Female,Normal_Level,,Low_Intrahepatic_Viral_Replicative_Activity,Undetectable_Of_HBeAg,1000_9999_pos,Low_Viral_Load_Of_HBV,...,0.055,0.351,,,,,,,,


In [16]:
df_donor.to_csv('D0_HBsRE_v5.csv', index=None)

### Convert all categorical variables to numeric and Replacing NaN values by a constant

In [16]:
#get all categorical columns
cat_columns = df_donor.select_dtypes(['object']).columns.to_list()
cat_columns.remove('donor')
#convert all categorical variables to numeric
df_donor[cat_columns] = df_donor[cat_columns].apply(lambda x: pd.factorize(x)[0])
#Repalce NaN with -1 on all float columns 
df_donor = df_donor.fillna(-1000)  # -sys.maxsize  -1
df_donor.replace(-1, -1000, inplace=True)
display(df_donor.shape, df_donor.head(2))

(87, 155)

Unnamed: 0,Albumin,ALT,Anti_HCV,AST,CRP,Fibroscan,HBcrAg,HBeAg,HBsAg,HBV_DNA,...,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Polymerase_OLP_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Total_OLP_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Surface_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Surface_OLP_PDL1_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Polymerase_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Polymerase_OLP_PDL1_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Core_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Total_OLP_MT_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Core_OLP_PDL1_T_cell_Panel,CD19neg_CD14neg_CD3pos_CD8pos_TNFapos-P10DC_ICS_Total_OLP_PDL1_T_cell_Panel
0,0,0,-1000,0,0,-1000,0,0,0,0,...,0.388,1.126,-1000.0,-1000.0,-1000.0,-1000.0,-1000.0,-1000.0,-1000.0,-1000.0
1,0,0,0,0,0,-1000,0,0,1,1,...,0.055,0.351,-1000.0,-1000.0,-1000.0,-1000.0,-1000.0,-1000.0,-1000.0,-1000.0


In [None]:
df_donor.to_csv('D0_HBsRE_v5.csv', index=None)