In [None]:
#versions: pandas==2.0.3, numpy==1.24.4

In [None]:
import pandas as pd
import numpy as np
from functools import reduce
from pandas.api.types import CategoricalDtype


In [None]:
# Input and output locations used throughout the notebook (all relative paths).
# Directory with the AIBL clinical CSV extracts (the aibl_*_01-Jun-2018.csv files read below).
AIBL_DATASET_DIR="./data/aibl_19Sep2019/Data_extract_3.3.0/"
# Scan metadata extracted from LONI; contains "Subject ID", "Sex", "Research Group", "Visit",
# "Archive Date", "Study Date", "Age", "Description", "Type", "Imaging Protocol" and "Image ID"
# for all AIBL scans.
AIBL_MRI_ADDITIONAL_DATA="./data/AIBL/AIBL_MRT_13112020.csv"
# Directory with the FastSurfer summary tables (aseg, wmparc and lh/rh DKT-atlas volume files).
AIBL_MRI_FASTSURFER_DIR="./data/AIBL/output_fastsurfer/"
# CSV whose region_volume column lists the volume columns kept in the final table.
MAPPING_FILE="./data/texture_mapping.csv"
# CSV of scan-level texture features; its path column is parsed for subject and image IDs.
AIBL_TEXTURE_DATA="./data/AIBL/summary_scan_level.csv"
# ";"-separated FreeSurfer look-up table (a file path, despite the _DIR suffix).
FREESURFER_LUT_DIR="./data/Freesurfer-LUT_new.txt"
# Destination of the final merged table.
SAVE_DIR_AIBL_DATASET="./data/AIBL_all.csv"
# CSV with an IMAGEUID column naming the scans that are kept.
FILE_AIBL_SELECTION="./data/AIBL_subjects_scans.csv"

In [None]:
def getAIBLDiag():
    """Load the per-visit diagnosis table and turn diagnosis codes into labels.

    Reads ``aibl_pdxconv_01-Jun-2018.csv`` from ``AIBL_DATASET_DIR``, keeps the
    ``RID``, ``VISCODE`` and ``DXCURREN`` columns, drops rows whose ``DXCURREN``
    is -4 or 7 and maps the codes 1, 2 and 3 to "CN", "MCI" and "Dementia".
    Any other code is passed through unchanged.

    Returns:
        pandas.DataFrame: columns ``PTID`` (renamed from ``RID``), ``VISCODE``
        and ``DX`` (renamed from ``DXCURREN``).
    """
    dx_labels = {1: "CN", 2: "MCI", 3: "Dementia"}
    aiblDataDX = pd.read_csv(AIBL_DATASET_DIR + "aibl_pdxconv_01-Jun-2018.csv")
    aiblDataDX = aiblDataDX.filter(items=["RID", "VISCODE", "DXCURREN"])
    # Codes -4 and 7 are excluded (presumably "missing" / not usable -- TODO confirm
    # against the AIBL data dictionary).
    aiblDataDX = aiblDataDX[~aiblDataDX["DXCURREN"].isin([-4, 7])]
    # replace() builds a new column instead of writing strings into an integer
    # Series through boolean masks, and assign() avoids mutating a filtered slice.
    aiblDataDX = aiblDataDX.assign(DXCURREN=aiblDataDX["DXCURREN"].replace(dx_labels))
    return aiblDataDX.rename(columns={"RID": "PTID", "DXCURREN": "DX"})

def getAIBLAPOE():
    """Load the APOE genotype table and count the e4 alleles per row.

    Rows where either ``APGEN1`` or ``APGEN2`` equals -4 are dropped. For every
    genotype made of the alleles 2, 3 and 4, ``APOE4`` is the number of alleles
    equal to 4 (0, 1 or 2); for any other combination ``APOE4`` keeps the
    ``APGEN1`` value.

    Returns:
        pandas.DataFrame: columns ``PTID`` (renamed from ``RID``), ``VISCODE``
        and ``APOE4``.
    """
    genotypes = pd.read_csv(AIBL_DATASET_DIR + "aibl_apoeres_01-Jun-2018.csv")
    both_known = (genotypes["APGEN1"] != -4) & (genotypes["APGEN2"] != -4)
    genotypes = genotypes[both_known]

    first_allele = genotypes["APGEN1"]
    second_allele = genotypes["APGEN2"]
    standard_alleles = [2, 3, 4]
    is_standard = first_allele.isin(standard_alleles) & second_allele.isin(standard_alleles)
    e4_count = (first_allele == 4).astype(int) + (second_allele == 4).astype(int)

    # Outside the 2/3/4 genotypes the first allele value is carried over as is.
    genotypes = genotypes.assign(APOE4=first_allele.where(~is_standard, e4_count))
    selected = genotypes.filter(items=["RID", "VISCODE", "APOE4"])
    return selected.rename(columns={"RID": "PTID"})

def getAIBLCogTest():
    """Load the CDR, MMSE and logical-memory scores and join them per visit.

    Each source table is read from ``AIBL_DATASET_DIR``, rows whose score
    column(s) equal -4 are dropped, and only ``RID``, ``VISCODE`` and the score
    column(s) are kept (``RID`` renamed to ``PTID``). The three tables are then
    outer-merged on ``PTID`` and ``VISCODE``.

    Returns:
        pandas.DataFrame: ``PTID``, ``VISCODE``, ``CDGLOBAL``, ``MMSCORE``,
        ``LIMMTOTAL`` and ``LDELTOTAL``.
    """
    def _load_scores(file_name, score_columns):
        # One source table: drop -4 rows, keep the keys plus the score columns.
        table = pd.read_csv(AIBL_DATASET_DIR + file_name)
        for column in score_columns:
            table = table[table[column] != -4]
        table = table.filter(items=["RID", "VISCODE"] + score_columns)
        return table.rename(columns={"RID": "PTID"})

    sources = [
        ("aibl_cdr_01-Jun-2018.csv", ["CDGLOBAL"]),
        ("aibl_mmse_01-Jun-2018.csv", ["MMSCORE"]),
        ("aibl_neurobat_01-Jun-2018.csv", ["LIMMTOTAL", "LDELTOTAL"]),
    ]
    ges = None
    for file_name, score_columns in sources:
        scores = _load_scores(file_name, score_columns)
        if ges is None:
            ges = scores
        else:
            ges = pd.merge(ges, scores, on=["PTID", "VISCODE"], how="outer")
    return ges

def getAIBLDemographyData():
    """Load gender and age for the visits listed in the demographics table.

    ``PTGENDER`` codes 1 and 2 become "Male" and "Female"; other codes are kept.
    The birth date is taken as 1 July of the year in ``PTDOB`` (the first
    character of ``PTDOB`` is dropped -- presumably a leading "/"; confirm
    against the raw file). ``AGE`` is the time in years, rounded to two
    decimals, between that date and the MMSE ``EXAMDATE`` of the same ``RID``
    and ``VISCODE``. Rows whose ``EXAMDATE`` is "-4" get no age.

    Returns:
        pandas.DataFrame: columns ``PTID`` (renamed from ``RID``), ``VISCODE``,
        ``PTGENDER`` and ``AGE`` (NaN where no exam date is available).
    """
    # Mean Gregorian year; the same length numpy assigns to its "Y" timedelta
    # unit. Dividing day counts by it avoids relying on pandas accepting the
    # calendar-ambiguous "Y" unit.
    days_per_year = 365.2425

    aiblDataDemo = pd.read_csv(AIBL_DATASET_DIR + "aibl_ptdemog_01-Jun-2018.csv")
    aiblDataDemo["PTGENDER"] = aiblDataDemo["PTGENDER"].replace({1: "Male", 2: "Female"})
    aiblDataDemo["GDAT"] = "01/07/" + aiblDataDemo["PTDOB"].astype(str).str[1:]

    timePointBL = pd.read_csv(AIBL_DATASET_DIR + "aibl_mmse_01-Jun-2018.csv")
    # Known typo in the source data: year 3013 instead of 2013.
    timePointBL["EXAMDATE"] = timePointBL["EXAMDATE"].replace("02/27/3013", "02/27/2013")

    dataset = pd.merge(timePointBL, aiblDataDemo, on=["RID", "VISCODE"], how="inner")
    # Copy so the new columns below are not written onto a filtered slice.
    dataset = dataset[dataset["EXAMDATE"] != "-4"].copy()
    birth_date = pd.to_datetime(dataset["GDAT"], format="%d/%m/%Y")
    exam_date = pd.to_datetime(dataset["EXAMDATE"].astype(str), format="%m/%d/%Y")
    dataset["AGE"] = ((exam_date - birth_date).dt.days / days_per_year).round(2)
    dataset = dataset.filter(items=["RID", "VISCODE", "AGE"])

    aiblDataDemo = pd.merge(aiblDataDemo, dataset, on=["RID", "VISCODE"], how="outer")
    aiblDataDemo = aiblDataDemo.filter(items=["RID", "VISCODE", "PTGENDER", "AGE"])
    return aiblDataDemo.rename(columns={"RID": "PTID"})

def getMRIData():
    """Load the scan metadata and keep the selected MPRAGE scans of patients.

    Keeps rows whose ``Research Group`` is "Patient", whose ``Description`` is
    one of the listed sequence names and whose ``Imaging Protocol`` is not the
    one excluded protocol string (the one ending in "Weighting=PD"). ``Visit``
    is translated to a visit code (other visit names are kept verbatim) and the
    ";"-separated ``Imaging Protocol`` is split into eight columns from which
    the leading "<label>=" is removed by position.

    Returns:
        pandas.DataFrame: ``PTID`` (from ``Subject ID``), ``IMAGEUID`` (from
        ``Image ID``), ``VISCODE``, ``SliceThickness``, ``MagStrength``,
        ``MfgModel``, ``Manufacturer`` and ``Description``.
    """
    accepted_descriptions = [
        "MPRAGE ADNI confirmed",
        "MPRAGE SAG ISO p2",
        "MPRAGE SAG ISO p2_ND",
        "MPRAGE ADNI confirmed REPEAT",
        "MPRAGESAGISOp2ND",
        "MPRAGE",
        "MPRAGE ADNI conf REPEAT",
        "MPRAGE ADNI confirmed RPT",
        "MPRAGE ADNI REPEAT",
        "MPRAGE ADNI confirmed REPEATX2",
        "MPRAGE ADNI confirmed repeat",
        "t1_mpr_ss_SAGITTAL_TRIPLE_MODE",
    ]
    excluded_protocol = "Acquisition Plane=SAGITTAL;Mfg Model=Avanto;Slice Thickness=1.0;Matrix Z=160.0;Acquisition Type=3D;Field Strength=1.5;Manufacturer=SIEMENS;Weighting=PD"
    visit_codes = {
        "Baseline": "bl",
        "18 Month follow-up": "m18",
        "36 Month follow-up": "m36",
        "54 Month follow-up": "m54",
        "72 Month follow-up": "m72",
    }
    # Output column -> number of leading characters to strip from the field.
    # The order follows the position of the fields in the protocol string.
    protocol_fields = {
        "AcquisitionPlane": 18,
        "MfgModel": 10,
        "SliceThickness": 16,
        "MatrixZ": 9,
        "AcquisitionType": 17,
        "MagStrength": 15,
        "Manufacturer": 13,
        "Weighting": 10,
    }

    scans = pd.read_csv(AIBL_MRI_ADDITIONAL_DATA)
    keep = (
        (scans["Research Group"] == "Patient")
        & scans["Description"].isin(accepted_descriptions)
        & (scans["Imaging Protocol"] != excluded_protocol)
    )
    scans = scans[keep]
    scans = scans.assign(VISCODE=scans["Visit"].replace(visit_codes))

    protocol = scans["Imaging Protocol"].str.split(";", expand=True)
    protocol.columns = list(protocol_fields)
    for field, prefix_length in protocol_fields.items():
        protocol[field] = protocol[field].str[prefix_length:]

    scans = pd.concat([scans.reset_index(drop=True), protocol.reset_index(drop=True)], axis=1)
    scans = scans.filter(items=["Subject ID", "Image ID", "VISCODE", "SliceThickness", "MagStrength", "MfgModel", "Manufacturer", "Description"])
    return scans.rename(columns={"Subject ID": "PTID", "Image ID": "IMAGEUID"})

def getAIBLDataset():
    """Assemble the per-visit AIBL table from all source loaders.

    Demographics and APOE are outer-merged on ``PTID``/``VISCODE`` and then
    attached to every visit of a subject through ``PTID`` alone (their own
    ``VISCODE`` is dropped first). Cognitive scores, diagnoses and MRI scan
    metadata are outer-merged on ``PTID``/``VISCODE``.

    Returns:
        pandas.DataFrame: the merged table, sorted by ``VISCODE``, where
        ``VISCODE`` is an ordered categorical with the categories
        bl < m18 < m36 < m54 < m72 (any other visit code becomes NaN).
    """
    AIBLDemo = getAIBLDemographyData()
    AIBLCogTest = getAIBLCogTest()
    AIBLApoE = getAIBLAPOE()
    AIBLDiag = getAIBLDiag()
    MRI = getMRIData()

    def _merge_per_visit(left, right):
        # Outer join on the subject/visit key shared by all source tables.
        return pd.merge(left, right, on=["PTID", "VISCODE"], how="outer")

    gesBL = reduce(_merge_per_visit, [AIBLDemo, AIBLApoE])
    gesVisits = reduce(_merge_per_visit, [AIBLCogTest, AIBLDiag, MRI])
    gesBL = gesBL.drop(["VISCODE"], axis=1)
    ges = pd.merge(gesVisits, gesBL, on=["PTID"], how="outer")

    visitCatsAIBL = ["bl", "m18", "m36", "m54", "m72"]
    typeCatAIBL = pd.CategoricalDtype(categories=visitCatsAIBL, ordered=True)
    # Plain column assignment replaces the column and its dtype. Writing through
    # ges.loc[:, "VISCODE"] sets the values into the existing object column in
    # pandas >= 2.0, which silently discards the categorical dtype.
    ges["VISCODE"] = ges["VISCODE"].astype(typeCatAIBL)
    ges = ges.sort_values(by="VISCODE")
    return ges

In [None]:
# Read the four FastSurfer summary tables.
data_aseg = pd.read_table(AIBL_MRI_FASTSURFER_DIR + "aseg_stats.txt")
data_wmparc = pd.read_table(AIBL_MRI_FASTSURFER_DIR + "wmparc_stats.txt")
data_lhDKT = pd.read_table(AIBL_MRI_FASTSURFER_DIR + "lh.aparc.DKTatlas.mapped.volume.txt")
data_rhDKT = pd.read_table(AIBL_MRI_FASTSURFER_DIR + "rh.aparc.DKTatlas.mapped.volume.txt")

# Give the per-scan identifier column the same name in every table.
data_lhDKT = data_lhDKT.rename(columns={"lh.aparc.DKTatlas.mapped.volume": "filename"})
data_rhDKT = data_rhDKT.rename(columns={"rh.aparc.DKTatlas.mapped.volume": "filename"})
data_aseg = data_aseg.rename(columns={"Measure:volume": "filename"})
data_wmparc = data_wmparc.rename(columns={"Measure:volume": "filename"})

# Join the tables on filename; a column that already exists on the left keeps
# its name, the right-hand duplicate is tagged "_y" and discarded afterwards.
df = data_lhDKT
for stats_table in (data_rhDKT, data_wmparc, data_aseg):
    df = pd.merge(df, stats_table, on="filename", suffixes=("", "_y"))

cols = [column for column in df.columns if not column.endswith("_y")]
df = df.loc[:, cols]


In [None]:
# The subject ID is the fourth "_"-separated token of the file name; the image
# UID is whatever follows the last "_I", without its final character.
filename_col = df["filename"]
df["PTID"] = filename_col.str.split("_").str[3]
df["IMAGEUID"] = filename_col.str.split("_I").str[-1].str[:-1]

df = df.astype({"IMAGEUID": "str"})
# Divide columns 1..190 by the estimated total intracranial volume.
# NOTE(review): the column range is hard-coded and this cell is not idempotent
# (running it twice divides twice) -- confirm the range matches the table layout.
volume_slice = slice(1, 191)
df.iloc[:, volume_slice] = df.iloc[:, volume_slice].div(df["EstimatedTotalIntraCranialVol"], axis=0)

In [None]:
# Assemble the clinical table and round AGE to one decimal.
aibl = getAIBLDataset().round({"AGE": 1})


In [None]:
# Age at every visit. Unlike getAIBLDemographyData(), the MMSE visits are joined
# to the demographics on RID only, so each MMSE row receives the subject's birth
# date and therefore its own age.

# Mean Gregorian year; the same length numpy assigns to its "Y" timedelta unit.
# Dividing day counts by it avoids relying on pandas accepting the
# calendar-ambiguous "Y" unit.
DAYS_PER_YEAR = 365.2425

aiblDataDemo = pd.read_csv(AIBL_DATASET_DIR + "aibl_ptdemog_01-Jun-2018.csv")
# Birth date is taken as 1 July of the year in PTDOB (its first character is
# dropped -- presumably a leading "/"; confirm against the raw file).
aiblDataDemo["GDAT"] = "01/07/" + aiblDataDemo["PTDOB"].astype(str).str[1:]

timePointBL = pd.read_csv(AIBL_DATASET_DIR + "aibl_mmse_01-Jun-2018.csv")
# Known typo in the source data: year 3013 instead of 2013.
timePointBL["EXAMDATE"] = timePointBL["EXAMDATE"].replace("02/27/3013", "02/27/2013")

# The empty left suffix keeps the MMSE VISCODE; the demographics one becomes VISCODE_y.
dataset = pd.merge(timePointBL, aiblDataDemo, on=["RID"], how="inner", suffixes=["", "_y"])
# Copy so the new columns below are not written onto a filtered slice.
dataset = dataset[dataset["EXAMDATE"] != "-4"].copy()
dataset["GDAT"] = pd.to_datetime(dataset["GDAT"], format="%d/%m/%Y")
dataset["EXAMDATE"] = pd.to_datetime(dataset["EXAMDATE"].astype(str), format="%m/%d/%Y")
dataset["AGE"] = ((dataset["EXAMDATE"] - dataset["GDAT"]).dt.days / DAYS_PER_YEAR).round(2)
dataset = dataset.filter(items=["RID", "VISCODE", "AGE"])

aiblDataDemo = pd.merge(aiblDataDemo, dataset, on=["RID", "VISCODE"], how="outer")
aiblDataDemo = aiblDataDemo.filter(items=["RID", "VISCODE", "AGE"])
aibl_demog = aiblDataDemo.rename(columns={"RID": "PTID"})

In [None]:
# The baseline-only AGE is replaced by the per-visit age merged in next.
aibl = aibl.drop(columns=["AGE"])

In [None]:
# Attach the per-visit age; visits without a row in aibl_demog are dropped.
aibl = aibl.merge(aibl_demog, on=["PTID", "VISCODE"], how="inner")

In [None]:
# Baseline diagnosis per subject: the DX of the subject's lowest-AGE row.
# Records are collected in a list and turned into a frame once, instead of
# concatenating a one-row frame onto a growing frame in every iteration.
dx_bl_records = []
for PTID in aibl.PTID.unique():
    aibl_PTID = aibl[aibl.PTID == PTID].sort_values("AGE")
    DXBL = aibl_PTID.iloc[0]["DX"]
    dx_bl_records.append({"PTID": PTID, "DX_bl": DXBL})
aibl_DXbl = pd.DataFrame(dx_bl_records, columns=["PTID", "DX_bl"])

In [None]:
# Add each subject's baseline diagnosis to all of its rows.
aibl = aibl.merge(aibl_DXbl, on=["PTID"], how="inner")

In [None]:
# Keep only subjects whose baseline diagnosis is CN or MCI.
baseline_groups = ["CN", "MCI"]
aibl = aibl.loc[aibl["DX_bl"].isin(baseline_groups)]

In [None]:
# Drop visits without a diagnosis.
aibl = aibl[aibl["DX"].notna()]

In [None]:
# Notebook-level defaults for the per-subject status and time; the
# classification loop further below assigns both again for every subject.
status=""
time=0.0

In [None]:

# Ordered diagnosis stages, so that sorting by DX follows CN < MCI < Dementia.
dx_stages = ["CN", "MCI", "Dementia"]
cat_type = CategoricalDtype(categories=dx_stages, ordered=True)

aibl = aibl.assign(DX=aibl["DX"].astype(cat_type))

In [None]:
# Build one time-to-event record per subject from its age-ordered diagnoses.
#
# Status labels assigned below:
#   sCN / sMCI         one single diagnosis (CN or MCI) at every visit
#   pCN / pMCI         Dementia occurs and sorting the visits by diagnosis stage
#                      reproduces the age order (no step back)
#   uCNAD* / uMCIAD*   Dementia occurs but the order check fails; "Clear" when
#                      the last CN and last MCI visit both precede the first
#                      Dementia visit, otherwise "Unclear"
#   CNtoMCI            no Dementia, order check passes, first diagnosis CN
#   uCNNoAD / uMCINoAD no Dementia and the order check fails
# Time is the AGE difference between the first visit and the first Dementia
# visit when Dementia occurs, otherwise between the first and the last visit.
# Ref_age is the AGE at the first visit.
#
# Records are collected in a list and turned into a frame once, instead of
# concatenating a one-row frame onto a growing frame in every iteration.
tte_records = []
for PTID in aibl.PTID.unique():
    df_PTID = aibl[aibl.PTID == PTID]
    df_PTID = df_PTID.sort_values("AGE")
    status = ""
    time = 0.0
    # Reset per subject so that a branch which assigns no reference age can
    # neither reuse the previous subject's value nor hit an undefined name.
    ref_age = float("nan")
    if len(df_PTID.DX.unique()) == 1:
        if df_PTID.DX.unique()[0] == "MCI":
            status = "sMCI"
            time = df_PTID.AGE.max() - df_PTID.AGE.min()
            ref_age = df_PTID.AGE.min()
        elif df_PTID.DX.unique()[0] == "CN":
            status = "sCN"
            time = df_PTID.AGE.max() - df_PTID.AGE.min()
            ref_age = df_PTID.AGE.min()
    else:
        # True when ordering the visits by diagnosis stage yields the same AGE
        # sequence as ordering them by AGE.
        dx_follows_age = all(
            (df_PTID.sort_values("DX").reset_index().AGE) == (df_PTID.reset_index().AGE)
        )
        if "Dementia" in df_PTID.DX.tolist():
            if dx_follows_age:
                if df_PTID.iloc[0].DX == "MCI":
                    status = "pMCI"
                elif df_PTID.iloc[0].DX == "CN":
                    status = "pCN"
            elif df_PTID.iloc[0].DX == "MCI":
                df_PTID_AD = df_PTID[df_PTID.DX == "Dementia"]
                df_PTID_CN = df_PTID[df_PTID.DX == "CN"]
                df_PTID_MCI = df_PTID[df_PTID.DX == "MCI"]
                value_AD = df_PTID_AD.AGE.min()
                value_MCI = df_PTID_MCI.AGE.max()
                if df_PTID_CN.shape[0] == 0:
                    value_CN = 0
                else:
                    value_CN = df_PTID_CN.AGE.max()
                if value_CN < value_AD and value_MCI < value_AD:
                    status = "uMCIADClear"
                else:
                    status = "uMCIADUnclear"
            elif df_PTID.iloc[0].DX == "CN":
                df_PTID_AD = df_PTID[df_PTID.DX == "Dementia"]
                df_PTID_CN = df_PTID[df_PTID.DX == "CN"]
                df_PTID_MCI = df_PTID[df_PTID.DX == "MCI"]
                value_AD = df_PTID_AD.AGE.min()
                value_CN = df_PTID_CN.AGE.max()
                if df_PTID_MCI.shape[0] == 0:
                    value_MCI = 0
                else:
                    value_MCI = df_PTID_MCI.AGE.max()
                if value_CN < value_AD and value_MCI < value_AD:
                    status = "uCNADClear"
                else:
                    status = "uCNADUnclear"
            # Whenever Dementia occurs, time runs up to the first Dementia visit.
            df_PTID_AD = df_PTID[df_PTID.DX == "Dementia"]
            time = df_PTID_AD.AGE.min() - df_PTID.AGE.min()
            ref_age = df_PTID.AGE.min()
        else:
            if dx_follows_age:
                if df_PTID.iloc[0].DX == "CN":
                    status = "CNtoMCI"
                    time = df_PTID.AGE.max() - df_PTID.AGE.min()
                    ref_age = df_PTID.AGE.min()
            else:
                if df_PTID.iloc[0].DX == "MCI":
                    status = "uMCINoAD"
                elif df_PTID.iloc[0].DX == "CN":
                    status = "uCNNoAD"
                time = df_PTID.AGE.max() - df_PTID.AGE.min()
                ref_age = df_PTID.AGE.min()
    # Skip subjects that are Dementia at every visit and subjects without any scan.
    only_dementia = (len(df_PTID.DX.unique()) == 1) and (df_PTID.iloc[0].DX == "Dementia")
    has_scan = (~df_PTID.IMAGEUID.isna()).sum() != 0
    if not only_dementia and has_scan:
        tte_records.append({"PTID": PTID, "Status": status, "Time": time, "Ref_age": ref_age})
df_tte = pd.DataFrame(tte_records, columns=["PTID", "Status", "Time", "Ref_age"])

In [None]:
# Keep only subjects with a strictly positive time.
df_tte = df_tte.loc[df_tte["Time"] > 0]

In [None]:
# Keep the usable trajectories; the "...ADUnclear" and empty statuses are dropped.
kept_statuses = ["sCN", "sMCI", "pMCI", "uMCINoAD", "uCNNoAD", "uMCIADClear", "uCNADClear", "CNtoMCI", "pCN"]
df_tte = df_tte.loc[df_tte["Status"].isin(kept_statuses)]

In [None]:
# Attach Status, Time and Ref_age to every visit of the retained subjects.
aibl = aibl.merge(df_tte, on="PTID")

In [None]:
# Scans selected for the analysis; IMAGEUID is handled as text from here on.
aiblSelection = pd.read_csv(FILE_AIBL_SELECTION).astype({"IMAGEUID": "str"})

In [None]:
# Drop visits without an MRI scan.
aibl = aibl[aibl["IMAGEUID"].notna()]

In [None]:
# Cast to int first and only then to text, so the text form carries no decimal part.
aibl = aibl.astype({"IMAGEUID": "int"}).astype({"IMAGEUID": "str"})


In [None]:
# Restrict the table to the selected scans.
aibl = aibl.loc[aibl["IMAGEUID"].isin(aiblSelection["IMAGEUID"])]

In [None]:
# Continue under a new name; this binds df_ges to the same DataFrame object as
# aibl (no copy is made).
df_ges=aibl

In [None]:
# Add the FastSurfer volumes of each scan. Columns present on both sides keep
# the clinical (left) version; the right-hand duplicates are tagged "_y" and removed.
df_ges = df_ges.merge(df, on="IMAGEUID", suffixes=["", "_y"])
cols = [column for column in df_ges.columns if not column.endswith("_y")]
df_ges = df_ges.loc[:, cols]


In [None]:
# Table whose region_volume column lists the volume columns kept further below.
volume_feature=pd.read_csv(MAPPING_FILE)


In [None]:
# Express Time relative to each scan: subtract the time elapsed between the
# subject's reference age and the age at this scan.
# NOTE(review): not idempotent -- re-running this cell subtracts the offset again.
elapsed_since_reference = df_ges["AGE"] - df_ges["Ref_age"]
df_ges["Time"] = df_ges["Time"] - elapsed_since_reference

In [None]:
# Order the scans by age.
df_ges = df_ges.sort_values(by="AGE")

In [None]:
# From here on PTID and IMAGEUID are index levels rather than columns.
df_ges = df_ges.set_index(keys=["PTID", "IMAGEUID"])

In [None]:
# Independent copy of the table with the PTID/IMAGEUID index discarded.
df_ges_copy = df_ges.copy().reset_index(drop=True)

In [None]:
# Display the table before the column selection.
df_ges

In [None]:
# Keep the clinical columns plus the volume columns named in the mapping file.
# Names that are not columns (PTID and IMAGEUID are index levels at this point)
# are silently ignored by filter().
clinical_columns = ["PTID", "PTGENDER", "CDGLOBAL", "MMSCORE", "APOE4", "AGE", "IMAGEUID", "MagStrength", "Status", "Time", "LDELTOTAL", "LIMMTOTAL", "EstimatedTotalIntraCranialVol"]
df_ges = df_ges.filter(items=clinical_columns + volume_feature.region_volume.tolist(), axis=1)

In [None]:
# Display the table after the column selection.
df_ges

In [None]:
# Scan-level texture features; the path column is parsed for the subject and
# image IDs in the next cell.
texture=pd.read_csv(AIBL_TEXTURE_DATA)

In [None]:

# The eighth "/"-separated component of path is parsed for both IDs: the
# subject ID is its fourth "_"-separated token, the image UID is what follows
# the last "_I".
scan_component = texture["path"].str.split("/").str[7]
texture["PTID"] = scan_component.str.split("_").str[3]
texture["IMAGEUID"] = scan_component.str.split("_I").str[-1].str.split("/").str[0]

texture = texture.drop(columns=["path"])

# Normalise the UID through int so its text form matches the clinical table.
texture = texture.astype({"IMAGEUID": "int"}).astype({"IMAGEUID": "str"})

In [None]:
# Append the FreeSurfer region name to every texture feature column. The region
# number is the last "_"-separated token of the feature column name.
# NOTE(review): the look-up table is read with its first line as header, so its
# columns are addressed as "0" and "Unknown" -- confirm that losing that first
# entry as a data row is intended.
freeSurfer_LUT = pd.read_table(FREESURFER_LUT_DIR, sep=";")
feature_columns = texture.drop(["PTID", "IMAGEUID"], axis=1).columns
df = pd.DataFrame({
    "feature_name": feature_columns,
    "region": feature_columns.str.split("_").str[-1],
    "feature_name_without_region": feature_columns.str.rsplit("_", n=1).str.get(0),
})
df["region"] = pd.to_numeric(df["region"])
dfmerged = df.merge(freeSurfer_LUT, left_on="region", right_on="0", how="left")
cnames = (dfmerged["feature_name"] + "_" + dfmerged["Unknown"]).tolist()
# PTID and IMAGEUID were appended last, so they are the two trailing columns.
cnames += ["PTID", "IMAGEUID"]
texture.columns = cnames

In [None]:
# Add the texture features of each scan. On the left side IMAGEUID is an index
# level here, not a column.
# NOTE(review): confirm how this merge treats the remaining PTID index level and
# the PTID column that texture brings along.
df_ges = df_ges.merge(texture, on="IMAGEUID", how="left", suffixes=["", "_y"])
cols = [column for column in df_ges.columns if not column.endswith("_y")]
df_ges = df_ges.loc[:, cols]

# Drop columns that are filled in for fewer than half of the rows.
min_filled_rows = int(len(df_ges) * 0.5)
df_ges = df_ges.dropna(thresh=min_filled_rows, axis=1)

In [None]:
# One boolean per row: False for the statuses listed here, True for every other status.
non_event_statuses = ["sCN", "sMCI", "CNtoMCI", "uMCINoAD", "uCNNoAD"]
list_Y_train = [row_status not in non_event_statuses for row_status in df_ges["Status"]]

In [None]:
# Store the boolean labels computed in the previous cell as column "out".
df_ges["out"]=list_Y_train

In [None]:
# One label per row: "MCI" for the statuses listed here, "CN" for every other status.
mci_start_statuses = ["sMCI", "pMCI", "uMCINoAD", "uMCIADClear"]
list_Y_train = ["MCI" if row_status in mci_start_statuses else "CN" for row_status in df_ges["Status"]]

In [None]:
# Store the "CN"/"MCI" labels computed in the previous cell as column "init_diag".
df_ges["init_diag"]=list_Y_train

In [None]:
# Write the final table (index included) to the configured output file.
df_ges.to_csv(SAVE_DIR_AIBL_DATASET)

In [None]:
# Display the final table.
df_ges