In [4]:
import numpy as np
import pandas as pd 
import seaborn as sns
import sklearn as sk
from icecream import ic
from typing import Dict, Callable
import csv
import os
from datetime import datetime

In [5]:




# Base Data Class
class Data:
    """
    Loads the raw tables of one cohort and derives the views used for matching.

    The tuebingen layout is treated as the "standard" column naming; PPMI tables
    can be renamed to it with `convert_ppmi_to_standard_keys`.
    """

    # Columns of the MDS-UPDRS III table that are summed into `rigidity`.
    _UPDRS3_RIGIDITY_ITEMS = ("NP3RIGLL", "NP3RIGLU", "NP3RIGN", "NP3RIGRL", "NP3RIGRU")

    # Item stems that exist once with an "R" and once with an "L" suffix; the
    # difference of the two sums is stored as `latindex`.
    _UPDRS3_LATERAL_ITEMS = ("NP3FTAP", "NP3HMOV", "NP3KTRM", "NP3LGAG", "NP3PRSP", "NP3PTRM")

    # Inclusive (first, last) column positions of the PPMI MoCA table that are
    # collapsed into one standard MoCA column each, in the order of the standard
    # columns. Single-position entries are copied, ranges are summed.
    # NOTE(review): positions assume the PPMI MoCA export layout used by the
    # author - confirm against the file, in particular that 18 and 19 are both
    # score columns (a raw word count must not be summed into the score).
    _PPMI_MOCA_BLOCKS = (
        (5, 9),    # executive
        (10, 12),  # naming
        (13, 14),  # attention: numbers
        (15, 15),  # attention: letters
        (16, 16),  # attention: subtraction
        (17, 17),  # language: repetition
        (18, 19),  # language: letters
        (20, 20),  # abstraction
        (21, 25),  # recall
        (26, 31),  # orientation
        (32, 32),  # total
    )

    def __init__(self, path_of_folder: str, foldertype: str = "PPMI"):
        """
        Loads the dataset from the given path and initializes the data structures.

        Parameters
        ----------
        path_of_folder : folder that contains the cohort's files.
        foldertype : one of 'PPMI', 'tuebingen' or 'custom'; selects the loader.

        Raises
        ------
        ValueError
            If `foldertype` is not one of the three supported values.
        """
        self.path = path_of_folder
        self.foldertype = foldertype
        if foldertype == "PPMI":
            self.complete_data = self.load_ppmi(path_of_folder, foldertype)
        elif foldertype == "tuebingen":
            self.complete_data = self.load_tuebingen(path_of_folder, foldertype)
        elif foldertype == "custom":
            self.complete_data = self.load_costum(path_of_folder, foldertype)
        else:
            raise ValueError("For Folder, only 'PPMI', 'tuebingen' or 'custom' are allowed")

        self.covariates = None  # Subset of data used for matching
        self.covariates_longitude = None  # Data for longitudinal analysis (set by to_longitudinal_data)
        self.df_longitude = None  # Same object as covariates_longitude; kept for existing callers
        self.df = None  # Data in format for statistical models probably numpy or something similar
        # The tuebingen layout *is* the standard, so only that cohort starts out
        # converted. (Previously select_covariates() flipped this flag, which
        # made a later PPMI conversion a silent no-op.)
        self.is_converted_to_standard = foldertype == "tuebingen"

    @staticmethod
    def _add_updrs3_summaries(updrs3: pd.DataFrame) -> pd.DataFrame:
        """
        Returns a copy of the MDS-UPDRS III table with `rigidity`,
        `bradykinesia` and `latindex` appended (in that order).

        `latindex` is (sum of right-side items - sum of left-side items),
        averaged over all rows that share PATNO, EVENT_ID and PDSTATE.
        Every input row is kept, including rows whose PDSTATE is missing.
        """
        updrs3 = updrs3.copy()
        updrs3['rigidity'] = updrs3.loc[:, list(Data._UPDRS3_RIGIDITY_ITEMS)].sum(axis=1)
        updrs3['bradykinesia'] = updrs3.loc[:, "NP3BRADY"]

        right = updrs3.loc[:, [stem + "R" for stem in Data._UPDRS3_LATERAL_ITEMS]].sum(axis=1)
        left = updrs3.loc[:, [stem + "L" for stem in Data._UPDRS3_LATERAL_ITEMS]].sum(axis=1)
        updrs3['latindex'] = right - left

        # Merge the group means back on the grouping keys ONLY. Merging without
        # `on=` would also join on `latindex` itself and drop every row whose own
        # value differs from its group mean; dropna=False keeps rows whose
        # PDSTATE is missing.
        keys = ["PATNO", "EVENT_ID", "PDSTATE"]
        group_means = updrs3.groupby(keys, as_index=False, dropna=False)['latindex'].mean()
        return updrs3.drop(columns='latindex').merge(group_means, on=keys, how='left')

    @staticmethod
    def _rename_columns_at(frame: pd.DataFrame, positions, new_names) -> None:
        """
        Renames the columns of `frame` at the given integer positions, in place.

        Assigns a fresh label list instead of writing into `columns.values`:
        an Index is meant to be immutable and writing into its backing array
        can leave label lookups stale.

        Raises
        ------
        ValueError
            If `positions` and `new_names` differ in length.
        """
        positions = list(positions)
        new_names = list(new_names)
        if len(positions) != len(new_names):
            raise ValueError(
                f"Cannot rename {len(positions)} columns with {len(new_names)} names"
            )
        labels = list(frame.columns)
        for position, name in zip(positions, new_names):
            labels[position] = name
        frame.columns = labels

    @staticmethod
    def _dates_as_strings(values) -> list:
        """
        Parses `values` as dates and returns them as 'YYYY-MM-DD' strings.
        Entries that cannot be parsed are returned as the missing-date marker
        produced by pandas (NaT) rather than as strings.
        """
        parsed = pd.to_datetime(values, errors='coerce')
        return [stamp if pd.isnull(stamp) else stamp.strftime('%Y-%m-%d') for stamp in parsed]

    def load_ppmi(self, path_of_folder: str, folder: str):
        """
        Loads in data according to PPMI layout.

        Returns a dict with the keys "stat", "dbs", "medication", "diaghist",
        "demo", "mds_updrs" (itself a dict with "mds_updrs1" .. "mds_updrs4"),
        "moca" and "rbd".

        Raises
        ------
        ValueError
            If `folder` is not 'PPMI'.
        """
        if folder != "PPMI":
            raise ValueError("For Folder, only 'PPMI' is allowed")

        def read(file_name):
            return pd.read_csv(os.path.join(path_of_folder, file_name))

        stat = read('Participant_Status_13Feb2024.csv')
        dbs = read('Surgery_for_PD_Log_15Feb2024.csv')
        dbs = dbs.loc[dbs.PDSURGTP == 1, :]  # keep only log rows with PDSURGTP == 1
        medication = read('LEDD_Concomitant_Medication_Log_24Apr2024.csv')

        diaghist = read('PD_Diagnosis_History_21Feb2024.csv')
        # NOTE(review): the stored notebook output shows a warning raised from
        # this line - consider passing an explicit `format=` once it is confirmed.
        diaghist.SXDT = pd.to_datetime(diaghist.SXDT)

        demo = read('Demographics_25Apr2024.csv')
        # .map tolerates missing values (they stay missing); indexing a numpy
        # array with the column raised as soon as one SEX value was NaN.
        demo['SEX'] = demo['SEX'].map({0: 'female', 1: 'male'})

        # Load in and merge to full updrs1
        updrs1 = read('MDS-UPDRS_Part_I_15Feb2024.csv')
        updrs1p = read('MDS-UPDRS_Part_I_Patient_Questionnaire_15Feb2024.csv')
        # NOTE(review): joining on PATNO alone pairs every row of a patient in
        # one table with every row of that patient in the other. Left unchanged
        # because convert_ppmi_to_standard_keys addresses the merged columns by
        # position - confirm whether EVENT_ID should be part of the key.
        updrs1 = pd.merge(updrs1, updrs1p, on='PATNO', how='outer')

        updrs2p = read('MDS_UPDRS_Part_II__Patient_Questionnaire_15Feb2024.csv')

        updrs3 = self._add_updrs3_summaries(read('MDS-UPDRS_Part_III_15Feb2024.csv'))

        updrs4 = read('MDS-UPDRS_Part_IV__Motor_Complications_15Feb2024.csv')
        mds_updrs = dict(mds_updrs1=updrs1, mds_updrs2=updrs2p, mds_updrs3=updrs3, mds_updrs4=updrs4)

        moca = read('Montreal_Cognitive_Assessment__MoCA__12Mar2024.csv')

        # REM sleep questionnaire
        rbd = read('REM_Sleep_Behavior_Disorder_Questionnaire_08Feb2024.csv')

        ppmi_dict = {"stat": stat, "dbs": dbs, "medication": medication, "diaghist": diaghist,
                     "demo": demo, "mds_updrs": mds_updrs, "moca": moca, "rbd": rbd}

        return ppmi_dict

    def load_from_csv(self, csv_path, col_dict):
        """
        Reads only the columns named by the keys of `col_dict` from `csv_path`
        and renames them to the corresponding values.
        """
        df = pd.read_csv(csv_path, usecols=list(col_dict))
        df = df.rename(columns=col_dict)
        return df

    def load_tuebingen(self, path_of_folder: str, foldertype: str):
        """
        Loads in data according to the tuebingen folder layout (Excel files in
        numbered sub-folders) and gives every table that carries OP_DATUM and
        GEB_DATUM an integer PATNO as its first column.

        Also writes `covariate_names.csv` into `path_of_folder` (see
        `export_covariate_names`).

        Raises
        ------
        ValueError
            If `foldertype` is not 'tuebingen'.
        """
        if foldertype != "tuebingen":
            raise ValueError("For Folder, only 'tuebingen' is allowed")

        def read(relative_path):
            return pd.read_excel(os.path.join(path_of_folder, relative_path))

        # Load in BDI. Only the current version is read; the earlier code also
        # read BDI_v1.xlsx but immediately overwrote the result.
        bdi = read('1. BDI/BDI_v2-aktuell.xlsx')

        # Load in Medication
        medication = read('3. Medication/Medication.xlsx')

        # Load in Demographics
        demo = read('6. Demographics/PD_demographics.xlsx')

        # Create unique identifier (surgery date + birth date) and map it onto
        # patient numbers that will get used in the other dataframes.
        # .copy() so that the column insert below works on an independent frame.
        demo = demo.dropna(subset=['OP_DATUM']).copy()
        demo_identifier = demo['OP_DATUM'].astype(str) + "_" + demo['GEB_DATUM'].astype(str)
        ui_id_mapping = dict(zip(demo_identifier.tolist(), range(1, len(demo_identifier) + 1)))
        demo.insert(0, "PATNO", demo_identifier.map(ui_id_mapping))

        # Function to add PATNO to the other dataframes according to the unique
        # identifier in the demographics dataframe. `mapping_dict` is shared on
        # purpose: identifiers first seen in one table keep their number in the
        # tables loaded afterwards.
        def add_patno(df, mapping_dict=ui_id_mapping):
            df = df.dropna(subset=['OP_DATUM']).copy()
            identifier = df['OP_DATUM'].astype(str) + "_" + df['GEB_DATUM'].astype(str)
            patno = identifier.map(mapping_dict)
            unknown = patno.isna()
            if unknown.any():
                # Numbering is kept exactly as before (one candidate number per
                # unknown row, last one wins per identifier) so that existing
                # PATNO values do not change.
                new_identifiers = identifier[unknown].tolist()
                first_new_id = max(mapping_dict.values(), default=0) + 1
                new_ids = range(first_new_id, first_new_id + len(new_identifiers))
                mapping_dict.update(dict(zip(new_identifiers, new_ids)))
                patno = identifier.map(mapping_dict)
            df.insert(0, "PATNO", patno.astype(int))
            return df

        # Load in MDS-UPDRS
        mds_updrs1 = add_patno(read('2. MDS-UPDRS/MDS-UPDRS_I.xlsx'))
        mds_updrs2 = add_patno(read('2. MDS-UPDRS/MDS-UPDRS_II.xlsx'))
        mds_updrs3 = add_patno(read('2. MDS-UPDRS/MDS-UPDRS_III.xlsx'))
        mds_updrs4 = add_patno(read('2. MDS-UPDRS/MDS-UPDRS_IV.xlsx'))

        # Add a total score for each updrs part: everything after the first
        # five columns is summed row-wise (part III gets no total here).
        mds_updrs1['MDS-UPDRS_1.TOT'] = mds_updrs1.iloc[:, 5:].sum(axis=1)
        mds_updrs2['MDS-UPDRS_2.TOT'] = mds_updrs2.iloc[:, 5:].sum(axis=1)
        mds_updrs4['MDS-UPDRS_4.TOT'] = mds_updrs4.iloc[:, 5:].sum(axis=1)
        mds_updrs = dict(mds_updrs1=mds_updrs1, mds_updrs2=mds_updrs2, mds_updrs3=mds_updrs3, mds_updrs4=mds_updrs4)

        # Load in MoCA
        moca = add_patno(read('4. MoCA/MoCA.xlsx'))

        # Load in PDQ-39
        pdq39 = add_patno(read('5. PDQ-39/PDQ-39.xlsx'))

        # Load in Stimulation
        stimulation = add_patno(read('7. Stimulation/Stimulation_parameters.xlsx'))

        # Load in BBS
        bbs = add_patno(read('8. BBS/BBS.xlsx'))

        # Load in UPDRS (OLD)
        updrs_old = add_patno(read('9. UPDRS (OLD)/UPDRS_III.xlsx'))

        dbs_dict = {"bdi": bdi, "medication": medication, "demo": demo, "mds_updrs": mds_updrs,
                    "moca": moca, "pdq39": pdq39, "stimulation": stimulation, "bbs": bbs,
                    "updrs_old": updrs_old}
        # `foldertype` was validated above, so no second check is needed here.
        self.export_covariate_names(dbs_dict, path_of_folder)
        return dbs_dict

    def load_costum(self, path_of_folder: str, foldertype: str):
        """
        Loads in data from a custom (your) data folder. Has to be implemented according to your data layout.
        Currently a stub that returns None.
        """
        pass

    def export_covariate_names(self, full_df, path: str):
        """
        Assuming that this dataset will be the standard when it comes to dataframe keys,
        export the column names to `covariate_names.csv` inside `path`.

        File layout: a first row with the top-level keys of `full_df`, then one
        row per table of the form `[table_key, column, column, ...]`.
        Does nothing unless this instance's foldertype is 'tuebingen'.
        """
        if self.foldertype != "tuebingen":
            return

        with open(os.path.join(path, 'covariate_names.csv'), 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(full_df.keys())  # Write the dictionary keys as the header row

            def write_dict_to_csv(d, parent_key=''):
                for key, value in d.items():
                    if isinstance(value, dict):
                        # Nested tables are deliberately written under their own
                        # key without a parent prefix: convert_ppmi_to_standard_keys
                        # looks rows up by e.g. 'mds_updrs1'.
                        write_dict_to_csv(value)
                    else:
                        writer.writerow([parent_key + key] + list(value.columns))

            write_dict_to_csv(full_df)

    def select_covariates(self, covariates: Dict = None):
        """
        Select a subset of data based on the specified covariates of interest. Has to be specified by the user.

        If `covariates` is given it is stored as-is; otherwise the "mds_updrs"
        entry of the loaded data is used. Returns the stored selection.
        """
        if covariates is not None:
            self.covariates = covariates
        else:
            # Manually select/compute the covariates of interest
            self.covariates = self.complete_data["mds_updrs"]  # Replace with the filtered data
        # Selecting covariates renames nothing, so is_converted_to_standard is
        # intentionally left untouched here.
        return self.covariates

    def to_longitudinal_data(self):
        """
        Prepare the data for longitudinal analysis.

        Returns (and stores in `covariates_longitude` / `df_longitude`) a dict:
            output['OP_DATUM'][PATNO]                -> first OP_DATUM seen for that patient
            output[covariate][PATNO]['TEST_DATUM']   -> list of test dates
        Dates are 'YYYY-MM-DD' strings; unparseable test dates stay missing.
        Tables without an OP_DATUM column contribute test dates only.

        Raises
        ------
        ValueError
            If no covariates have been selected yet.
        """
        if self.covariates is None:
            raise ValueError("No covariates selected. Call select_covariates() first.")

        print("Preparing data for longitudinal analysis...")
        longitudinal = {'OP_DATUM': {}}
        for key, frame in self.covariates.items():
            has_op_date = 'OP_DATUM' in frame.columns
            per_patient = {}
            for patno, group in frame.groupby('PATNO'):
                # Work on derived values instead of writing into the group,
                # which is a slice of `frame`.
                per_patient[patno] = {'TEST_DATUM': self._dates_as_strings(group['TEST_DATUM'])}

                # Store OP_DATUM once per PATNO
                if has_op_date and patno not in longitudinal['OP_DATUM']:
                    first_op = group['OP_DATUM'].iloc[0]
                    if not isinstance(first_op, str):
                        first_op = self._dates_as_strings(group['OP_DATUM'].iloc[:1])[0]
                    longitudinal['OP_DATUM'][patno] = first_op
            longitudinal[key] = per_patient

        self.df_longitude = longitudinal
        self.covariates_longitude = longitudinal
        return longitudinal
    #def to_df(self):
    #    """
    #    Takes the already selected covariates and converts them to a pd df format
    #    """
    #    # Merge all dataframes in the dictionary by 'PATNO'
    #    merged_df = None
    #    for key, df in self.covariates.items():
    #        if merged_df is None:
    #            merged_df = df
    #        else:
    #            merged_df = pd.merge(merged_df, df, on=['PATNO', 'TEST_DATUM'], how='outer')    
    #            self.df = merged_df
    #    return self.df

    def convert_ppmi_to_standard_keys(self, file: str):
        """
        Convert the PPMI data to the standard keys, which are based on the tuebingen data format.

        `file` is the CSV written by `export_covariate_names`. The UPDRS tables
        are renamed by column position, the MoCA table is rebuilt from summed
        item blocks. Returns the parsed covariate-name dict, or None (after
        printing a notice) if the data is already in the standard format.

        Raises
        ------
        FileNotFoundError
            If `file` does not exist.
        """
        if self.is_converted_to_standard:
            print("Already converted to standard")
            return
        covariate_file = file
        if not os.path.exists(covariate_file):
            raise FileNotFoundError(f"Covariate names file not found at {covariate_file}. Please provide the file.")

        # Reading in the covariate names file: {table_key: [column names]}
        covariate_dict = {}
        with open(covariate_file, mode='r', newline='') as infile:
            reader = csv.reader(infile)
            next(reader, None)  # first row only lists the top-level keys
            for row in reader:
                if row:  # tolerate blank lines
                    covariate_dict[row[0]] = row[1:]

        updrs = self.complete_data['mds_updrs']

        # UPDRS1: position i takes standard name i-1. (The earlier code first
        # renamed 6..11 and then 5..18 with the same mapping; one pass suffices.)
        self._rename_columns_at(updrs['mds_updrs1'], range(5, 19), covariate_dict['mds_updrs1'][4:18])

        # UPDRS2
        self._rename_columns_at(updrs['mds_updrs2'], range(6, 20), covariate_dict['mds_updrs2'][5:19])
        # NOTE(review): the exam-date name is taken from the mds_updrs3 row, as
        # before - confirm it is identical in the mds_updrs2 row.
        updrs['mds_updrs2'].rename(columns={'INFODT': covariate_dict['mds_updrs3'][4]}, inplace=True)

        # UPDRS3: questions by position, then the named special columns
        self._rename_columns_at(updrs['mds_updrs3'], range(23, 56), covariate_dict['mds_updrs3'][8:41])
        updrs['mds_updrs3'].rename(columns={
            'NHY': covariate_dict['mds_updrs3'][-2],       # HY scale
            'NP3TOT': covariate_dict['mds_updrs3'][-1],    # total score
            'DYSKPRES': covariate_dict['mds_updrs3'][-4],  # dyskinesia presence
            'INFODT': covariate_dict['mds_updrs3'][4],     # exam date
        }, inplace=True)

        # UPDRS4: rename the kept columns, then drop the ones in between.
        # The labels to drop are read before renaming; those positions are not renamed.
        updrs4 = updrs['mds_updrs4']
        dropped_labels = updrs4.columns[[6, 7, 8, 11, 12, 13, 17, 18, 19]]
        self._rename_columns_at(updrs4, [5, 9, 10, 14, 15, 16, 20], covariate_dict['mds_updrs4'][5:])
        updrs4.drop(dropped_labels, axis=1, inplace=True)

        # MOCA: collapse item blocks into the standard columns. Block ends are
        # inclusive; the earlier slices (e.g. 5:9, 13:14) excluded the last
        # column of every multi-column block.
        moca = self.complete_data['moca']
        collapsed = []
        for first, last in self._PPMI_MOCA_BLOCKS:
            if first == last:
                collapsed.append(moca.iloc[:, first])
            else:
                collapsed.append(moca.iloc[:, first:last + 1].sum(axis=1))
        moca_scores = dict(zip(covariate_dict['moca'][5:16], collapsed))
        self.complete_data['moca'] = pd.concat([
            moca.iloc[:, :5],
            pd.DataFrame(moca_scores),
            moca.iloc[:, -2:]
        ], axis=1).rename(columns={'INFODT': covariate_dict['moca'][4]})

        self.is_converted_to_standard = True
        return covariate_dict

    def convert_costom_to_standard_keys(self, file: str):
        """
        Convert the custom data to the standard keys, which are based on the tuebingen data format.
        Currently a stub.
        """
        pass


    def remove_outliers(self):
        """
        Placeholder for outlier removal. To be implemented.
        """
        print("Removing outliers...")
        # Implement logic for removing outliers
        pass


# Propensity Matching Class
class PropensityMatching:
    """
    Skeleton for propensity matching between the PPMI cohort and a custom
    (DBS) cohort. All matching/grouping/classification methods are still stubs.
    """

    def __init__(self, ppmi: Data, custom: Data):
        """
        Store the two cohorts; the model-ready views start out unset.
        """
        self.ppmi, self.custom = ppmi, custom
        self.ppmi_for_model = self.custom_for_model = None

    def match(self, matching_method: Callable, grouping_func: Callable, classification_model: Callable):
        """
        Entry point for matching with pluggable strategies.
        Not implemented yet: the arguments are ignored and None is returned.
        """
        # Implement the matching logic using the provided methods
        return

    def match_method1(self):
        """
        Match on distance of preoperative test dates and follow up test dates.

        Currently only defines its helper functions; none of them is called and
        the method returns None.
        """

        def convert_dates_to_days(date_dict: Dict[str, Dict[str, list]]):
            """
            Replace every list of 'YYYY-MM-DD' strings in `date_dict`
            (covariate -> patno -> dates) by day counts relative to the fixed
            reference date 1950-01-01. Falsy entries are skipped; a list with no
            usable entry is left as it was. Mutates and returns `date_dict`.
            """
            reference_day = datetime(1950, 1, 1)
            for per_patient in date_dict.values():
                for patno in per_patient:
                    parsed = [
                        datetime.strptime(raw, '%Y-%m-%d')
                        for raw in per_patient[patno]
                        if raw
                    ]
                    if not parsed:
                        continue
                    per_patient[patno] = [(day - reference_day).days for day in parsed]
            return date_dict

        def find_preop_test(date_dict: Dict[str, Dict[str, list]]):
            """
            Find the last test date preceding the operation for each patient in the DBS cohort.
            Not implemented yet.
            """
            return None

        def match_on_preop():
            """
            Match patients based on the preoperative test dates. Not implemented yet.
            """
            return None

        def match_on_followup():
            """
            Match patients based on the follow-up dates. Not implemented yet.
            """
            return None

    def grouping_method1(self):
        """
        Example grouping method 1. Replace with actual logic.
        """
        print("Using grouping method 1...")

    def classification_model1(self):
        """
        Example classification model 1. Replace with actual logic.
        """
        print("Using classification model 1...")


In [6]:
# Cohort folders (absolute paths on the author's machine - adjust before running elsewhere).
path_ppmi = '/home/georg-tirpitz/Documents/Neuromodulation/Parkinson_PSM/PPMI'
path_tue = '/home/georg-tirpitz/Documents/Neuromodulation/QuestionnaireData'

# Load both cohorts; the foldertype picks the matching loader inside Data.
ppmi = Data(path_of_folder=path_ppmi, foldertype="PPMI")
tue = Data(path_of_folder=path_tue, foldertype="tuebingen")

  diaghist.SXDT = pd.to_datetime(diaghist.SXDT)


In [7]:

# Inspect the tuebingen tables: list the MoCA columns with their positions,
# then display the first rows of MDS-UPDRS part I.
#ic(dbs.complete_data['demo'].keys())
#tue.complete_data['moca'].head(5)
keys = tue.complete_data['moca'].keys()
for i, key in enumerate(keys):
    print(i, key)
# Last expression of the cell -> rendered as the cell output (first 5 rows).
tue.complete_data["mds_updrs"]['mds_updrs1'].head(5)
#ic(type(dbs.complete_data['demo']['Unique identifier']))
#ic(ppmi.complete_data['updrs'].head(1))




0 PATNO
1 GEB_DATUM
2 OP_DATUM
3 OP_ZAHLEN
4 TEST_DATUM
5 MoCA_Executive
6 MoCA_Benennen
7 MoCA_Aufmerksamkeit_Zahlenliste
8 MoCA_Aufmerksamkeit_Buchstabenliste
9 MoCA_Aufmerksamkeit_Abziehen
10 MoCA_Sprache_Wiederholen
11 MoCA_Sprache_Buchstaben
12 MoCA_Abstraktion
13 MoCA_Erinnerung
14 MoCA_Orientierung
15 MoCA_ONLY_GES


Unnamed: 0,PATNO,GEB_DATUM,OP_DATUM,OP_ZAHLEN,TEST_DATUM,MDS-UPDRS_1.1,MDS-UPDRS_1.2,MDS-UPDRS_1.3,MDS-UPDRS_1.4,MDS-UPDRS_1.5,MDS-UPDRS_1.6,MDS-UPDRS_1.7,MDS-UPDRS_1.8,MDS-UPDRS_1.9,MDS-UPDRS_1.10,MDS-UPDRS_1.11,MDS-UPDRS_1.12,MDS-UPDRS_1.13,MDS-UPDRS_1.TOT
0,526,1956-11-06 00:00:00,2018-04-06 00:00:00,2018-PD3,2017-07-14,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0,4.0
1,355,1968-02-20 00:00:00,2018-02-02 00:00:00,2018-PD1,2017-10-16,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,5.0
2,551,1949-05-03 00:00:00,2018-03-12 00:00:00,2018-PD11,2017-11-13,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,4.0
3,479,1945-01-12 00:00:00,2018-04-20 00:00:00,2018-PD5,2018-01-29,0.0,0.0,0.0,0.0,0.0,0.0,3.0,2.0,2.0,2.0,1.0,0.0,1.0,11.0
4,543,1967-06-11 00:00:00,2018-04-13 00:00:00,2018-PD4,2018-02-27,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,1.0,0.0,0.0,0.0,4.0


In [8]:
# Show all columns when displaying frames (global pandas option, affects later cells too).
pd.set_option('display.max_columns', None)
# List the PPMI MDS-UPDRS part II columns with their positions; the positional
# renaming in convert_ppmi_to_standard_keys is based on listings like this one.
keys = ppmi.complete_data["mds_updrs"]["mds_updrs2"].keys()
for i, key in enumerate(keys):
    print(i, key)
# Scratch note: column names followed by the positions 5, 9, 10, 14, 15, 16, 20
# (the same positions convert_ppmi_to_standard_keys renames in mds_updrs4).
#['NP4WDYSK','NP4DYSKI','NP4OFF','NP4FLCTI','NP4FLCTX','NP4DYSTN', 'NP4TOT'] 5, 9, 10, 14, 15, 16, 20

0 REC_ID
1 PATNO
2 EVENT_ID
3 PAG_NAME
4 INFODT
5 NUPSOURC
6 NP2SPCH
7 NP2SALV
8 NP2SWAL
9 NP2EAT
10 NP2DRES
11 NP2HYGN
12 NP2HWRT
13 NP2HOBB
14 NP2TURN
15 NP2TRMR
16 NP2RISE
17 NP2WALK
18 NP2FREZ
19 NP2PTOT
20 ORIG_ENTRY
21 LAST_UPDATE


In [9]:
# Rename the PPMI tables to the standard (tuebingen) column names using the
# covariate-name file written while loading the tuebingen cohort, then show the
# resulting MoCA columns. Returns None if the data is already converted.
covariates = ppmi.convert_ppmi_to_standard_keys('/home/georg-tirpitz/Documents/Neuromodulation/QuestionnaireData/covariate_names.csv')
ppmi.complete_data['moca'].keys()

Index(['REC_ID', 'PATNO', 'EVENT_ID', 'PAG_NAME', 'TEST_DATUM',
       'MoCA_Executive', 'MoCA_Benennen', 'MoCA_Aufmerksamkeit_Zahlenliste',
       'MoCA_Aufmerksamkeit_Buchstabenliste', 'MoCA_Aufmerksamkeit_Abziehen',
       'MoCA_Sprache_Wiederholen', 'MoCA_Sprache_Buchstaben',
       'MoCA_Abstraktion', 'MoCA_Erinnerung', 'MoCA_Orientierung',
       'MoCA_ONLY_GES', 'ORIG_ENTRY', 'LAST_UPDATE'],
      dtype='object')

In [10]:
tue.select_covariates()
#ppmi.select_covariates()
#print(tue.df)
# Build the longitudinal view once and reuse it. The earlier version called
# to_longitudinal_data() twice and threw the first result away.
longitudinal = tue.to_longitudinal_data()
assert 'mds_updrs3' in longitudinal, "expected MDS-UPDRS III in the longitudinal view"
# Last expression of the cell -> displayed: surgery date per patient.
longitudinal['OP_DATUM']
#ppmi.to_longitudinal_data()
#print(type(long['mds_updrs3'][610]['TEST_DATUM'][0]))
#print(type(long['mds_updrs3'][4]['TEST_DATUM'][0]))
#type(long['mds_updrs3'][4])


Preparing data for longitudinal analysis...
Preparing data for longitudinal analysis...


{206: '2018-02-23',
 228: '2019-04-05',
 240: '2019-08-09',
 258: '2020-07-03',
 295: '2022-03-23',
 298: '2022-05-06',
 302: '2022-06-03',
 303: '2022-06-10',
 317: '2022-11-18',
 318: '2022-11-25',
 349: '2024-07-26',
 350: '2024-08-02',
 355: '2018-02-02',
 360: '2018-12-14',
 364: '2019-07-05',
 366: '2019-06-07',
 367: '2019-07-26',
 368: '2019-08-09',
 370: '2019-07-19',
 371: '2019-10-18',
 372: '2019-11-30',
 375: '2019-10-25',
 377: '2020-06-12',
 378: '2020-01-17',
 381: '2020-06-05',
 382: '2020-06-19',
 384: '2021-11-12',
 385: '2020-05-29',
 386: '2020-08-21',
 387: '2020-09-25',
 388: '2020-07-10',
 389: '2020-09-11',
 390: '2021-02-12',
 391: '2020-11-13',
 392: '2020-10-30',
 393: '2020-11-20',
 394: '2021-01-29',
 396: '2022-06-17',
 397: '2021-02-19',
 401: '2021-05-21',
 402: '2021-07-02',
 403: '2021-04-16',
 404: '2021-05-07',
 406: '2021-04-09',
 411: '2021-10-22',
 413: '2021-11-05',
 417: '2022-03-23',
 420: '2022-05-06',
 422: '2022-09-23',
 423: '2022-06-24',
