# UKB -- ALL CODES

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Download the input files with the dx CLI (uncomment the lines below to run)
#!dx download data/files_for_cox/AD_with_tenure_OCT_09_24.csv 
#!dx download data/files_for_cox/PD_with_tenure_OCT_09_24.csv 
#!dx download data/files_for_cox/DEM_with_tenure_OCT_09_24.csv 

#! dx download -r data/UPDATED_ICD10_dates/

In [None]:
# ICD10 codes carried forward as covariates, one row group per chapter letter.
# Note: a few codes from the previous list were dropped because they were removed from
# controls, had fewer than 20 individuals, or had no data available in AoU.
icd10_list = [
    'A04',
    'B02', 'B37',
    'E10', 'E11', 'E27', 'E78', 'E87',
    'F20', 'F31', 'F32', 'F33', 'F40', 'F41', 'F42', 'F43', 'F44', 'F45', 'F48', 'F50', 'F51',
    'G40', 'G43', 'G47',
    'H66',
    'I10', 'I11', 'I12', 'I15', 'I20', 'I21', 'I25', 'I47', 'I48', 'I49', 'I50',
    'I60', 'I61', 'I62', 'I63', 'I64', 'I65', 'I66', 'I67', 'I69', 'I82',
    'K04', 'K05', 'K20', 'K21', 'K22', 'K25', 'K31', 'K51', 'K59',
    'K70', 'K71', 'K72', 'K73', 'K74', 'K75', 'K76',
    'L25', 'L40', 'L50',
    'M06', 'M13', 'M15', 'M16', 'M17', 'M18', 'M19', 'M32', 'M45', 'M79', 'M80', 'M81', 'M88',
    'N04', 'N10', 'N18', 'N19', 'N30', 'N31', 'N32', 'N39', 'N40', 'N94',
]

# Quick sanity check: how many codes, and which ones
print(len(icd10_list))
print(icd10_list)

In [None]:
# Restrict each ICD10 code's event dates to the window covered by our medication data.
# For every code this reads UPDATED_ICD10_dates/<code>_with_date.csv and writes
# <code>_with_date_1999.csv (columns: ID, <code>) to the working directory.
# A row is dropped when its "date" is one of the placeholder messages below, cannot be
# parsed into a year (e.g. missing), or falls outside [FIRST_YEAR, LAST_YEAR_EXCLUSIVE).
# NOTE(review): the upper bound is exclusive, so events dated in 2018 are dropped even
# though the intent was described as removing codes "after 2018" -- confirm which is wanted.
FIRST_YEAR = 1999
LAST_YEAR_EXCLUSIVE = 2018
PLACEHOLDER_DATES = [
    "Code has event date matching participant's date of birth",
    "Code has event date after participant's date of birth and falls in the same calendar year as date of birth",
    "Code has event date in the future and is presumed to be a place-holder or other system default {2037-07-07}",
]

for code in icd10_list:
    print(code)
    # (read f'{code}_with_date.csv' instead if the files sit directly in the working directory)
    code_dates = pd.read_csv(f'UPDATED_ICD10_dates/{code}_with_date.csv')
    print(len(code_dates))

    # The year is the text before the first '-'. Anything non-numeric (missing values,
    # placeholder text) becomes NaN instead of raising, and NaN fails both range checks.
    event_year = pd.to_numeric(
        code_dates[code].astype(str).str.split('-', n=1).str[0],
        errors='coerce',
    )
    in_window = (
        ~code_dates[code].isin(PLACEHOLDER_DATES)
        & (event_year >= FIRST_YEAR)
        & (event_year < LAST_YEAR_EXCLUSIVE)
    )

    # Select rows and columns in one .loc call so no helper column is ever written onto a slice.
    code_dates = code_dates.loc[in_window, ['ID', code]]
    print(len(code_dates))
    code_dates.to_csv(f'{code}_with_date_1999.csv', header = True, index = False)

In [None]:
# Spot-check a single filtered file (M81) to confirm its contents look sensible
test = pd.read_csv('M81_with_date_1999.csv')
test

# Add covariates to original dataframe

In [None]:
# Cohort to process. The notebook has to be run once for each NDD: AD, PD, and DEM.
ndd = 'AD'

# Tenure file for the selected cohort; low_memory=False reads the file in one pass
# rather than in chunks.
tenure_file = f'{ndd}_with_tenure_OCT_09_24.csv'
df = pd.read_csv(tenure_file, low_memory=False)
df

In [None]:
# Number of ICD10 codes about to be merged onto df
print(len(icd10_list))

In [None]:
# Add ICD10 data to the original df: left-join each code's filtered file on participant ID,
# so every row of df is kept whether or not the participant has that code.
# The printed length is expected to stay constant; if it grows, the code file has
# repeated IDs and the left join has duplicated participants.
for code in icd10_list:
    code_dates = pd.read_csv(f'{code}_with_date_1999.csv')
    df = df.merge(code_dates, on = 'ID', how = 'left')
    print(len(df))

In [None]:
# For each code, count participants without (True) and with (False) a value in that column
for code in icd10_list:
    print(code)
    is_missing = df[code].isna()
    print(is_missing.value_counts())

In [None]:
# We only want to include an ICD10 code as a covariate if it was recorded BEFORE the end of the study.
# For every code this adds two columns to df:
#   Lag_<code>  years from 1 January of recruit_year to the code's event date
#               (NaN when the participant has no parseable date for the code)
#   QC0_<code>  1 if Lag_<code> < tenure, else 0 (a NaN lag compares False, so it gives 0)
# NOTE(review): years are computed as days/365 with no leap-year adjustment -- presumably
# consistent with how `tenure` was derived; confirm.

# The recruitment date does not depend on the code, so parse it once instead of once per code.
recruit_date = pd.to_datetime(df['recruit_year'], format = '%Y')

icd10_flags = {}
for code in icd10_list:
    lag_years = (pd.to_datetime(df[code], errors = 'coerce') - recruit_date).dt.days/365
    icd10_flags['Lag_' + code] = lag_years

    # Select data if it happened before study end -- lag 0
    icd10_flags['QC0_' + code] = np.where(lag_years < df['tenure'], 1, 0)

# Attach all new columns with one concat instead of inserting them one by one (repeated
# inserts fragment the frame). Earlier copies are dropped first so re-running the cell
# replaces the columns rather than duplicating them.
df = pd.concat(
    [
        df.drop(columns = list(icd10_flags), errors = 'ignore'),
        pd.DataFrame(icd10_flags, index = df.index),
    ],
    axis = 1,
)

In [None]:
# Check that each ICD10 code has at least 20 participants flagged QC0 == 1;
# codes that fall short are collected in remove_list.
remove_list = []
for n in icd10_list:
    n_flagged = int((df[f'QC0_{n}'] == 1).sum())
    print(n)
    print(n_flagged)
    if n_flagged < 20:
        remove_list.append(n)

In [None]:
# Codes with fewer than 20 flagged participants.
# NOTE(review): nothing visible in this notebook drops these codes from df or icd10_list;
# they are only displayed here -- confirm they are handled downstream.
remove_list

# Add APOE status

In [None]:
#! dx download apoe/apoe_snps_genotypes.APOE_GENOTYPES.csv

In [None]:
# Read the APOE genotype calls and keep only the sample ID and genotype columns
apoe_all = pd.read_csv('apoe_snps_genotypes.APOE_GENOTYPES.csv')
apoe = apoe_all.loc[:, ['IID', 'APOE_GENOTYPE']]
apoe

In [None]:
# Left-join APOE genotypes onto df (df.ID matched to apoe.IID); participants without
# a genotype keep their row with APOE_GENOTYPE missing.
df = pd.merge(df, apoe, how = 'left', left_on = 'ID', right_on = 'IID')
df

In [None]:
# Genotype frequencies, including participants with no APOE genotype (NaN)
df['APOE_GENOTYPE'].value_counts(dropna=False)

In [None]:
# One-hot encode APOE_GENOTYPE: the column is replaced by one indicator column per genotype.
# With an empty prefix and separator, each new column is named by the genotype value alone.
df_encoded = pd.get_dummies(
    df,
    prefix='',
    prefix_sep='',
    columns=['APOE_GENOTYPE'],
)

# Show the resulting column names
df_encoded.columns

In [None]:
# Display the frame after one-hot encoding the APOE genotype
df_encoded

# Add 10+ year drug flags (QC10+) to main file -- needed for Table 4

In [None]:
drugs_list = ['moxifloxacin', 'fexofenadine', 'lormetazepam', 'nizatidine', 'lacidipine', 'rifaximin', 'levomepromazine', 'losartan', 'metformin', 'colchicine', 'valproicacid', 'acetylcysteine', 'phenoxymethylpenicillin', 'erdosteine', 'montelukast', 'sildenafil', 'amiodarone', 'clomethiazole', 'gemfibrozil', 'alogliptin', 'mirtazapine', 'aclidiniumbromide', 'clomipramine', 'nebivolol', 'minoxidil', 'clonidine', 'alfuzosin', 'rizatriptan', 'mecysteine', 'rimonabant', 'naratriptan', 'paracetamolandmetoclopramide', 'orlistat', 'griseofulvin', 'telmisartan', 'pravastatin', 'zaleplon', 'piroxicam', 'apixaban', 'amphotericin', 'linagliptin', 'amlodipine', 'diltiazem', 'nortriptyline', 'potassiumchlorideandpotassiumbicarbonate', 'propantheline', 'hydrotalcite', 'alprazolam', 'indomethacin', 'ciclesonide', 'nabumetone', 'perindopril', 'digoxin', 'oxerutins', 'amisulpride', 'bisacodyl', 'carbocisteine', 'gabapentin', 'misoprostol', 'ticagrelor', 'apomorphine', 'tetrabenazine', 'tetracycline', 'nalidixicacid', 'methylcellulose', 'flucloxacillin', 'epogam', 'mefenamic', 'dantrolene', 'cyproheptadine', 'phenylephrine', 'terbutaline', 'fentanyl', 'fluoxetine', 'moxisylyte', 'zonisamide', 'doxepin', 'sodiumvalproate', 'thioridazine', 'clomifene', 'fesoterodine', 'clonazepam', 'orciprenaline', 'fusidicacid', 'metronidazole', 'tramadol', 'ibandronicacid', 'gliclazide', 'naltrexone', 'chlortalidone', 'ciprofloxacin', 'orphenadrine', 'sulpiride', 'aminophylline', 'aspirin', 'glimepiride', 'budesonide', 'eprosartan', 'carvedilol', 'rupatadine', 'acemetacin', 'ibuprofen', 'aceclofenac', 'ketorolac', 'glyceryltrinitrate', 'olsalazine', 'zafirlukast', 'flurbiprofen', 'trimethoprim', 'cefixime', 'fosinopril', 'inositolnicotinate', 'entacapone', 'lercanidipine', 'acetazolamide', 'indapamide', 'metoprolol', 'febuxostat', 'captopril', 'disulfiram', 'propylthiouracil', 'tolbutamide', 'ezetimibe', 'risedronate', 'bisoprolol', 'pantoprazole', 'flavoxate', 'cinnarizine', 'nateglinide', 
'hydrochlorothiazideandramipril', 'magnesium', 'eletriptan', 'trifluoperazine', 'chloramphenicol', 'alimemazine', 'moxonidine', 'oxcarbazepine', 'cetirizine', 'pseudoephedrine', 'albuterol', 'hydroxychloroquine', 'trimipramine', 'trimethoprimandsulfamethoxazole', 'nifedipine', 'sotalol', 'hyoscinebutylbromide', 'loratadine', 'erythromycin', 'naftidrofuryl', 'modafinil', 'esomeprazole', 'cisapride', 'oxytetracycline', 'tinidazole', 'sumatriptan', 'calcitriol', 'ranitidine', 'leflunomide', 'famciclovir', 'oxybutynin', 'perphenazine', 'ropinirole', 'ursodeoxycholicacid', 'citalopram', 'riluzole', 'darifenacin', 'buspirone', 'alendronatesodium', 'theophylline', 'paracetamolandcodeine', 'sulfasalazine', 'pre-exposureprophylaxis', 'lamotrigine', 'dexamethasone', 'stalevo', 'norfloxacin', 'ofloxacin', 'mianserin', 'nitrofurantoin', 'memantine', 'lorazepam', 'anticholinergic', 'midazolam', 'dronedarone', 'enalapril', 'fluticasone', 'lithium', 'mebendazole', 'diclofenac', 'quinapril', 'itraconazole', 'pyridostigmine', 'methylphenidate', 'trihexyphenidyl', 'co-amilofruse', 'mizolastine', 'irbesartan', 'glipizide', 'acamprosate', 'cyproterone', 'methotrexate', 'tolfenamicacid', 'prasugrel', 'mefloquine', 'methylprednisolone', 'isotretinoin', 'ibuprofenandcodeine', 'letrozole', 'nicardipine', 'magnesiumhydroxide', 'vancomycin', 'ipratropium', 'prednisone', 'potassiumchloride', 'saxagliptin', 'risperidone', 'dipipanone', 'tadalafil', 'astemizole', 'simvastatin', 'acarbose', 'quinine', 'levothyroxine', 'triamterene', 'nystatin', 'chlordiazepoxide', 'pramipexole', 'dihydrocodeine', 'allopurinol', 'terfenadine', 'mesalazine', 'hydrocortisonebuccaltablets', 'doxycycline', 'trandolapril', 'dicyclomine', 'asilone', 'nadolol', 'frovatriptan', 'ranolazine', 'azithromycin', 'cholestyramine', 'zopiclone', 'vigabatrin', 'tiotropium', 'bromocriptine', 'tiaprofenicacid', 'lidocaine', 'phenelzine', 'levofloxacin', 'diflunisal', 'hydralazine', 'acitretin', 'metoclopramideandaspirin', 
'ciclosporin', 'methyldopa', 'tripotassiumdicitratobismuthate', 'galantamine', 'cyclobenzaprine', 'codeine', 'rifampicinandisoniazid', 'valaciclovir', 'phenytoin', 'co-amilozide', 'etoricoxib', 'sulindac', 'liothyronine', 'atenolol', 'cinacalcet', 'co-beneldopa', 'lansoprazole', 'indoramin', 'fenofibrate', 'zolmitriptan', 'hydrochlorothiazideandlisinopril', 'trospium', 'sucralfate', 'etamsylate', 'eplerenone', 'cromolynsodium', 'benzydamine', 'atorvastatin', 'cefalexin', 'minocycline', 'hydroxycarbamide', 'pioglitazone', 'rasagiline', 'carbamazepine', 'pivmecillinam', 'isosorbidedinitrate', 'formoterol', 'pentoxifylline', 'cabergoline', 'diazepam', 'furosemide', 'amiloride', 'tranexamicacid', 'ramipril', 'lofepramine', 'etidronicacid', 'promazine', 'aliskren', 'penicillamine', 'chlorpheniramine', 'prochlorperazine', 'bismuthsubsalicylate', 'cyclopenthiazide', 'finasteride', 'ironandfolicacid', 'penicillin', 'cyclizine', 'sertraline', 'ampicillin', 'dexketoprofen', 'nefazodone', 'solifenacin', 'felodipine', 'fluvoxamine', 'bupropion', 'tramacet', 'amoxicillin', 'tolterodine', 'isosorbidemononitrateandisosorbidedinitrate', 'betamethasone', 'cefradine', 'co-amoxiclav', 'clarithromycin', 'indometacin', 'almotriptan', 'escitalopram', 'chloroquine', 'ketotifen', 'oseltamivir', 'tamsulosin', 'olanzapine', 'levocetirizine', 'fluphenazine', 'chloralhydrate', 'tizanidine', 'cerivastatin', 'pholcodine', 'bezafibrate', 'methadone', 'bendroflumethiazide', 'methocarbamol', 'rivaroxaban', 'dosulepin', 'verapamil', 'mirabegron', 'salbutamol', 'amitriptyline', 'omeprazole', 'baclofen', 'fishoil', 'venlafaxine', 'meptazinol', 'cefpodoxime', 'fludrocortisone', 'levetiracetam', 'rosiglitazone', 'isoniazid', 'empagliflozin', 'carbimazole', 'lisinopril', 'doxazosin', 'co-magaldrox', 'temazepam', 'imipramine', 'gaviscon', 'mycophenolicacid', 'cefadroxil', 'methenamine', 'nicorandil', 'loperamide', 'mometasone', 'sterculia', 'cimetidine', 'hydroxyurea', 'celecoxib', 'meperidine', 
'imidapril', 'terazosin', 'atropine', 'flecainide', 'thyroxine', 'hyoscinehydrobromide', 'dantronanddocusate', 'dabigatran', 'fluconazole', 'prucalopride', 'mebeverine', 'macrogol', 'lactulose', 'glibenclamide', 'sibutramine', 'meloxicam', 'valdecoxib', 'manevac', 'topiramate', 'ketoprofen', 'primidone', 'paroxetine', 'acrivastine', 'sodiumfusidate', 'strontium', 'zolpidem', 'oxprenolol', 'sulfinpyrazone', 'carisoprodol', 'pizotifen', 'clemastine', 'rivastigmine', 'bumetanide', 'agomelatine', 'famotidine', 'reboxetine', 'pregabalin', 'alprostadil', 'haloperidol', 'candesartan', 'kaolin', 'ivabradine', 'linaclotide', 'atenololandnifedipine', 'repaglinide', 'selegiline', 'olmesartan', 'oxazepam', 'paracetamol', 'dutasteride', 'prazosin', 'sodiumcromoglicate', 'amantadine', 'nefopam', 'mercaptopurine', 'insulin', 'docusate', 'buprenorphine', 'isosorbidemononitrate', 'chlorphenamine', 'diphenhydramine', 'valganciclovir', 'azathioprine', 'proguanil', 'propranolol', 'prednisolone', 'immunoglobin', 'metoclopramide', 'celiprolol', 'rofecoxib', 'trazodone', 'tamoxifen', 'raloxifene', 'moclobemide', 'metolazone', 'omega-3', 'dipyridamole', 'alverine', 'alendronicacid', 'quetiapine', 'duloxetine', 'simeticone', 'beclometasone', 'azapropazone', 'lymecycline', 'aripiprazole', 'domperidone', 'rabeprazole', 'hydrocortisone', 'phenobarbital', 'betahistine', 'salmeterol', 'hydroxyzine', 'loprazolam', 'prilocaine', 'triamcinolone', 'zoledronicacid', 'anastrozole', 'fybogel', 'canagliflozin', 'clopidogrel', 'balsalazide', 'vardenafil', 'lumiracoxib', 'labetalol', 'brompheniramine', 'danazol', 'clodronicacid', 'warfarin', 'methysergide', 'hydrochlorothiazide', 'sodiumpicosulfate', 'ondansetron', 'adalimumab', 'rifampicin', 'tenoxicam', 'colesevelam', 'ciprofibrate', 'chlorpromazine', 'pancrelipase', 'co-danthrusate', 'disopyramide', 'oxycodone', 'valsartan', 'nitrazepam', 'tacrolimus', 'lacosamide', 'flupentixol', 'cyclophosphamide', 'nitrousoxide', 'ethambutol', 'rosuvastatin', 
'clobazam', 'donepezil', 'promethazine', 'spironolactone', 'co-careldopa', 'clindamycin', 'naproxen', 'senna', 'cefaclor', 'fluvastatin', 'propiverine', 'timolol', 'dutasterideandtamsulosin', 'dapagliflozin', 'migril', 'pilocarpine', 'aciclovir', 'morphine', 'sevelamer', 'etodolac', 'dapsone', 'sitagliptin', 'nedocromil', 'propafenone', 'oxitropium', 'cefuroxime']
# Sanity check: number of drug names in drugs_list.
# NOTE(review): the list contains what look like synonym pairs (e.g. 'indomethacin' /
# 'indometacin', 'albuterol' / 'salbutamol', 'hydroxycarbamide' / 'hydroxyurea') and a
# possible misspelling ('aliskren'). The names are used as column keys, so confirm they
# match the columns in the input file before changing any of them.
print(len(drugs_list))

In [None]:
# Flag, for every drug, whether it was recorded more than 10 years before study end:
#   QC10+_<drug> = 1 if tenure - Lag_<drug> > 10, else 0 (a NaN lag compares False, so it gives 0)
# NOTE(review): the Lag_<drug> columns are not created anywhere in this notebook, so they
# presumably come from the input tenure file -- confirm.

# Fail up front with the full list of missing columns instead of a KeyError on the first one.
missing_lag_columns = ['Lag_' + drug for drug in drugs_list if 'Lag_' + drug not in df_encoded.columns]
if missing_lag_columns:
    raise KeyError(f'df_encoded is missing lag columns for {len(missing_lag_columns)} drugs: {missing_lag_columns}')

#Select data only 10+ years before study end
qc10_flags = {
    'QC10+_' + drug: np.where((df_encoded['tenure'] - df_encoded['Lag_' + drug] > 10), 1, 0)
    for drug in drugs_list
}

# Attach all flags with one concat instead of hundreds of single-column inserts (which
# fragment the frame). Earlier copies are dropped first so re-running the cell replaces
# the columns rather than duplicating them.
df_encoded = pd.concat(
    [
        df_encoded.drop(columns = list(qc10_flags), errors = 'ignore'),
        pd.DataFrame(qc10_flags, index = df_encoded.index),
    ],
    axis = 1,
)

In [None]:
# Display the frame with the QC10+_ drug flag columns added
df_encoded

In [None]:
# Trim the frame to a more reasonable size: keep every column except those whose name
# starts with 'Lag' or ends with 'DATE'.
cleaned_list2 = [
    column
    for column in df_encoded.columns
    if not (column.startswith('Lag') or column.endswith('DATE'))
]
print(len(cleaned_list2))

In [None]:
# Subset df_encoded to the retained columns
df_encoded2 = df_encoded.loc[:, cleaned_list2]
df_encoded2

In [None]:
# Write the final covariate table for this cohort (header row included, no index column)
output_file = f'{ndd}_with_icd10_APOE_MAY_05_2025.csv'
df_encoded2.to_csv(output_file, index=False)

In [None]:
# Upload this cohort's local covariate CSV to /data/files_for_cox/ in the project
# (IPython substitutes {ndd} with the Python variable before running the shell command)
!dx upload {ndd}_with_icd10_APOE_MAY_05_2025.csv --path /data/files_for_cox/{ndd}_with_icd10_APOE_MAY_05_2025.csv

In [None]:
! dx download data/files_for_cox/AD_with_icd10_APOE_MAY_05_2025.csv