# Pulling ICD10 codes

## Requires a Spark-enabled notebook

In [None]:
# Table 1 from the paper: it contains the ICD10 codes we use to adjust for each type of medication.
! dx download 'data/Table 1_ Classes and types of medications associated with risk of NDDs - new_table1.csv'

# A file listing all the ICD10 field IDs in UKB, to make pulling the data easier.
# If replicating this study, you can use the field IDs we pull below.
! dx download 'data/all_field_ids_UKB.csv'

In [None]:
import pandas as pd
import numpy as np

# Read the lookup of ICD10 field IDs, holding the IDs as strings so they can
# be compared against the string list below.
test = pd.read_csv('all_field_ids_UKB.csv').astype({'Field ID': str})

# Field IDs that had no data in UKB at the time of the study.
no_data = ['130032', '130056', '130098', '130110', '130166', '130172', '130182', '130238', '130268', '130278', '130290', '130294', '130332', '130690', '130754', '130834', '130956', '131034', '131510', '131752']

# Keep only the rows whose field ID is not on the no-data list.
has_data = ~test['Field ID'].isin(no_data)
test = test.loc[has_data]
test

In [None]:
# Load codes from Table 1, keeping only the rows that name a condition.
df = pd.read_csv('Table 1_ Classes and types of medications associated with risk of NDDs - new_table1.csv')
df = df[~df['conditions'].isna()]

# Rows are selected on 'conditions' but split on 'ICD10'; drop empty ICD10
# cells so a NaN does not reach `.split` and raise AttributeError.
df_list = list(df['ICD10'].dropna())

# Each cell is a comma-separated list of codes. Flatten, trim whitespace,
# de-duplicate, and skip empty tokens left by trailing or doubled commas.
flattened = {
    code.strip()
    for entry in df_list
    for code in entry.split(',')
    if code.strip()
}

# Sorted list so the printout (and anything built from it) is deterministic.
unique_codes = sorted(flattened)
print(len(unique_codes))
print(unique_codes)

In [None]:
import pandas as pd

# Author's note: I16 is not in UKB.
# Start from a fresh read so this cell gives the same result however often it
# is run, then apply BOTH filters. Re-reading the CSV on its own would
# silently drop the no-data exclusion applied when the table was first loaded.
test = pd.read_csv('all_field_ids_UKB.csv')
test['Field ID'] = test['Field ID'].astype(str)  # `no_data` holds string IDs
keep = ~test['Field ID'].isin(no_data) & test['ICD10'].isin(unique_codes)
test = test[keep]
test

In [None]:
# Write the selected field-ID / ICD10 table to disk: header row kept
# (the pandas default), row index omitted.
test.to_csv('ICD10_codes_for_med_project.csv', index=False)

In [None]:
# Field names are the field IDs prefixed with 'p', which is the form used
# to pull the columns in UKB. 'eid' goes first so it is pulled alongside them.
data_list = list(test['Field ID'])
field_names = ['eid'] + ['p' + str(field_id) for field_id in data_list]

print(len(field_names))

In [None]:
#setup - packages & env
import pyspark
import dxdata
import dxpy

# getOrCreate() returns the already-running SparkContext when there is one,
# so re-running this cell no longer fails with
# "Cannot run multiple SparkContexts at once".
sc = pyspark.SparkContext.getOrCreate()
spark = pyspark.sql.SparkSession(sc)

In [None]:
#setup - grabbing dataset
# Look up the project's database (name matching "app*") and its Dataset
# object (name matching "app*.dataset") in the project root folder.
dispensed_database_name = dxpy.find_one_data_object(
    classname="database",
    name="app*",
    folder="/",
    name_mode="glob",
    describe=True,
)["describe"]["name"]

dispensed_dataset_id = dxpy.find_one_data_object(
    typename="Dataset",
    name="app*.dataset",
    folder="/",
    name_mode="glob",
)["id"]

In [None]:
# Load the dataset found above by its ID and take its "participant" entity;
# the participant dataset is the one we ultimately want to work with.
dataset = dxdata.load_dataset(id=dispensed_dataset_id)
participant = dataset["participant"]

In [None]:
# Pull down the fields we need: 'eid' plus every 'p<field id>' column listed
# in field_names, with coded values replaced (coding_values="replace").
df = participant.retrieve_fields(
    names=field_names,
    coding_values="replace",
    engine=dxdata.connect(),
)

In [None]:
# Send to Pandas.
# Convert only while `df` is still the retrieved (non-pandas) result, so that
# re-running this cell is a no-op instead of failing because a pandas
# DataFrame has no `toPandas` method.
if not isinstance(df, pd.DataFrame):
    df = df.toPandas()
df

In [None]:
# Print one "'p<field id>':'<ICD10 code>'," line per selected field, ready to
# paste into a rename mapping. field_names[0] is 'eid', so it is skipped.
code_list = list(test['ICD10'])
for field, code in zip(field_names[1:], code_list):
    print(f"'{field}':'{code}',")

In [None]:
# Human-readable columns: rename 'eid' to 'ID' and each 'p<field id>' column
# to its ICD10 code. The entries have the same "'p<field id>':'<code>',"
# shape that the printout cell above emits.
# NOTE(review): the per-code export below indexes df by every entry of
# code_list, so each code in code_list needs an entry here -- confirm the two
# stay in sync if Table 1 changes.
df = df.rename(columns={'eid':'ID',
'p130008':'A04',
'p130178':'B02',
'p130230':'B37',
'p130706':'E10',
'p130708':'E11',
'p130730':'E25',
'p130734':'E27',
'p130814':'E78',
'p130828':'E87',
'p130874':'F20',
'p130892':'F31',
'p130894':'F32',
'p130896':'F33',
'p130904':'F40',
'p130906':'F41',
'p130908':'F42',
'p130910':'F43',
'p130912':'F44',
'p130914':'F45',
'p130916':'F48',
'p130918':'F50',
'p130920':'F51',
'p131032':'G25',
'p131042':'G35',
'p131048':'G40',
'p131052':'G43',
'p131060':'G47',
'p131102':'G81',
'p131230':'H66',
'p131286':'I10',
'p131288':'I11',
'p131290':'I12',
'p131292':'I13',
'p131294':'I15',
'p131296':'I20',
'p131298':'I21',
'p131306':'I25',
'p131348':'I47',
'p131350':'I48',
'p131352':'I49',
'p131354':'I50',
'p131360':'I60',
'p131362':'I61',
'p131364':'I62',
'p131366':'I63',
'p131368':'I64',
'p131370':'I65',
'p131372':'I66',
'p131374':'I67',
'p131376':'I68',
'p131378':'I69',
'p131400':'I82',
'p131560':'K04',
'p131562':'K05',
'p131582':'K20',
'p131584':'K21',
'p131586':'K22',
'p131590':'K25',
'p131602':'K31',
'p131628':'K51',
'p131640':'K59',
'p131658':'K70',
'p131660':'K71',
'p131662':'K72',
'p131664':'K73',
'p131666':'K74',
'p131668':'K75',
'p131670':'K76',
'p131672':'K77',
'p131730':'L25',
'p131742':'L40',
'p131754':'L50',
'p131850':'M06',
'p131864':'M13',
'p131868':'M15',
'p131870':'M16',
'p131872':'M17',
'p131874':'M18',
'p131876':'M19',
'p131894':'M32',
'p131912':'M45',
'p131960':'M79',
'p131962':'M80',
'p131964':'M81',
'p131966':'M82',
'p131978':'M88',
'p132006':'N04',
'p132016':'N10',
'p132032':'N18',
'p132034':'N19',
'p132054':'N30',
'p132056':'N31',
'p132058':'N32',
'p132070':'N39',
'p132072':'N40',
'p132150':'N94',})

df

In [None]:
# Number of ICD10 codes that will each get their own output file below.
print(len(code_list))

In [None]:
# Save each code as an individual ICD10 file: one CSV per code holding the
# 'ID' column and that code's column, restricted to rows where the code's
# column is non-null. The row count per code is printed as a quick check.
# The subset has its own name so the loop does not overwrite `test`, the
# field-ID table that earlier cells read from when re-run.
for group in code_list:
    code_rows = df.loc[df[group].notna(), ['ID', group]]
    print(group, len(code_rows))
    code_rows.to_csv(f'{group}_with_date.csv', header=True, index=False)

In [None]:
# Print (not run) the `dx upload` command for every per-code CSV, so the
# commands can be copied into a shell cell.
for group in code_list:
    csv_name = f'{group}_with_date.csv'
    print(f'!dx upload {csv_name} --path data/UPDATED_ICD10_dates/{csv_name}')

In [None]:
!dx upload A04_with_date.csv --path data/UPDATED_ICD10_dates/A04_with_date.csv
!dx upload B02_with_date.csv --path data/UPDATED_ICD10_dates/B02_with_date.csv
!dx upload B37_with_date.csv --path data/UPDATED_ICD10_dates/B37_with_date.csv
!dx upload E10_with_date.csv --path data/UPDATED_ICD10_dates/E10_with_date.csv
!dx upload E11_with_date.csv --path data/UPDATED_ICD10_dates/E11_with_date.csv
!dx upload E25_with_date.csv --path data/UPDATED_ICD10_dates/E25_with_date.csv
!dx upload E27_with_date.csv --path data/UPDATED_ICD10_dates/E27_with_date.csv
!dx upload E78_with_date.csv --path data/UPDATED_ICD10_dates/E78_with_date.csv
!dx upload E87_with_date.csv --path data/UPDATED_ICD10_dates/E87_with_date.csv
!dx upload F20_with_date.csv --path data/UPDATED_ICD10_dates/F20_with_date.csv
!dx upload F31_with_date.csv --path data/UPDATED_ICD10_dates/F31_with_date.csv
!dx upload F32_with_date.csv --path data/UPDATED_ICD10_dates/F32_with_date.csv
!dx upload F33_with_date.csv --path data/UPDATED_ICD10_dates/F33_with_date.csv
!dx upload F40_with_date.csv --path data/UPDATED_ICD10_dates/F40_with_date.csv
!dx upload F41_with_date.csv --path data/UPDATED_ICD10_dates/F41_with_date.csv
!dx upload F42_with_date.csv --path data/UPDATED_ICD10_dates/F42_with_date.csv
!dx upload F43_with_date.csv --path data/UPDATED_ICD10_dates/F43_with_date.csv
!dx upload F44_with_date.csv --path data/UPDATED_ICD10_dates/F44_with_date.csv
!dx upload F45_with_date.csv --path data/UPDATED_ICD10_dates/F45_with_date.csv
!dx upload F48_with_date.csv --path data/UPDATED_ICD10_dates/F48_with_date.csv
!dx upload F50_with_date.csv --path data/UPDATED_ICD10_dates/F50_with_date.csv
!dx upload F51_with_date.csv --path data/UPDATED_ICD10_dates/F51_with_date.csv
!dx upload G25_with_date.csv --path data/UPDATED_ICD10_dates/G25_with_date.csv
!dx upload G35_with_date.csv --path data/UPDATED_ICD10_dates/G35_with_date.csv
!dx upload G40_with_date.csv --path data/UPDATED_ICD10_dates/G40_with_date.csv
!dx upload G43_with_date.csv --path data/UPDATED_ICD10_dates/G43_with_date.csv
!dx upload G47_with_date.csv --path data/UPDATED_ICD10_dates/G47_with_date.csv
!dx upload G81_with_date.csv --path data/UPDATED_ICD10_dates/G81_with_date.csv
!dx upload H66_with_date.csv --path data/UPDATED_ICD10_dates/H66_with_date.csv
!dx upload I10_with_date.csv --path data/UPDATED_ICD10_dates/I10_with_date.csv
!dx upload I11_with_date.csv --path data/UPDATED_ICD10_dates/I11_with_date.csv
!dx upload I12_with_date.csv --path data/UPDATED_ICD10_dates/I12_with_date.csv
!dx upload I13_with_date.csv --path data/UPDATED_ICD10_dates/I13_with_date.csv
!dx upload I15_with_date.csv --path data/UPDATED_ICD10_dates/I15_with_date.csv
!dx upload I20_with_date.csv --path data/UPDATED_ICD10_dates/I20_with_date.csv
!dx upload I21_with_date.csv --path data/UPDATED_ICD10_dates/I21_with_date.csv
!dx upload I25_with_date.csv --path data/UPDATED_ICD10_dates/I25_with_date.csv
!dx upload I47_with_date.csv --path data/UPDATED_ICD10_dates/I47_with_date.csv
!dx upload I48_with_date.csv --path data/UPDATED_ICD10_dates/I48_with_date.csv
!dx upload I49_with_date.csv --path data/UPDATED_ICD10_dates/I49_with_date.csv
!dx upload I50_with_date.csv --path data/UPDATED_ICD10_dates/I50_with_date.csv
!dx upload I60_with_date.csv --path data/UPDATED_ICD10_dates/I60_with_date.csv
!dx upload I61_with_date.csv --path data/UPDATED_ICD10_dates/I61_with_date.csv
!dx upload I62_with_date.csv --path data/UPDATED_ICD10_dates/I62_with_date.csv
!dx upload I63_with_date.csv --path data/UPDATED_ICD10_dates/I63_with_date.csv
!dx upload I64_with_date.csv --path data/UPDATED_ICD10_dates/I64_with_date.csv
!dx upload I65_with_date.csv --path data/UPDATED_ICD10_dates/I65_with_date.csv
!dx upload I66_with_date.csv --path data/UPDATED_ICD10_dates/I66_with_date.csv
!dx upload I67_with_date.csv --path data/UPDATED_ICD10_dates/I67_with_date.csv
!dx upload I68_with_date.csv --path data/UPDATED_ICD10_dates/I68_with_date.csv
!dx upload I69_with_date.csv --path data/UPDATED_ICD10_dates/I69_with_date.csv
!dx upload I82_with_date.csv --path data/UPDATED_ICD10_dates/I82_with_date.csv
!dx upload K04_with_date.csv --path data/UPDATED_ICD10_dates/K04_with_date.csv
!dx upload K05_with_date.csv --path data/UPDATED_ICD10_dates/K05_with_date.csv
!dx upload K20_with_date.csv --path data/UPDATED_ICD10_dates/K20_with_date.csv
!dx upload K21_with_date.csv --path data/UPDATED_ICD10_dates/K21_with_date.csv
!dx upload K22_with_date.csv --path data/UPDATED_ICD10_dates/K22_with_date.csv
!dx upload K25_with_date.csv --path data/UPDATED_ICD10_dates/K25_with_date.csv
!dx upload K31_with_date.csv --path data/UPDATED_ICD10_dates/K31_with_date.csv
!dx upload K51_with_date.csv --path data/UPDATED_ICD10_dates/K51_with_date.csv
!dx upload K59_with_date.csv --path data/UPDATED_ICD10_dates/K59_with_date.csv
!dx upload K70_with_date.csv --path data/UPDATED_ICD10_dates/K70_with_date.csv
!dx upload K71_with_date.csv --path data/UPDATED_ICD10_dates/K71_with_date.csv
!dx upload K72_with_date.csv --path data/UPDATED_ICD10_dates/K72_with_date.csv
!dx upload K73_with_date.csv --path data/UPDATED_ICD10_dates/K73_with_date.csv
!dx upload K74_with_date.csv --path data/UPDATED_ICD10_dates/K74_with_date.csv
!dx upload K75_with_date.csv --path data/UPDATED_ICD10_dates/K75_with_date.csv
!dx upload K76_with_date.csv --path data/UPDATED_ICD10_dates/K76_with_date.csv
!dx upload K77_with_date.csv --path data/UPDATED_ICD10_dates/K77_with_date.csv
!dx upload L25_with_date.csv --path data/UPDATED_ICD10_dates/L25_with_date.csv
!dx upload L40_with_date.csv --path data/UPDATED_ICD10_dates/L40_with_date.csv
!dx upload L50_with_date.csv --path data/UPDATED_ICD10_dates/L50_with_date.csv
!dx upload M06_with_date.csv --path data/UPDATED_ICD10_dates/M06_with_date.csv
!dx upload M13_with_date.csv --path data/UPDATED_ICD10_dates/M13_with_date.csv
!dx upload M15_with_date.csv --path data/UPDATED_ICD10_dates/M15_with_date.csv
!dx upload M16_with_date.csv --path data/UPDATED_ICD10_dates/M16_with_date.csv
!dx upload M17_with_date.csv --path data/UPDATED_ICD10_dates/M17_with_date.csv
!dx upload M18_with_date.csv --path data/UPDATED_ICD10_dates/M18_with_date.csv
!dx upload M19_with_date.csv --path data/UPDATED_ICD10_dates/M19_with_date.csv
!dx upload M32_with_date.csv --path data/UPDATED_ICD10_dates/M32_with_date.csv
!dx upload M45_with_date.csv --path data/UPDATED_ICD10_dates/M45_with_date.csv
!dx upload M79_with_date.csv --path data/UPDATED_ICD10_dates/M79_with_date.csv
!dx upload M80_with_date.csv --path data/UPDATED_ICD10_dates/M80_with_date.csv
!dx upload M81_with_date.csv --path data/UPDATED_ICD10_dates/M81_with_date.csv
!dx upload M82_with_date.csv --path data/UPDATED_ICD10_dates/M82_with_date.csv
!dx upload M88_with_date.csv --path data/UPDATED_ICD10_dates/M88_with_date.csv
!dx upload N04_with_date.csv --path data/UPDATED_ICD10_dates/N04_with_date.csv
!dx upload N10_with_date.csv --path data/UPDATED_ICD10_dates/N10_with_date.csv
!dx upload N18_with_date.csv --path data/UPDATED_ICD10_dates/N18_with_date.csv
!dx upload N19_with_date.csv --path data/UPDATED_ICD10_dates/N19_with_date.csv
!dx upload N30_with_date.csv --path data/UPDATED_ICD10_dates/N30_with_date.csv
!dx upload N31_with_date.csv --path data/UPDATED_ICD10_dates/N31_with_date.csv
!dx upload N32_with_date.csv --path data/UPDATED_ICD10_dates/N32_with_date.csv
!dx upload N39_with_date.csv --path data/UPDATED_ICD10_dates/N39_with_date.csv
!dx upload N40_with_date.csv --path data/UPDATED_ICD10_dates/N40_with_date.csv
!dx upload N94_with_date.csv --path data/UPDATED_ICD10_dates/N94_with_date.csv
