In [1]:
#!/usr/bin/env python
# -*-coding:utf-8 -*-
'''
@File    :   condition_specific_functions_for_difficult_diseases.ipynb
@Time    :   2023/08/21 11:55:10
@Author  :   Asra Aslam 
@Version :   1.0
@Contact :   a.aslam2@leeds.ac.uk
@License :   (C)Copyright Asra Aslam DynAIRX
@Desc    :   This file is specifically designed for the alcohol condition, where we need to extract
             SNOMED codes from the other split categories, compare them with the categories in the efi file,
             and report the remaining ones
'''

import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np
import seaborn as sns

In [2]:
# --- Input / output locations for this comparison run -----------------------
# Source code list: its SNOMED codes are the ones removed from the target list.
# Alternative source files kept for reference (swap in as needed):
#   "output/macular_degeneration/grouped_database_with_snomed_Cataract.csv"
#   "output/macular_degeneration/grouped_database_with_snomed_Macular Degeneration.csv"
filepaths_source_database = (
    "output/macular_degeneration/grouped_database_with_snomed_Visual Impairment and Blindness.csv"
)

# Target code list: the rows that are checked against the source list.
# Alternative target file kept for reference:
#   "output/macular_degeneration/Manual_Analysis_Visual_impairment_orig_before_reduce.csv"
filepath_target_database = (
    "output/macular_degeneration/Remaining_Visual_Impairment2_not_in_Cataract.csv"
)

# Destination for the target rows whose SNOMED code is absent from the source list.
filepath_output_database_remaining_SNOMED = (
    "output/macular_degeneration/Remaining_Visual_Impairment3_not_in_Visual_Impairment_and_Blindness.csv"
)

In [3]:
# Load the target code list. SnomedCTConceptId is read as a string column
# (not parsed as a number) so codes can be matched as text later on.
df_database_target = pd.read_csv(
    filepath_target_database,
    dtype={"SnomedCTConceptId": str},
)
df_database_target

Unnamed: 0,SnomedCTConceptId,Term,Disease,Otherinstructions,origin
0,13973009,Grand mal status,Seizures,,['efi']
1,161480008,H/O: epilepsy,Seizures,,['efi']
2,16873003,Musicogenic epilepsy,Seizures,,['efi']
3,170702005,Epilepsy monitoring,Seizures,,['efi']
4,170703000,Initial epilepsy assessment,Seizures,,['efi']
...,...,...,...,...,...
119,88491000000102,Epilepsy limits activities,Seizures,,['efi']
120,91175000,[D]Convulsion NOS,Seizures,,['efi']
121,93211000000101,Epilepsy does not limit activities,Seizures,,['efi']
122,93401000000104,Did not attend epilepsy clinic,Seizures,,['efi']


In [4]:
# Optional column harmonisation (currently disabled).
# When the target file is an efi list that uses the column names
# SNOMEDCT_CONCEPTID / Codedescription / Deficit, uncomment the renames below
# so the columns match the LW list naming (SnomedCTConceptId / Term / Disease)
# that the rest of this notebook relies on.
# df_database_target.rename(columns={'SNOMEDCT_CONCEPTID':'SnomedCTConceptId'}, inplace=True)
# df_database_target.rename(columns={'Codedescription':'Term'}, inplace=True)
# df_database_target.rename(columns={'Deficit':'Disease'}, inplace=True)

# Show the column names currently present in the target frame.
df_database_target.columns

Index(['SnomedCTConceptId', 'Term', 'Disease', 'Otherinstructions', 'origin'], dtype='object')

In [5]:
# Inspect the per-column dtypes of the target frame; SnomedCTConceptId was
# requested as str when the file was read, so it should show up as object.
df_database_target.dtypes

SnomedCTConceptId     object
Term                  object
Disease               object
Otherinstructions    float64
origin                object
dtype: object

In [6]:
# Load the source code list with the same string dtype for SnomedCTConceptId
# as the target frame, so both code columns are compared as text.
df_database_source = pd.read_csv(
    filepaths_source_database,
    dtype={"SnomedCTConceptId": str},
)
df_database_source

Unnamed: 0,SnomedCTConceptId,Term,Disease,Otherinstructions,origin
0,108211000000104,Epilepsy impairs education,Epilepsy,,['Lauren_codelist']
1,13973009,Grand mal status,Epilepsy,,['Lauren_codelist']
2,162658001,Epilepsy resolved,Epilepsy,,['Lauren_codelist']
3,170702005,Epilepsy monitoring,Epilepsy,,['Lauren_codelist']
4,170702005,Epilepsy monitoring NOS,Epilepsy,,['Lauren_codelist']
...,...,...,...,...,...
95,84757009,[X]Other generalized epilepsy and epileptic sy...,Epilepsy,,['Lauren_codelist']
96,88491000000102,Epilepsy limits activities,Epilepsy,,['Lauren_codelist']
97,89525009,Gelastic epilepsy,Epilepsy,,['Lauren_codelist']
98,93401000000104,DNA - Did not attend epilepsy clinic,Epilepsy,,['Lauren_codelist']


In [7]:
# Preview the SNOMED code column of the source list.
df_database_source["SnomedCTConceptId"]

0     108211000000104
1            13973009
2           162658001
3           170702005
4           170702005
           ...       
95           84757009
96     88491000000102
97           89525009
98     93401000000104
99           95208000
Name: SnomedCTConceptId, Length: 100, dtype: object

In [8]:
# Boolean Series aligned with the target rows: True where the row's SNOMED
# code also occurs somewhere in the source list.
df_database_target["SnomedCTConceptId"].isin(
    df_database_source["SnomedCTConceptId"]
)

0       True
1      False
2      False
3       True
4      False
       ...  
119     True
120    False
121    False
122     True
123     True
Name: SnomedCTConceptId, Length: 124, dtype: bool

In [9]:
# Keep only the target rows whose SNOMED code does NOT appear in the source
# list (the membership mask is negated with ~).
df_remaining_rows = df_database_target.loc[
    ~df_database_target["SnomedCTConceptId"].isin(
        df_database_source["SnomedCTConceptId"]
    )
]
df_remaining_rows

Unnamed: 0,SnomedCTConceptId,Term,Disease,Otherinstructions,origin
1,161480008,H/O: epilepsy,Seizures,,['efi']
2,16873003,Musicogenic epilepsy,Seizures,,['efi']
4,170703000,Initial epilepsy assessment,Seizures,,['efi']
5,170704006,Follow-up epilepsy assessment,Seizures,,['efi']
6,170706008,Epilepsy associated problems,Seizures,,['efi']
...,...,...,...,...,...
113,71831005,Symptomatic generalised epilepsy,Seizures,,['efi']
115,7689009,Reading epilepsy,Seizures,,['efi']
117,840261000000103,Epilepsy management plan given,Seizures,,['efi']
120,91175000,[D]Convulsion NOS,Seizures,,['efi']


In [10]:
# Persist the unmatched target rows to the configured output path; the
# DataFrame index is not written to the file.
df_remaining_rows.to_csv(
    filepath_output_database_remaining_SNOMED,
    index=False,
)