## Return diagnosis icd 9 & 10 info.

Reading icd codes from csv file

Import file -> CSV\Imports\icd_stroke.csv

Export file -> CSV\Exports\o01_diagnosis.csv

--------------------------------------------
This part of the code creates a file
containing the admissions that have been
diagnosed with the disease defined by
the ICD codes in the import CSV file.
At the end of the code, the file is produced
with the unique admissions and not the
movements within the hospital units.
To search for another disease, change
the path of the ICD codes CSV file, which is
assigned at the top of the code cell below.

In [None]:
import gc
import pandas as pd

# Path to the CSV file listing the ICD codes to search for
# (read without a header row; only the first column is used).
icd = r'CSV\Imports\icd_stroke.csv'

# Define the path to the compressed MIMIC-IV 3.0 diagnoses ICD file
compressed_file_path = r'..\00_Datasets\mimic-iv-3_0\hosp\diagnoses_icd.csv.gz'


def read_icd_codes(path):
    """Return the ICD codes found in the first column of a header-less CSV.

    The column is read as text (``dtype=str``) so that all-numeric codes keep
    their leading zeros; letting pandas infer the type would turn "0389"
    into the integer 389 and the code would no longer match the diagnoses.

    Args:
        path: Path (or file-like object) of the CSV file with the ICD codes.

    Returns:
        A list of ICD codes as strings, without blank rows and without
        surrounding whitespace.
    """
    codes = pd.read_csv(path, header=None, usecols=[0], dtype=str)
    return codes[0].dropna().str.strip().tolist()


def filter_diagnoses(diagnoses_path, icd_codes):
    """Return the rows of the gzip-compressed diagnoses CSV matching the codes.

    Args:
        diagnoses_path: Path (or file-like object) of the gzip-compressed CSV
            file; it must contain an ``icd_code`` column.
        icd_codes: Iterable of ICD codes (strings) to keep.

    Returns:
        A DataFrame with only the rows whose ``icd_code`` is in ``icd_codes``.
    """
    # Force the code column to text: with type inference numeric-looking codes
    # are parsed as integers and never compare equal to the string codes.
    diagnoses = pd.read_csv(
        diagnoses_path, compression='gzip', dtype={'icd_code': str}
    )
    return diagnoses[diagnoses['icd_code'].isin(icd_codes)]


try:
    # Read the specific ICD codes first: it is the cheap step, so a wrong
    # path is reported before the large diagnoses file is loaded.
    specific_icd_numbers = read_icd_codes(icd)

    # Keep only the diagnoses whose code is in the list
    filtered_df = filter_diagnoses(compressed_file_path, specific_icd_numbers)

    # Export the filtered DataFrame to a CSV file
    filtered_df.to_csv(r'CSV\Exports\o01_diagnosis.csv', index=False)

except FileNotFoundError:
    print("File not found.")
except Exception as e:
    print("An error occurred:", e)

# Free RAM: the full diagnoses table only lived inside filter_diagnoses,
# so it is unreferenced by now and can be reclaimed.
gc.collect()

In [2]:
import gc
import pandas as pd
import logging

# logging setup
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# CAUTION: this must point to the CSV file with the specific ICD codes to
# look for (read without a header row; only the first column is used).
icd_path = r'CSV\Imports\icd_stroke.csv'

# Define the path to the compressed MIMIC-IV 3.0 diagnoses ICD file
compressed_file_path = r'..\00_Datasets\mimic-iv-3_0\hosp\diagnoses_icd.csv.gz'

# Number of rows read per chunk; lower it if memory is tight.
chunksize = 100000


def read_icd_codes(path):
    """Return the ICD codes found in the first column of a header-less CSV.

    The column is read as text (``dtype=str``) so that all-numeric codes keep
    their leading zeros; letting pandas infer the type would turn "0389"
    into the integer 389 and the code would no longer match the diagnoses.

    Args:
        path: Path (or file-like object) of the CSV file with the ICD codes.

    Returns:
        A list of ICD codes as strings, without blank rows and without
        surrounding whitespace.
    """
    codes = pd.read_csv(
        path, header=None, usecols=[0], names=['icd_code'], dtype=str
    )
    return codes['icd_code'].dropna().str.strip().tolist()


def filter_diagnoses_in_chunks(diagnoses_path, icd_codes, chunksize=100000):
    """Filter the gzip-compressed diagnoses CSV chunk by chunk.

    Args:
        diagnoses_path: Path (or file-like object) of the gzip-compressed CSV
            file; it must contain an ``icd_code`` column.
        icd_codes: Iterable of ICD codes (strings) to keep.
        chunksize: Number of rows parsed per chunk.

    Returns:
        A DataFrame (with a fresh 0..n-1 index) holding the rows of all
        chunks whose ``icd_code`` is in ``icd_codes``.
    """
    # Force the code column to text. Types are otherwise inferred per chunk,
    # so a chunk holding only numeric-looking codes would be parsed as
    # integers and none of its rows would match the string codes.
    reader = pd.read_csv(
        diagnoses_path,
        compression='gzip',
        chunksize=chunksize,
        dtype={'icd_code': str},
    )
    matching_chunks = [
        chunk[chunk['icd_code'].isin(icd_codes)] for chunk in reader
    ]
    return pd.concat(matching_chunks, ignore_index=True)


try:
    # Read the ICD codes from CSV file
    specific_icd_numbers = read_icd_codes(icd_path)

    # Read the compressed CSV file in chunks and keep the matching rows
    filtered_df = filter_diagnoses_in_chunks(
        compressed_file_path, specific_icd_numbers, chunksize=chunksize
    )

    # Export the filtered DataFrame to a CSV file
    filtered_df.to_csv(r'CSV\Exports\o01_diagnosis.csv', index=False)

    logging.info("Filtered data exported successfully.")

except FileNotFoundError as fnf_error:
    logging.error("File not found: %s", fnf_error)
except pd.errors.ParserError as parse_error:
    logging.error("Error parsing the file: %s", parse_error)
except Exception as e:
    logging.error("An unexpected error occurred: %s", e)

# Free RAM if necessary
gc.collect()

2024-10-22 22:15:28,044 - INFO - Filtered data exported successfully.


0