In [None]:
import pandas as pd
from Scripts.medmod.exclude_symile import *

# Prerequisites:
#   1. Download the MIMIC-IV CSVs from https://physionet.org/files/mimiciv/3.1/
#   2. Clone the https://github.com/mspancho/MedMod fork
#   3. Follow steps 1 and 2 in MedMod/mimic4extract/README.md to extract the
#      MedMod dataset from the MIMIC-IV CSVs into .../src/data/root
symile_train_fp = "../src/data/train/"
medmod_fp = "../src/data/root"

# Load both datasets, then compute what they have in common.
# NOTE(review): the exact meaning of `common_hadms` / `subject_ids` is defined
# by exclude_symile() in Scripts.medmod.exclude_symile -- confirm there.
symile_df = load_symile(symile_train_fp)
medmod_df = load_medmod(medmod_fp)
common_hadms, subject_ids = exclude_symile(medmod_df, symile_df)

# Run the exclusion stages in order; each one is announced before it starts.
for banner, stage, stage_args in (
    ("Starting first pass exclusion", first_pass, (common_hadms, subject_ids)),
    ("Starting second pass cleanup", second_pass, (subject_ids,)),
    ("Updating metadata files", update_metadata, (common_hadms, subject_ids)),
):
    print(banner)
    stage(*stage_args)

In [None]:
from Scripts.medmod.link_mimic_cxr_jpg import *

# `os` is needed below for os.path.join; import it explicitly instead of
# relying on it leaking in through a star import.
import os

# Extract the MIMIC-CXR-JPG metadata from
# https://physionet.org/files/mimic-cxr-jpg/2.1.0/mimic-cxr-2.0.0-metadata.csv.gz
mimic_cxr_fp = "../src/physionet.org/files/mimic-cxr-jpg/2.1.0/mimic-cxr-2.0.0-metadata.csv"

# Placeholder values found in `cxr_path` for rows that have no usable CXR.
NO_CXR_SENTINELS = ["No CXR found", "No CXR matched admission"]

medmod_df = load_medmod(medmod_fp)
mimic_cxr_meta_df = load_mimic_cxr_meta(mimic_cxr_fp)

# NOTE(review): the return value is ignored, so this presumably fills in
# medmod_df['cxr_path'] in place -- confirm in link_mimic_cxr_jpg.
link_medmod_to_cxr(medmod_df, mimic_cxr_meta_df)

# Compute the "no match" mask once and reuse it for reporting and dropping.
unmatched_mask = medmod_df['cxr_path'].isin(NO_CXR_SENTINELS)
filtered = medmod_df[unmatched_mask]
print(f"{len(filtered)} rows without matched CXRs")

# Drop rows without matched CXRs
medmod_df = medmod_df[~unmatched_mask]

# Save updated MedMod dataframe with CXR paths
medmod_df.to_csv(os.path.join(medmod_fp, "medmod_final.csv"), index=False)

In [None]:
from Scripts.medmod.query_mimic_cxr_jpg import *
import os
import subprocess

import getpass

# PhysioNet credentials must never be committed to the notebook. They are read
# from the environment (PHYSIONET_USER / PHYSIONET_PASSWORD) and only prompted
# for interactively when a variable is unset or empty.
# SECURITY: any credentials that were previously hardcoded in this cell should
# be treated as leaked and rotated.
# NOTE: the password is still passed on the wget command line and is therefore
# visible in the local process list while a download is running.
physionet_user = os.environ.get("PHYSIONET_USER") or input("PhysioNet username: ")
physionet_password = (
    os.environ.get("PHYSIONET_PASSWORD") or getpass.getpass("PhysioNet password: ")
)

# Base wget invocation. The LAST element is the remote base URL; the download
# loop appends each row's relative CXR path to it.
cmd = [
    "wget",
    "-r", "-N", "-c", "-np",
    f"--user={physionet_user}",
    f"--password={physionet_password}",
    "https://physionet.org/files/mimic-cxr-jpg/2.1.0/files/"
]

# `medmod_fp` is the extraction *directory* unless a later cell has already
# rebound it to the CSV itself; read the final table saved inside it rather
# than passing a directory to read_csv.
if str(medmod_fp).endswith(".csv"):
    medmod_final_csv = medmod_fp
else:
    medmod_final_csv = os.path.join(medmod_fp, "medmod_final.csv")

medmod_df = pd.read_csv(medmod_final_csv)

In [None]:
# wget -r mirrors the remote tree as <cwd>/physionet.org/files/...; run it from
# DOWNLOAD_ROOT so the files land exactly where the existence check below (and
# the later cells) look for them, instead of in the notebook's own directory.
DOWNLOAD_ROOT = "../src/"
URL_SCHEME = "https://"

for row in medmod_df.itertuples():
    cxr_path = str(row.cxr_path)

    # The remote URL is the last element of the base command; append this
    # row's relative image path to a per-row copy of the command.
    cmd_cxr = cmd.copy()
    cmd_cxr[-1] = cmd_cxr[-1] + cxr_path

    # Local mirror path = DOWNLOAD_ROOT + URL without its scheme.
    cxr_path_check = os.path.join(DOWNLOAD_ROOT, cmd_cxr[-1][len(URL_SCHEME):])
    if os.path.exists(cxr_path_check):
        # Already downloaded on a previous run.
        continue

    # check=True aborts the loop on the first failed download.
    subprocess.run(cmd_cxr, check=True, cwd=DOWNLOAD_ROOT)

In [None]:
from Scripts.medmod.split_medmod import *
# Paths used by the split step.
# NOTE: from here on `medmod_fp` points at the final CSV file, not at the
# extraction directory it referred to in the earlier cells.
medmod_fp = "../src/data/root/medmod_final.csv"
mimic_dir = "../src/physionet.org/files/mimic-cxr-jpg/2.1.0/files/"
save_dir = "../src/medmod"

# Load the final table and partition it into the three splits.
medmod_df = pd.read_csv(medmod_fp)
train_df, val_df, test_df = label_split(medmod_df)

# Split name -> DataFrame, in train / val / test order.
split_dict = dict(train=train_df, val=val_df, test=test_df)

In [None]:
# For every split: move its CXR images into <save_dir>/<split>/ and write a
# per-split CSV whose `cxr_path` column holds just the image file names.
for split, split_df in split_dict.items():
    split_dir = os.path.join(save_dir, split)
    os.makedirs(split_dir, exist_ok=True)
    csv_path = os.path.join(split_dir, f"{split}.csv")

    new_paths = []      # one file name per row, in row order
    missing_count = 0   # source images that could not be found

    rows = tqdm(split_df.itertuples(), total=len(split_df), desc=f"Moving {split} files")
    for row in rows:
        cxr_path = str(row.cxr_path)
        filename = os.path.basename(cxr_path)

        # Source inside the MIMIC-CXR-JPG mirror, destination inside the split dir.
        src = os.path.join(mimic_dir, cxr_path)
        dst = os.path.join(split_dir, filename)

        # NOTE(review): the file name is recorded even when the image turns out
        # to be missing, so the CSV can list images that are not on disk.
        new_paths.append(filename)

        if os.path.exists(dst):
            # Already moved by an earlier run.
            continue

        if os.path.exists(src):
            shutil.move(src, dst)
        else:
            print(f"WARNING: Missing file {src}, skipping.")
            missing_count += 1

    if missing_count > 0:
        print(f"WARNING: {missing_count} missing files in split '{split}'")

    # Work on a copy so the frame stored in split_dict is left untouched.
    split_df = split_df.assign(cxr_path=new_paths)
    split_df.to_csv(csv_path, index=False)

    print(f"Script complete for {split} split")

In [None]:
%%bash
# Count, for each split CSV, how many rows have exactly "1" in each of the
# comma-separated fields 11 through 38 (1-indexed), printing one line per field.
#
# The %%bash cell magic (which must be the first line of the cell) runs the
# whole cell as a bash script; a leading "!/bin/bash" would only launch a
# shell for that single line and leave the rest to be parsed as Python.
#
# NOTE(review): `cut -d','` splits on every comma and does not understand CSV
# quoting -- this assumes no quoted field before column 38 contains a comma.

for FILE in \
    "../src/medmod/train/train.csv" \
    "../src/medmod/val/val.csv" \
    "../src/medmod/test/test.csv"
do
  for col in $(seq 11 38); do
    # -x matches whole lines only, -c prints the number of matching lines.
    count=$(cut -d',' -f"$col" "$FILE" | grep -c -x '1')
    echo "Column $col: $count"
  done

  echo "----------------------"
done

: 