In [1]:
import os

import numpy as np
import pandas as pd
import torch
import wfdb
from joblib import dump
from skimage.transform import resize
from tqdm import tqdm

def normalize_signal(signal):
    """Z-score normalize a signal along axis 0 (per column for 2-D input).

    Args:
        signal: Array-like of numeric samples; statistics are computed
            over axis 0.

    Returns:
        ``(signal - mean) / std`` computed along axis 0. Columns that are
        constant come out as (approximately) zero instead of being divided
        by a vanishing standard deviation.
    """
    signal = np.asarray(signal)

    # Calculate mean and standard deviation
    mean = np.mean(signal, axis=0)
    std = np.std(signal, axis=0)

    # A constant column ideally has std == 0, but rounding in the mean can
    # leave a std of a few ulps (e.g. a column of 0.1 values). An exact
    # ``std == 0`` test misses that case and the division then blows the
    # rounding noise up to +/-1. Compare against a tolerance relative to
    # |mean| so only rounding-level spread is treated as constant.
    tol = 1e3 * np.finfo(np.asarray(std).dtype).eps
    is_constant = std <= tol * np.abs(mean)

    # Divide constant columns by 1 so they stay at (signal - mean) ~= 0.
    std_safe = np.where(is_constant, 1.0, std)

    return (signal - mean) / std_safe

def interpolate_signal(signal):
    """Fill missing samples by linear interpolation along axis 0.

    Args:
        signal: Array-like accepted by ``pd.DataFrame``; each column is
            interpolated independently.

    Returns:
        A NumPy array with the interpolated values. ``limit_direction='both'``
        also fills gaps at the start and end of a column.

    Note:
        The interpolation is deliberately not done in place: a DataFrame
        built from an ndarray may share memory with it (depending on the
        pandas version), so ``inplace=True`` could silently overwrite the
        caller's input array.
    """
    filled = pd.DataFrame(signal).interpolate(
        method='linear', axis=0, limit_direction='both'
    )
    return filled.to_numpy()

def load_ecg_data(df, base_path):
    """Read, clean and stack the ECG records listed in ``df``.

    Args:
        df: Table with a ``'path'`` column; each entry is appended to
            ``base_path`` and passed to ``wfdb.rdsamp``.
        base_path: String prefix prepended to every record path.

    Returns:
        A ``torch.float32`` tensor made by stacking one ``(1, n)`` row per
        kept record along a new leading axis, or ``torch.empty(0)`` when no
        record was kept. ``np.stack`` requires every record to flatten to
        the same length.
    """
    records = []
    record_paths = base_path + df['path']

    for record_path in tqdm(record_paths, desc="Loading ECG data"):
        raw_signal, header = wfdb.rdsamp(record_path)

        # Fill gaps before any statistics are computed on the signal
        filled = interpolate_signal(raw_signal)

        # The header's channel count must divide the sample count evenly
        n_channels = header["n_sig"]
        samples_per_channel, leftover = divmod(filled.size, n_channels)
        if leftover != 0:
            print(f"Unexpected data size in {record_path}. Skipping file.")
            continue

        # (samples, channels) layout -> per-channel z-score -> one flat row
        shaped = filled.reshape(samples_per_channel, n_channels)
        flat_row = normalize_signal(shaped).reshape(1, -1)
        records.append(flat_row)

    # Nothing usable was loaded: hand back an empty tensor
    if not records:
        return torch.empty(0)

    stacked = np.stack(records, axis=0)
    return torch.tensor(stacked, dtype=torch.float32)


# Define parameters
base_path = 'Z:/tale2/Shared/Mohammod/mimic-iv-ecg-diagnostic-electrocardiogram-matched-subset-1.0/'
ecg_output_path = 'data_feature/ecg_features_tensor.pt'

# Create the output directory before the long-running load, so a missing
# folder cannot make torch.save fail after all records have been processed.
os.makedirs(os.path.dirname(ecg_output_path), exist_ok=True)

# Metadata table; its 'path' column lists the records to load
Y = pd.read_csv(base_path + 'final_mimic_ecg_metadata.csv')

# Load ECG data
X_ecg_loaded = load_ecg_data(Y, base_path)


# Save the processed data
torch.save(X_ecg_loaded, ecg_output_path)


In [1]:
import os
import pandas as pd
import numpy as np
import skimage.io
import skimage.transform
import torch
from PIL import Image
import torchvision.transforms as transforms

def preprocess_image(image_path, resize_dim=(224, 224)):
    """Load an image file and turn it into a normalized grayscale tensor.

    Args:
        image_path: Path of the image file to read.
        resize_dim: Target ``(height, width)`` passed to
            ``skimage.transform.resize``.

    Returns:
        The tensor produced by ``ToPILImage -> ToTensor -> Normalize`` on
        the resized 8-bit grayscale image. With ``mean=0.5, std=0.5`` the
        [0, 1] output of ``ToTensor`` is mapped to [-1, 1].
    """
    # Imported explicitly so this function does not rely on ``skimage.color``
    # having been loaded as a side effect of another skimage import.
    from skimage.color import rgb2gray, rgba2rgb

    print("Processing:", image_path)
    # Read image with skimage
    image = skimage.io.imread(image_path)

    # Ensure image is grayscale, if it's not, convert to grayscale
    if image.ndim == 3:
        # rgb2gray expects exactly three channels; blend an alpha channel
        # away first so RGBA files are handled instead of failing.
        if image.shape[-1] == 4:
            image = rgba2rgb(image)
        image = rgb2gray(image)

    # Resize image
    image = skimage.transform.resize(image, resize_dim, mode='constant', anti_aliasing=True)

    # Quantize to 8-bit so the array can go through ToPILImage.
    # NOTE(review): assumes resize returned floats in [0, 1] - confirm for
    # inputs that are not standard integer-typed images.
    image = (image * 255).astype(np.uint8)  # Convert to uint8

    # Adjust mean and std as per your model's training
    normalize = transforms.Normalize(mean=[0.5], std=[0.5])
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
        normalize
    ])
    image = transform(image)

    return image

def process_images(root, csv_file):
    """Preprocess every image listed in a CSV file and stack the results.

    Args:
        root: Directory containing ``csv_file``; also the base directory
            that each ``'file_path'`` entry is joined onto.
        csv_file: Name of a CSV file with a ``'file_path'`` column.

    Returns:
        The per-image tensors from ``preprocess_image`` stacked along a new
        leading batch axis, in CSV row order. Returns ``torch.empty(0)``
        when the CSV lists no images, matching ``load_ecg_data``.
    """
    cases = pd.read_csv(os.path.join(root, csv_file))

    # Only the path column is needed, so iterate over it directly instead
    # of materializing a Series per row with iterrows().
    all_images = [
        preprocess_image(os.path.join(root, file_path))
        for file_path in cases['file_path']
    ]

    # torch.stack raises on an empty list; return an empty tensor instead.
    if not all_images:
        return torch.empty(0)

    # Convert list of tensors to 4D tensor (batch, channels, height, width)
    return torch.stack(all_images)

# Usage
root = 'Z:/tale2/Shared/Mohammod/physionet.org/files/mimic-cxr-jpg/2.0.0'
csv_file = 'final_mimic_cxr_metadata.csv'
image_output_path = 'data_feature/encoder_image_tensor.pt'

# Create the output directory before the long-running preprocessing, so a
# missing folder cannot make torch.save fail after every image was processed.
os.makedirs(os.path.dirname(image_output_path), exist_ok=True)

X_image_tensor = process_images(root, csv_file)
torch.save(X_image_tensor, image_output_path)
