# Normalised images with masks

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm


# Define paths (relative to the notebook's current working directory, printed for reference)
print(os.getcwd())
base_path = "../data"
base_path_out = os.path.join(base_path, "processed")

# Location of the CSV file that lists the training images
train_csv_path = os.path.join(base_path_out, "df_xray_train_norm_plus_augmented.csv")
print("Loading images from:", os.path.abspath(train_csv_path))

# 1) Load the train CSV file
df_train = pd.read_csv(train_csv_path)

# Build each image's path from the 'path' (directory) and 'file' (file name) columns.
# A plain comprehension is used instead of a row-wise DataFrame.apply: it avoids
# constructing a Series per row and always yields one value per row, even when the
# CSV contains no rows.
df_train['image_path'] = [
    os.path.join(folder, filename)
    for folder, filename in zip(df_train['path'], df_train['file'])
]

# Step 2: Define function for resizing and flattening images
#def resize_and_flatten_image(image_path, size=(128, 128)):
def resize_and_flatten_image(image_path, size=(20, 20)):
    """Read an image as grayscale, resize it, and flatten it to a 1D vector.

    Args:
        image_path: Path of the image file to read.
        size: Target size handed to ``cv2.resize``; defaults to ``(20, 20)``.

    Returns:
        The resized image reshaped to a single dimension, or ``None`` (after
        printing a message) when ``cv2.imread`` did not return an image.
    """
    grayscale = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # Guard clause: a None result means the file could not be read
    if grayscale is None:
        print(f"Failed to load image: {image_path}")
        return None

    # Resize first, then collapse the 2D pixel grid into one flat vector
    return cv2.resize(grayscale, size).reshape(-1)

# Step 3: Load and preprocess the training images (resize and flatten)
image_data = []
train_loaded_ok = []  # one flag per CSV row: True when the image could be read
for path in tqdm(df_train['image_path'], desc="Loading and processing train images"):
    # (20, 20) keeps the feature vectors small; (128, 128) is the larger alternative
    img_flattened = resize_and_flatten_image(path, size=(20, 20))
    train_loaded_ok.append(img_flattened is not None)
    if img_flattened is not None:
        image_data.append(img_flattened)

# Convert to NumPy array
X_train = np.array(image_data, dtype=np.uint8)

# Keep only the labels of rows whose image was actually loaded. Taking every
# label from the CSV would shift labels against features as soon as a single
# image fails to load, because failed images are skipped above.
train_loaded_ok = np.array(train_loaded_ok, dtype=bool)
y_train = df_train['label_enc'].to_numpy()[train_loaded_ok]

n_train_skipped = int((~train_loaded_ok).sum())
if n_train_skipped:
    print(f"Skipped {n_train_skipped} train images that could not be loaded")

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

# Save the resized and flattened training data
np.savez_compressed(os.path.join(base_path_out, 'train_data_resized_with_masks.npz'), X_train=X_train, y_train=y_train)
print("Resized and flattened train images have been saved!")



c:\Users\arfin\Downloads\covid xray\mar25-bds_analysis-of-covid-19-chest-x-rays\notebooks
Loading images from: c:\Users\arfin\Downloads\covid xray\mar25-bds_analysis-of-covid-19-chest-x-rays\data\processed\df_xray_train_norm_plus_augmented.csv


Loading and processing train images: 100%|██████████| 35059/35059 [00:18<00:00, 1923.72it/s]


X_train shape: (35059, 16384)
y_train shape: (35059,)
Resized and flattened train images have been saved!


In [None]:
# Load test CSV
test_csv_path = os.path.join(base_path_out, 'df_xray_processed_normed_enc_test.csv')
df_test = pd.read_csv(test_csv_path)

# Build image paths for test data: join the working directory with the 'path'
# and 'file' columns and normalise the result. The working directory is looked
# up once instead of once per row.
working_dir = os.getcwd()
df_test['image_path'] = [
    os.path.normpath(os.path.join(working_dir, folder, filename))
    for folder, filename in zip(df_test['path'], df_test['file'])
]

# Step 4: Load and preprocess the test images (resize and flatten)
test_image_data = []
test_loaded_ok = []  # one flag per CSV row: True when the image could be read
for path in tqdm(df_test['image_path'], desc="Loading and processing test images"):
    # (20, 20) keeps the feature vectors small; (128, 128) is the larger alternative
    img_flattened = resize_and_flatten_image(path, size=(20, 20))
    test_loaded_ok.append(img_flattened is not None)
    if img_flattened is not None:
        test_image_data.append(img_flattened)

# Convert to NumPy array
X_test = np.array(test_image_data, dtype=np.uint8)

# Keep only the labels of rows whose image was actually loaded, so that
# X_test[i] and y_test[i] always describe the same image even when some
# files could not be read and were skipped above.
test_loaded_ok = np.array(test_loaded_ok, dtype=bool)
y_test = df_test['label_enc'].to_numpy()[test_loaded_ok]

n_test_skipped = int((~test_loaded_ok).sum())
if n_test_skipped:
    print(f"Skipped {n_test_skipped} test images that could not be loaded")

print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

# Save the resized and flattened test data
np.savez_compressed(os.path.join(base_path_out, 'test_data_resized_with_masks.npz'), X_test=X_test, y_test=y_test)
print("Resized and flattened test images have been saved!")




Loading and processing test images: 100%|██████████| 4233/4233 [00:02<00:00, 1967.60it/s]


X_test shape: (4233, 16384)
y_test shape: (4233,)
Resized and flattened test images have been saved!
