In [13]:
import os
import numpy as np
import cv2
from imblearn.over_sampling import SMOTE

In [14]:
# --- Dataset selection ------------------------------------------------
# Active configuration: the full-slice training set.
input_dir = '/kaggle/input/lung-ds-weights/Full_slice/train'
IMG_SHAPE = (512, 512, 3)
output_dir = "/kaggle/working/Full_slice_SMOTE"

# Alternative configuration: the nodule training set (swap the three
# assignments above for these to use it).
# input_dir = '/kaggle/input/lung-ds-weights/Nodule/train'
# IMG_SHAPE = (52, 52, 3)
# output_dir = "/kaggle/working/Nodule_SMOTE"

# Report how many directory entries each class folder of the input set
# holds, or note that the folder is missing.
for subfolder in ("0", "1", "2", "3", "4"):
    class_folder = os.path.join(input_dir, subfolder)
    if not os.path.exists(class_folder):
        print(f"{subfolder}: Does not exist")
        continue
    num_files = len(os.listdir(class_folder))
    print(f"{subfolder}: {num_files} images")

0: 196 images
1: 359 images
2: 887 images
3: 329 images
4: 119 images


In [15]:
# Load every training image together with its integer class label.
# The class folders are named "0" through "4"; the folder name is the label.
X, y = [], []
unreadable = []  # paths cv2 could not decode; reported after the loop

for class_label in ["0", "1", "2", "3", "4"]:
    class_path = os.path.join(input_dir, class_label)

    # Sort the listing so the sample order does not depend on the order in
    # which the filesystem happens to return directory entries.
    for img_name in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_name)
        img = cv2.imread(img_path)

        # cv2.imread reports failure by returning None rather than raising,
        # so an unreadable file must be filtered out explicitly.
        if img is None:
            unreadable.append(img_path)
            continue

        # Every image must have the configured shape: the arrays are stacked
        # below and reshaped back to IMG_SHAPE after resampling.
        if img.shape != IMG_SHAPE:
            raise ValueError(
                f"{img_path}: expected shape {IMG_SHAPE}, got {img.shape}"
            )

        X.append(img)
        y.append(int(class_label))  # folder name -> integer label

if unreadable:
    print(f"Skipped {len(unreadable)} unreadable file(s), e.g. {unreadable[:5]}")
if not X:
    raise ValueError(f"No readable images found under {input_dir}")

# Stack into one (n_images, height, width, channels) array plus a label vector.
X = np.stack(X)
y = np.array(y)

# SMOTE expects a 2D (n_samples, n_features) matrix, so flatten each image.
X_flattened = X.reshape(X.shape[0], -1)


In [16]:
# Oversample the minority classes with SMOTE.
RANDOM_STATE = 42  # fixed seed so the synthetic images are reproducible

# Desired number of samples per class after resampling. Class 2 is not
# listed, so no synthetic samples are requested for it.
smote = SMOTE(
    sampling_strategy={0: 400, 1: 720, 3: 660, 4: 240},
    random_state=RANDOM_STATE,
)

# SMOTE interpolates between neighbouring samples, which involves taking
# differences of feature vectors. Widen the 8-bit pixels to a signed 16-bit
# type first so those differences (range -255..255) cannot wrap around.
# NOTE(review): the wrap-around on uint8 input is inferred from how the
# interpolation is computed -- confirm against the installed imbalanced-learn.
X_resampled, y_resampled = smote.fit_resample(X_flattened.astype(np.int16), y)

# Bring the pixels back to valid 8-bit values so the images can be written
# with cv2.imwrite unchanged.
np.clip(X_resampled, 0, 255, out=X_resampled)
X_resampled = X_resampled.astype(np.uint8)

# Restore the image layout. The per-image shape is taken from the loaded
# data itself so it cannot drift from a hand-edited IMG_SHAPE.
X_resampled = X_resampled.reshape(-1, *X.shape[1:])

In [17]:
# Write only the synthetic images to disk, one sub-folder per class label.
# Assumes fit_resample returned the original samples first, followed by the
# generated ones, so everything past the original count is synthetic.
n_original = len(X)
for i, (img, label) in enumerate(zip(X_resampled[n_original:], y_resampled[n_original:])):
    class_dir = os.path.join(output_dir, str(label))
    os.makedirs(class_dir, exist_ok=True)

    # cv2.imwrite returns False when the file could not be written; surface
    # that instead of silently reporting success below.
    img_path = os.path.join(class_dir, f"smote_{i}.png")
    if not cv2.imwrite(img_path, img):
        raise IOError(f"Failed to write {img_path}")

print("SMOTE-generated 3D images saved successfully!")


SMOTE-generated 3D images saved successfully!


In [18]:
# Report how many files were written to each class folder of the output
# directory; a class with no folder is reported as missing.
for subfolder in ("0", "1", "2", "3", "4"):
    class_folder = os.path.join(output_dir, subfolder)
    if not os.path.exists(class_folder):
        print(f"{subfolder}: Does not exist")
        continue
    num_files = len(os.listdir(class_folder))
    print(f"{subfolder}: {num_files} images")

0: 204 images
1: 361 images
2: Does not exist
3: 331 images
4: 121 images


In [19]:
# Zip the contents of output_dir into "smote-ds.zip" and show the path of
# the archive that was created.
import shutil

archive_file = shutil.make_archive(
    base_name="smote-ds",
    format="zip",
    root_dir=output_dir,
)
archive_file

'/kaggle/working/smote-ds.zip'