In [1]:
from imblearn.over_sampling import SMOTE
import cv2
import os
import numpy as np

In [2]:
# Root of the training split; the cells below treat each sub-folder as a class.
# Set the CHEST_XRAY_TRAIN_DIR environment variable to override the
# machine-specific default path without editing the notebook.
dataset_dir = os.environ.get(
    "CHEST_XRAY_TRAIN_DIR",
    "D:/NEUB/MachineLearning/ThesisProject/x-rayImages/chest_xray/train",
)

In [3]:
# Only sub-directories are class folders. Stray files in the dataset root
# would otherwise make os.listdir(class_path) raise, and the image loader
# applies the same isdir filter. Sorting keeps the report order stable,
# because os.listdir returns entries in arbitrary order.
classes = sorted(
    entry
    for entry in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, entry))
)

# Map each class name to the number of entries in its folder.
class_counts = {}

for class_name in classes:
    class_path = os.path.join(dataset_dir, class_name)
    num_samples = len(os.listdir(class_path))
    class_counts[class_name] = num_samples

# Determine the minority class (the class with the fewest entries)
minority_class = min(class_counts, key=class_counts.get)

print("Class counts:")
for class_name, count in class_counts.items():
    print(f"{class_name}: {count} samples")

print(f"The minority class is: {minority_class}")

Class counts:
NORMAL: 1341 samples
PNEUMONIA: 3875 samples
The minority class is: NORMAL


In [4]:
def load_images_from_directory(directory, image_size=(256, 256)):
    """Load and resize every readable image below ``directory``.

    ``directory`` is scanned one level deep: each sub-directory is treated
    as a class and its name becomes the label of every image inside it.
    Entries of ``directory`` that are not directories are ignored.

    Parameters
    ----------
    directory : str
        Root folder containing one sub-folder per class.
    image_size : tuple of int, optional
        Target size handed to ``cv2.resize``. Defaults to ``(256, 256)``.

    Returns
    -------
    images : list
        The resized images, in the order they were read.
    labels : list of str
        Class-folder name of each image, aligned with ``images``.

    Notes
    -----
    Files for which ``cv2.imread`` returns ``None`` are skipped silently,
    so the result may hold fewer items than the folders contain.
    Both listings are sorted because ``os.listdir`` returns entries in
    arbitrary order; a fixed order keeps downstream seeded resampling
    reproducible between runs and machines.
    """
    images = []
    labels = []
    for class_name in sorted(os.listdir(directory)):
        class_dir = os.path.join(directory, class_name)
        if not os.path.isdir(class_dir):
            continue
        for image_name in sorted(os.listdir(class_dir)):
            image_path = os.path.join(class_dir, image_name)
            image = cv2.imread(image_path)
            if image is None:
                # Not decodable as an image; skip it.
                continue
            images.append(cv2.resize(image, image_size))
            labels.append(class_name)
    return images, labels

In [5]:
# Read the whole training split into memory; each label is the name of the
# class folder the image was found in.
images, labels = load_images_from_directory(dataset_dir)

In [6]:
# Turn the Python lists into NumPy arrays so they can be reshaped and
# passed to the resampler.
X, y = np.array(images), np.array(labels)

In [7]:
# Configure the SMOTE over-sampler; the fixed random_state makes the
# generated samples repeatable for the same input.
smote = SMOTE(
    sampling_strategy="auto",
    k_neighbors=5,
    random_state=42,
)

In [8]:
# Flatten every image into a single feature row before resampling,
# then run SMOTE on the flattened training data.
n_features = X.shape[1] * X.shape[2] * X.shape[3]
X_flat = X.reshape(-1, n_features)
X_resampled, y_resampled = smote.fit_resample(X_flat, y)

In [9]:
# Undo the flattening: give each resampled row the per-image shape of X again.
image_shape = (X.shape[1], X.shape[2], X.shape[3])
X_resampled = X_resampled.reshape((-1,) + image_shape)

In [26]:
# Parameters for cv2.convertScaleAbs below: contrast is passed as `alpha`
# and brightness as `beta`.
desired_contrast, desired_brightness = 0.5, 0

In [27]:
# Apply the contrast/brightness adjustment to every image in X_resampled.
# Each image is cast to 8-bit unsigned integers first, then scaled with
# cv2.convertScaleAbs using the configured alpha (contrast) and beta
# (brightness).
adjusted_synthetic_samples = [
    cv2.convertScaleAbs(
        np.uint8(resampled_image),
        alpha=desired_contrast,
        beta=desired_brightness,
    )
    for resampled_image in X_resampled
]

In [28]:
# Build the final training set: the original images plus ONLY the
# SMOTE-generated ones.
#
# adjusted_synthetic_samples / y_resampled cover the complete output of
# smote.fit_resample, which already includes the original samples.
# Concatenating X / y with the whole resampled set would therefore duplicate
# every original image and bring the class imbalance back.
# NOTE(review): this relies on imbalanced-learn returning the original rows
# first and the generated rows after them - confirm for the installed version.
n_original = len(X)
synthetic_images = adjusted_synthetic_samples[n_original:]
synthetic_labels = y_resampled[n_original:]

if len(synthetic_images) > 0:
    X_train_adjusted = np.concatenate((X, synthetic_images), axis=0)
else:
    # Nothing was generated (classes already balanced): keep the originals.
    X_train_adjusted = X.copy()
y_train_adjusted = np.concatenate((y, synthetic_labels), axis=0)

# Images and labels must stay aligned one-to-one.
assert len(X_train_adjusted) == len(y_train_adjusted)

In [29]:
# Output folder for the resampled, contrast-adjusted images.
# Set the CHEST_XRAY_RESAMPLED_DIR environment variable to override the
# machine-specific default path without editing the notebook.
resampled_dir = os.environ.get(
    "CHEST_XRAY_RESAMPLED_DIR",
    "D:/NEUB/MachineLearning/ThesisProject/x-rayImages/chest_xray/synthetic_samples/train_resampled_adjust_contrast_v1",
)
os.makedirs(resampled_dir, exist_ok=True)

In [30]:
# Write every adjusted image to <resampled_dir>/<label>/<label>_<index>.jpeg.
failed_paths = []
for i, (image, label) in enumerate(zip(adjusted_synthetic_samples, y_resampled)):
    class_dir = os.path.join(resampled_dir, label)
    os.makedirs(class_dir, exist_ok=True)
    filename = f"{label}_{i}.jpeg"
    image_path = os.path.join(class_dir, filename)
    # cv2.imwrite signals failure through its boolean return value rather
    # than by raising, so it has to be checked explicitly.
    if not cv2.imwrite(image_path, image):
        failed_paths.append(image_path)

if failed_paths:
    print(f"WARNING: {len(failed_paths)} image(s) could not be written, e.g. {failed_paths[0]}")
else:
    print("done!")

done!
