In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator

# Function to load images from a directory
def load_images(directory, target_size=(224, 224)):
    """Load every ``.jpg`` file found directly inside ``directory``.

    Each file is read with ``cv2.imread`` and resized with ``cv2.resize``.
    Files that OpenCV cannot decode are skipped (with a printed notice)
    instead of aborting the whole load.

    Parameters
    ----------
    directory : str
        Folder to scan. Only its immediate entries are considered.
    target_size : tuple of int, optional
        Size handed to ``cv2.resize`` for every image.

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, labels)``. Every label is 1 when the directory path
        contains "brain" (case-insensitive) and 0 otherwise.
    """
    # The label depends only on the directory path, so compute it once.
    # NOTE(review): the substring test looks at the whole path, so any parent
    # folder containing "brain" also yields label 1 - confirm this is intended.
    label = 1 if "brain" in directory.lower() else 0

    images = []
    labels = []
    # os.listdir gives no ordering guarantee; sorting keeps runs reproducible.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".jpg"):
            continue
        img_path = os.path.join(directory, filename)

        # cv2.imread returns None rather than raising when a file cannot be
        # read; passing that None on to cv2.resize would crash the load.
        img = cv2.imread(img_path)
        if img is None:
            print(f"Skipping unreadable image: {img_path}")
            continue

        # Further preprocessing could go here if needed,
        # e.g., img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        images.append(cv2.resize(img, target_size))
        labels.append(label)
    return np.array(images), np.array(labels)

# Read the brain tumor images and their labels from disk
brain_images, brain_labels = load_images(
    "C:/Users/net pc/Desktop/THESIS PROJECT/AUGMENTED/brain_tumor"
)

# Read the kidney tumor images and their labels from disk
kidney_images, kidney_labels = load_images(
    "C:/Users/net pc/Desktop/THESIS PROJECT/AUGMENTED/kidney_tumor"
)

# Report the size and per-label counts of each dataset before augmentation
for set_title, set_images, set_labels in (
    ("Brain Tumor Dataset:", brain_images, brain_labels),
    ("\nKidney Tumor Dataset:", kidney_images, kidney_labels),
):
    label_values, label_counts = np.unique(set_labels, return_counts=True)
    print(set_title)
    print("Total images:", len(set_images))
    print("Distribution of labels:", dict(zip(label_values, label_counts)))




Brain Tumor Dataset:
Total images: 1000
Distribution of labels: {1: 1000}

Kidney Tumor Dataset:
Total images: 1000
Distribution of labels: {0: 1000}


In [2]:
# Summarise the brain tumor set: name, image count and per-label counts.
# pandas turns the keys of the nested label dict into the row index and
# broadcasts the scalar entries across those rows.
brain_label_values, brain_label_counts = np.unique(brain_labels, return_counts=True)
brain_data = {
    'Dataset': 'Brain Tumor',
    'Total Images': len(brain_images),
    'Distribution of Labels': dict(zip(brain_label_values, brain_label_counts)),
}
brain_df = pd.DataFrame(brain_data)
print(brain_df)

# Same summary for the kidney tumor set
kidney_label_values, kidney_label_counts = np.unique(kidney_labels, return_counts=True)
kidney_data = {
    'Dataset': 'Kidney Tumor',
    'Total Images': len(kidney_images),
    'Distribution of Labels': dict(zip(kidney_label_values, kidney_label_counts)),
}
kidney_df = pd.DataFrame(kidney_data)
print(kidney_df)



       Dataset  Total Images  Distribution of Labels
1  Brain Tumor          1000                    1000
        Dataset  Total Images  Distribution of Labels
0  Kidney Tumor          1000                    1000


In [3]:
# Data Augmentation
def augment_data(images, labels, target_count=20000):
    """Generate exactly ``target_count`` randomly augmented images.

    The source images are cycled through in order; each visit produces one
    augmented image (random rotation, shift, shear, zoom and flips) that
    carries the label of the image it was derived from.

    Parameters
    ----------
    images : sequence of numpy.ndarray
        Source images. Each one is reshaped to a batch of one before being
        handed to ``ImageDataGenerator.flow``.
    labels : sequence
        Label for each entry of ``images``, indexed in parallel.
    target_count : int, optional
        Number of augmented images to produce.

    Returns
    -------
    tuple of numpy.ndarray
        ``(augmented_images, augmented_labels)``, each of length
        ``target_count`` (empty when ``target_count`` is not positive).

    Raises
    ------
    ValueError
        If ``images`` is empty while ``target_count`` is positive; without
        this check the generation loop could never make progress.
    """
    if target_count <= 0:
        return np.array([]), np.array([])
    if len(images) == 0:
        raise ValueError("augment_data needs at least one source image")

    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='nearest'
    )

    augmented_images = []
    augmented_labels = []

    while len(augmented_images) < target_count:
        for i, img in enumerate(images):
            # Stop mid-pass so the result never exceeds target_count, even
            # when it is not a multiple of len(images).
            if len(augmented_images) >= target_count:
                break

            # flow() expects a batch: (1, height, width, channels)
            batch = img.reshape((1,) + img.shape)

            # Draw a single augmented sample from a fresh one-image iterator.
            augmented = next(datagen.flow(batch, batch_size=1))
            augmented_images.append(augmented[0])
            augmented_labels.append(labels[i])

    return np.array(augmented_images), np.array(augmented_labels)

# Augment brain tumor dataset to 20,000
augmented_brain_images, augmented_brain_labels = augment_data(brain_images, brain_labels, target_count=20000)

# Augment kidney tumor dataset to 20,000
augmented_kidney_images, augmented_kidney_labels = augment_data(kidney_images, kidney_labels, target_count=20000)

# Save augmented images to new folders
output_brain_folder = "C:/Users/net pc/Desktop/THESIS PROJECT/AUGMENTED/brain_tumor_augmented"
output_kidney_folder = "C:/Users/net pc/Desktop/THESIS PROJECT/AUGMENTED/kidney_tumor_augmented"

os.makedirs(output_brain_folder, exist_ok=True)
os.makedirs(output_kidney_folder, exist_ok=True)

# cv2.imwrite reports an unsuccessful save through its boolean return value,
# so count those results instead of discarding them.
failed_writes = 0

for i, image in enumerate(augmented_brain_images):
    if not cv2.imwrite(os.path.join(output_brain_folder, f"augmented_brain_{i}.jpg"), image):
        failed_writes += 1

for i, image in enumerate(augmented_kidney_images):
    if not cv2.imwrite(os.path.join(output_kidney_folder, f"augmented_kidney_{i}.jpg"), image):
        failed_writes += 1

# Only mention write problems when there were any, so a clean run prints
# exactly the same report as before.
if failed_writes:
    print(f"Warning: {failed_writes} augmented images could not be written to disk")

# Display label distribution after augmentation
print("\nDemographic Distribution after Augmentation:")
print("Augmented Brain Tumor Dataset:")
print("Total images:", len(augmented_brain_images))
print("Distribution of labels:", dict(zip(*np.unique(augmented_brain_labels, return_counts=True))))

print("\nAugmented Kidney Tumor Dataset:")
print("Total images:", len(augmented_kidney_images))
print("Distribution of labels:", dict(zip(*np.unique(augmented_kidney_labels, return_counts=True))))



Demographic Distribution after Augmentation:
Augmented Brain Tumor Dataset:
Total images: 20000
Distribution of labels: {1: 20000}

Augmented Kidney Tumor Dataset:
Total images: 20000
Distribution of labels: {0: 20000}
