In [1]:
import numpy as np
import pandas as pd
import torch
from torchvision import transforms
from torchvision.datasets import ImageFolder
import torchvision.models as models
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import random_split
from torch import nn
from torchsummary import summary
import os
import random
import shutil


In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [7]:
# Folders holding the original 'normal' and 'cancerous' images
normal_100_dir = 'Normal_100'
normal_400_dir = 'Normal_400'
cancerous_100_dir = 'Cancerous_100'
cancerous_400_dir = 'Cancerous_400'


# Root folders of the train, validation, and test splits
train_dir = 'train_data'
validation_dir = 'validation_data'
test_dir = 'test_data'

# NOTE(review): not referenced by any of the visible cells - confirm whether it is still needed
test_ratio = 0.1

# Make sure every <split>/<class> folder exists.
# exist_ok=True creates each class folder individually, so a split folder that
# already exists but is empty (or only partly populated) still gets both
# sub-folders, and re-running this cell is harmless.
for split_dir in (train_dir, validation_dir, test_dir):
    for class_name in ('normal', 'cancerous'):
        os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

In [4]:
# Seed the shuffle so the train/validation/test assignment is reproducible.
# The split cells copy files (they do not move them), so re-running with a
# different permutation would leave earlier copies in place and could put the
# same image into more than one split.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

# List the images in the 'normal' and 'cancerous' directories.
# os.listdir returns entries in arbitrary order, so sort first; otherwise the
# seeded shuffle would still start from a platform-dependent ordering.
normal_100_files = sorted(os.listdir(normal_100_dir))
normal_400_files = sorted(os.listdir(normal_400_dir))
cancerous_100_files = sorted(os.listdir(cancerous_100_dir))
cancerous_400_files = sorted(os.listdir(cancerous_400_dir))

# Shuffle each list in place; the split cells slice these lists by position
random.shuffle(normal_100_files)
random.shuffle(normal_400_files)
random.shuffle(cancerous_100_files)
random.shuffle(cancerous_400_files)

In [8]:
# Fixed number of images taken from each source folder for the test split
# (the same number is then taken again for the validation split)
num_normal_100 = 30
num_cancerous_100 = 30
num_normal_400 = 35
num_cancerous_400 = 35

# One row per source folder:
# (shuffled file list, source folder, class sub-folder, images per held-out split)
holdout_sources = [
    (normal_100_files, normal_100_dir, 'normal', num_normal_100),
    (normal_400_files, normal_400_dir, 'normal', num_normal_400),
    (cancerous_100_files, cancerous_100_dir, 'cancerous', num_cancerous_100),
    (cancerous_400_files, cancerous_400_dir, 'cancerous', num_cancerous_400),
]

# Split 0 (test) receives files [0, count) of each list,
# split 1 (validation) receives files [count, 2 * count).
for split_index, split_root in enumerate((test_dir, validation_dir)):
    for file_names, source_dir, class_name, count in holdout_sources:
        first = split_index * count
        for file_name in file_names[first:first + count]:
            shutil.copy(
                os.path.join(source_dir, file_name),
                os.path.join(split_root, class_name, file_name),
            )

In [10]:
# Copy the remaining images into the train folder: for each source list,
# everything from position 2 * num_* onwards (the positions before that are
# the ones sliced off for the test and validation folders).
train_sources = (
    (normal_100_files, normal_100_dir, 'normal', num_normal_100),
    (normal_400_files, normal_400_dir, 'normal', num_normal_400),
    (cancerous_100_files, cancerous_100_dir, 'cancerous', num_cancerous_100),
    (cancerous_400_files, cancerous_400_dir, 'cancerous', num_cancerous_400),
)

for file_names, source_dir, class_name, holdout_count in train_sources:
    for file_name in file_names[(holdout_count * 2):]:
        origin = os.path.join(source_dir, file_name)
        target = os.path.join(train_dir, class_name, file_name)
        shutil.copy(origin, target)

In [11]:
# Number of 'normal' file names that fall into the training slice
normal_train_total = len(normal_100_files[(num_normal_100 * 2):]) + len(normal_400_files[(num_normal_400 * 2):])
print(normal_train_total)

# Number of 'cancerous' file names that fall into the training slice
cancerous_train_total = len(cancerous_100_files[(num_cancerous_100 * 2):]) + len(cancerous_400_files[(num_cancerous_400 * 2):])
print(cancerous_train_total)

160
804


In [12]:

# Count the entries currently present in the 'normal' training folder
normal_train_entries = os.listdir('train_data/normal')
to_augment = len(normal_train_entries)
print(to_augment)


160


In [13]:
import cv2
import os
import numpy as np

# Directory containing the images to flip
original_images_directory = 'train_data/normal'

# Flipped copies are written back into the same class folder.
# The previous value 'train_data/nomral' was a typo: it created a stray extra
# folder under train_data and left the 'normal' folder unchanged.
augmented_images_directory = 'train_data/normal'

# Snapshot the listing before writing, so files created below are not picked
# up again during this pass.
# NOTE: because input and output share a folder, re-running this cell also
# flips the copies produced by earlier runs.
original_images = os.listdir(original_images_directory)

# Ensure the output directory exists
os.makedirs(augmented_images_directory, exist_ok=True)

# Write one left-right mirrored copy per listed image
for image_name in original_images:
    img = cv2.imread(os.path.join(original_images_directory, image_name))
    if img is None:
        # cv2.imread returns None (it does not raise) when a file cannot be
        # read or decoded; skip it instead of crashing inside cv2.flip
        print(f"Skipping unreadable file: {image_name}")
        continue

    # flipCode=1 flips around the vertical axis (left-right mirror)
    img_horizontal_flip = cv2.flip(img, 1)

    # splitext drops the real extension whatever its length; the old
    # image_name[:-4] slice assumed a 3-character extension
    image_stem = os.path.splitext(image_name)[0]
    cv2.imwrite(os.path.join(augmented_images_directory, f"{image_stem}_horizontal.jpg"), img_horizontal_flip)
  


In [14]:
import cv2
import os
import numpy as np

# Directory containing the images to flip
original_images_directory = 'train_data/normal'

# Flipped copies are written back into the same class folder.
# The previous value 'train_data/nomral' was a typo: it created a stray extra
# folder under train_data and left the 'normal' folder unchanged.
augmented_images_directory = 'train_data/normal'

# Snapshot the listing before writing, so files created below are not picked
# up again during this pass.
# NOTE: because input and output share a folder, re-running this cell also
# flips the copies produced by earlier runs.
original_images = os.listdir(original_images_directory)

# Ensure the output directory exists
os.makedirs(augmented_images_directory, exist_ok=True)

# Write one top-bottom mirrored copy per listed image
for image_name in original_images:
    img = cv2.imread(os.path.join(original_images_directory, image_name))
    if img is None:
        # cv2.imread returns None (it does not raise) when a file cannot be
        # read or decoded; skip it instead of crashing inside cv2.flip
        print(f"Skipping unreadable file: {image_name}")
        continue

    # flipCode=0 flips around the horizontal axis (top-bottom mirror)
    img_vertical_flip = cv2.flip(img, 0)

    # splitext drops the real extension whatever its length; the old
    # image_name[:-4] slice assumed a 3-character extension
    image_stem = os.path.splitext(image_name)[0]
    cv2.imwrite(os.path.join(augmented_images_directory, f"{image_stem}_vertical.jpg"), img_vertical_flip)

In [9]:
# Count the entries currently present in the 'normal' training folder
normal_folder_entries = os.listdir('train_data/normal')
print(len(normal_folder_entries))

640


In [15]:
import scipy
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
import random
from PIL import Image

# Directory containing the images to draw from
original_images_directory = 'train_data/normal'

# Rotated copies are written back into the same class folder.
# The previous value 'train_data/augmented' put them in a separate folder
# under train_data, so the 'normal' folder never grew.
augmented_images_directory = 'train_data/normal'

# Number of images to randomly select (one rotated copy is produced per image)
num_images_to_augment = 164

# Random rotation of up to +/-45 degrees; pixels exposed by the rotation are
# filled by reflecting the image content
datagen = ImageDataGenerator(
    rotation_range=45,
    fill_mode='reflect'
)

# Snapshot the listing before writing, so files created below are not
# candidates for selection in this pass
original_images = os.listdir(original_images_directory)

# Ensure the output directory exists
os.makedirs(augmented_images_directory, exist_ok=True)

# Sample without replacement; random.sample raises ValueError when the folder
# holds fewer than num_images_to_augment entries
selected_images = random.sample(original_images, num_images_to_augment)

# Produce and save exactly one augmented image per selected file
for image_name in selected_images:
    # Load the image and add a leading batch axis of size 1, as datagen.flow
    # iterates over batches
    img = load_img(os.path.join(original_images_directory, image_name))
    x = img_to_array(img)
    x = x.reshape((1,) + x.shape)

    # Take the single image out of the first generated batch and convert it
    # to 8-bit so PIL can save it
    augmented_img_array = next(datagen.flow(x, batch_size=1))[0].astype('uint8')
    augmented_img = Image.fromarray(augmented_img_array)

    # splitext drops the real extension whatever its length; the old
    # image_name[:-4] slice assumed a 3-character extension
    image_stem = os.path.splitext(image_name)[0]
    augmented_img.save(os.path.join(augmented_images_directory, f"{image_stem}_augmented.jpg"))


In [16]:
# Count the entries currently present in the 'normal' training folder
normal_folder_entries = os.listdir('train_data/normal')
print(len(normal_folder_entries))

804


In [17]:
# Count the entries directly under train_data
train_root_entries = os.listdir('train_data')
print(len(train_root_entries))

2
