# Mounting Google Drive:
The drive.mount('/content/drive') command mounts the user's Google Drive in the Colab environment.

In [None]:
from google.colab import drive
# Location inside the Colab VM where the user's Google Drive is attached.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


# Dataset Preparation
This code snippet focuses on preparing the dataset for further processing. It involves importing necessary libraries, defining the path to the directory containing images, listing images in the directory, checking the data type of the variable storing image names, and obtaining the total number of images in the dataset.

In [None]:
import os
import cv2
import matplotlib.pyplot as plt

# Directory holding the source images of the "benign" class.
path = '/content/drive/MyDrive/DS/benign'

# os.listdir returns entries in arbitrary order; sorting makes the image
# order (and therefore every later index into the list) reproducible.
images = sorted(os.listdir(path))

# A bare `type(images)` is silently discarded unless it is the last
# expression of the cell, so print it explicitly.
print(type(images))

# Last expression of the cell: the notebook displays the number of entries.
len(images)

16


# Image Data Extraction:

This code snippet iterates through the list of image file names in the dataset directory. Each file is read with OpenCV's `cv2.imread`, which returns the image directly as a NumPy array (in BGR channel order), and that array is appended to a list. The resulting list, img_data, contains the image data read from the directory.

In [None]:
# Load every listed file into memory as an OpenCV (BGR) NumPy array.
img_data = []
for img in images:
    img_arr = cv2.imread(os.path.join(path, img))
    if img_arr is None:
        # cv2.imread reports failure (sub-directory, non-image or corrupt
        # file) by returning None instead of raising; keep such entries out
        # of the dataset so later steps do not crash on them.
        print(f"Skipping unreadable file: {img}")
        continue
    img_data.append(img_arr)


# Image Visualization:

This code snippet utilizes matplotlib to visualize the images stored in the img_data list. It generates a grid of subplots, each containing an image.

In [None]:
# Show all loaded images as thumbnails in a grid that is 10 columns wide.
GRID_COLS = 10
# Keep the original 10x10 layout, but add rows when there are more than
# 100 images so that plt.subplot never receives an out-of-range index.
grid_rows = max(10, -(-len(img_data) // GRID_COLS))  # ceiling division

plt.figure(figsize=(20, 20))
for i, bgr_image in enumerate(img_data):
    if bgr_image is None:
        # Entry that OpenCV could not decode; leave its grid cell empty.
        continue
    plt.subplot(grid_rows, GRID_COLS, i + 1)
    # OpenCV stores channels as BGR while matplotlib expects RGB; without
    # this conversion the red and blue channels appear swapped.
    plt.imshow(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))

# Image Augmentation Configuration:

This code sets up an image data generator using Keras' ImageDataGenerator class. Image augmentation parameters are passed to the constructor to specify the transformations to be applied to the images during training.

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from skimage import io

# Augmentation settings, collected in one place and then handed to the
# ImageDataGenerator constructor as keyword arguments.
augmentation_options = dict(
    rotation_range=45,       # random rotation, up to 45 degrees
    width_shift_range=0.2,   # horizontal shift, as a fraction of the width
    height_shift_range=0.2,  # vertical shift, as a fraction of the height
    shear_range=0.2,         # shear intensity
    zoom_range=0.2,          # random zoom range
    horizontal_flip=True,    # randomly mirror images left/right
    fill_mode='reflect',     # fill newly exposed pixels by reflection
)

datagen = ImageDataGenerator(**augmentation_options)


# Image Augmentation Process:

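This code snippet performs image augmentation using the previously configured ImageDataGenerator. `flow_from_directory` reads the images of the `benign` class, applies random transformations on the fly, and saves every augmented image as a PNG file in the specified directory. Because the generator yields batches indefinitely, the loop counts the batches and stops after 25 of them (16 images per batch).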

In [None]:


import os

# Folder that receives the augmented PNG files.
AUGMENTED_DIR = '/content/drive/MyDrive/TF2'
# Number of batches to generate (the generator itself never stops).
NUM_BATCHES = 25

# Keras writes into save_to_dir but does not appear to create it, so make
# sure the folder exists before the first batch is saved.
os.makedirs(AUGMENTED_DIR, exist_ok=True)

augmented_batches = datagen.flow_from_directory(
    directory='/content/drive/MyDrive/DS',
    classes=["benign"],
    batch_size=16,
    target_size=(1200, 1600),
    color_mode="rgb",
    save_to_dir=AUGMENTED_DIR,
    save_prefix="aug",
    save_format="png",
)

# Each next() call produces one augmented batch and, as a side effect,
# writes its images to AUGMENTED_DIR. Pull a fixed number of batches
# instead of counting inside an endless loop.
for i in range(1, NUM_BATCHES + 1):
    batch = next(augmented_batches)

Found 16 images belonging to 1 classes.


# Installing split-folders with the `full` extra

In [None]:
pip install split-folders[full]

# Dataset Splitting:
The primary functionality of split-folders is to split a dataset into multiple subsets, typically train, validation, and test sets. It automatically divides the dataset into specified proportions and creates separate directories for each subset.

# Splitting Process:

The splitfolders.ratio() function processes the input dataset and splits it into the specified ratios for train, validation, and test sets.
It creates directories within the specified output directory to store the split datasets, organizing them into train, validation, and test sets accordingly.

In [None]:
import splitfolders
# Source dataset and destination folder for the split.
input_folder = '/content/drive/MyDrive/hist_ben_dataset'
split_output_dir = "/content/drive/MyDrive/hist_ben_split"

# Fractions passed as `ratio`; the fixed seed keeps the split reproducible.
split_fractions = (.7, .2, .1)

splitfolders.ratio(
    input_folder,
    output=split_output_dir,
    seed=42,
    ratio=split_fractions,
    group_prefix=None,
)

Copying files: 3882 files [01:31, 42.23 files/s] 
