# Building a Convolutional Neural Network (CNN) for Image Classification

## Tags:
- CNN
- Image Classification
- Deep Learning
- TensorFlow
- Keras

## Description:
In this notebook, we will implement a Convolutional Neural Network (CNN) using TensorFlow and Keras for image classification. We'll use a CT kidney dataset, which consists of 12,446 CT images in 4 classes: Cyst, Normal, Stone, and Tumor. The images are split into training and validation sets and fed to the network at a resolution of 512x512.

In [1]:
import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import RMSprop
from shutil import copyfile
import matplotlib.pyplot as plt

# Get data

In [11]:
# Root folder of the dataset; it holds one sub-folder per class
source_path = "/home/alrashidi/Desktop/Deep_learning_Models/CT_KIDNEY_DATASET_Normal_Cyst_Tumor_Stone/CT_KIDNEY_DATASET_Normal_Cyst_Tumor_Stone"

# Per-class source folders
source_path_Cyst = os.path.join(source_path, "Cyst")
source_path_Normal = os.path.join(source_path, "Normal")
source_path_Stone = os.path.join(source_path, "Stone")
source_path_Tumor = os.path.join(source_path, "Tumor")

# Optional clean-up (disabled): delete every file that is not a .jpg image
# !find {source_path} -type f ! -name "*.jpg" -exec rm {} +

# os.listdir returns a list of all entries under the given path,
# so its length is the number of images in each class folder
for class_name, class_dir in (
    ("Cyst", source_path_Cyst),
    ("Normal", source_path_Normal),
    ("Stone", source_path_Stone),
    ("Tumor", source_path_Tumor),
):
    image_count = len(os.listdir(class_dir))
    print(f"There are {image_count} imges of {class_name}")

There are 3709 imges of Cyst
There are 5077 imges of Normal
There are 1377 imges of Stone
There are 2283 imges of Tumor


# Create training and validation directories

In [14]:
# Root directory that will hold the training/validation folder tree
root_dir = "training_validation_CDK"

# Remove any tree left over from a previous run, so that re-running this cell
# does not fail with FileExistsError when the directories are created again
if os.path.exists(root_dir):
    shutil.rmtree(root_dir)

# Create Function
def create_train_val_dirs(root_path, class_names=("Cyst", "Normal", "Stone", "Tumor")) -> None:
    """
    Create the directory tree for the training and validation sets.

    The resulting layout is:

        <root_path>
        |-- training
        |   |-- Cyst
        |   |-- Normal
        |   |-- Stone
        |   |-- Tumor
        |-- validation
        |   |-- Cyst
        |   |-- Normal
        |   |-- Stone
        |   |-- Tumor

    The function is idempotent: directories that already exist are left as
    they are, so calling it several times does not raise FileExistsError.

    Args:
       root_path (string) - the base directory path to create subdirectories from
       class_names (iterable of string) - one subdirectory with this name is
           created inside both the training and the validation directory

    Returns:
       None
    """
    # Define the paths for the training and validation sets
    train_path = os.path.join(root_path, "training")
    val_path = os.path.join(root_path, "validation")

    for split_path in (train_path, val_path):
        # Create the split directory itself, even when class_names is empty
        os.makedirs(split_path, exist_ok=True)
        # Inside each split directory, create one subdirectory per class
        for class_name in class_names:
            os.makedirs(os.path.join(split_path, class_name), exist_ok=True)

    print(f"Directories created: {train_path}, {val_path}")

try:
    create_train_val_dirs(root_path=root_dir)
except Exception as e:
    # Re-raise as ValueError, but keep the original exception explicitly chained
    # so the real cause (e.g. a permission error) stays visible in the traceback
    raise ValueError(f"An error occurred: {e}.") from e

# Test your create_train_val_dirs function: list every directory that was created.
# The loop variable must NOT be called root_dir, otherwise the module-level
# root_dir would be overwritten with the last directory visited by os.walk.
for current_dir, dirs, _files in os.walk(root_dir):
    for subdir in dirs:
        print(os.path.join(current_dir, subdir))

Directories created: training_validation_CDK/training, training_validation_CDK/validation
training_validation_CDK/validation
training_validation_CDK/training
training_validation_CDK/validation/Stone
training_validation_CDK/validation/Normal
training_validation_CDK/validation/Cyst
training_validation_CDK/validation/Tumor
training_validation_CDK/training/Stone
training_validation_CDK/training/Normal
training_validation_CDK/training/Cyst
training_validation_CDK/training/Tumor


# Split Data

In [15]:
# Function: split_data
def _copy_non_empty_files(file_names, source_dir, destination_dir):
  """Copy the named files from source_dir to destination_dir, skipping zero-length files."""
  for file_name in file_names:
    source = os.path.join(source_dir, file_name)
    destination = os.path.join(destination_dir, file_name)
    if os.path.getsize(source) > 0:
      copyfile(source, destination)
    else:
      print(f"{file_name} is zero lenght, so ignoring.")


def split_data(SOURCE_DIR, TRAINING_DIR, VALIDATION_DIR, SPLIT_SIZE):
  """
  Randomly splits the files of a directory into a training and a validation set

  The files are copied (not moved), so SOURCE_DIR is left unchanged.
  Zero-length files are reported and skipped; they still count towards the
  split position, so a set that contains one ends up one file smaller.

  Args:
    SOURCE_DIR (string): directory path containing the images
    TRAINING_DIR (string): directory path to be used for training
    VALIDATION_DIR (string): directory path to be used for validation
    SPLIT_SIZE (float): proportion of the dataset to be used for training

  Returns:
    None
  """
  # Create the target directories if they do not exist yet
  os.makedirs(TRAINING_DIR, exist_ok=True)
  os.makedirs(VALIDATION_DIR, exist_ok=True)

  # Get the list of files; sorting makes the result depend only on the
  # random seed and not on the order in which the OS lists the directory
  files = sorted(os.listdir(SOURCE_DIR))

  # Shuffle the list of files. random.sample returns a NEW list, so the
  # result has to be assigned (discarding it leaves the files unshuffled)
  files = random.sample(files, len(files))

  # Calculate the split index based on SPLIT_SIZE
  split_index = int(SPLIT_SIZE * len(files))

  # Separate files into training and validation sets
  training_files = files[:split_index]
  validation_files = files[split_index:]

  # Copy each set to its directory
  _copy_non_empty_files(training_files, SOURCE_DIR, TRAINING_DIR)
  _copy_non_empty_files(validation_files, SOURCE_DIR, VALIDATION_DIR)

# Test your split_data function

In [18]:
# Define paths
# Dataset root, relative to the current working directory
ROOT_DIR = "CT_KIDNEY_DATASET_Normal_Cyst_Tumor_Stone/CT_KIDNEY_DATASET_Normal_Cyst_Tumor_Stone/"

# Per-class source directories. os.path.join is used (as for the target
# directories below) so that the trailing slash of ROOT_DIR does not
# produce a doubled "//" separator.
Cyst_SOURCE_DIR = os.path.join(ROOT_DIR, "Cyst")
Normal_SOURCE_DIR = os.path.join(ROOT_DIR, "Normal")
Stone_SOURCE_DIR = os.path.join(ROOT_DIR, "Stone")
Tumor_SOURCE_DIR = os.path.join(ROOT_DIR, "Tumor")

# Target directories for the two splits
TRAINING_DIR = "training_validation_CDK/training"
VALIDATION_DIR = "training_validation_CDK/validation"

# Per-class training directories
TRAINING_Cyst_DIR = os.path.join(TRAINING_DIR, "Cyst")
TRAINING_Normal_DIR = os.path.join(TRAINING_DIR, "Normal")
TRAINING_Stone_DIR = os.path.join(TRAINING_DIR, "Stone")
TRAINING_Tumor_DIR = os.path.join(TRAINING_DIR, "Tumor")

# Per-class validation directories
VALIDATION_Cyst_DIR = os.path.join(VALIDATION_DIR, "Cyst")
VALIDATION_Normal_DIR = os.path.join(VALIDATION_DIR, "Normal")
VALIDATION_Stone_DIR = os.path.join(VALIDATION_DIR, "Stone")
VALIDATION_Tumor_DIR = os.path.join(VALIDATION_DIR, "Tumor")
# Empty directories in case you run this cell multiple times
def directories_multiple(path):
    """Delete every entry located directly inside ``path``; ``path`` itself is kept."""
    entry_names = os.listdir(path)
    if not entry_names:
        # Nothing to delete
        return
    for entry_name in entry_names:
        os.remove(os.path.join(path, entry_name))

# Empty the per-class target directories so that repeated runs of this cell start clean
# / Training
for training_class_dir in (TRAINING_Cyst_DIR, TRAINING_Normal_DIR, TRAINING_Stone_DIR, TRAINING_Tumor_DIR):
    directories_multiple(training_class_dir)
# / Validation
for validation_class_dir in (VALIDATION_Cyst_DIR, VALIDATION_Normal_DIR, VALIDATION_Stone_DIR, VALIDATION_Tumor_DIR):
    directories_multiple(validation_class_dir)

# Define proportion of images used for training
split_size = .9

# (label, source dir, training dir, validation dir) per class, in processing order
class_splits = [
    ("Cyst", Cyst_SOURCE_DIR, TRAINING_Cyst_DIR, VALIDATION_Cyst_DIR),
    ("Normal", Normal_SOURCE_DIR, TRAINING_Normal_DIR, VALIDATION_Normal_DIR),
    ("Stone", Stone_SOURCE_DIR, TRAINING_Stone_DIR, VALIDATION_Stone_DIR),
    ("Tumor", Tumor_SOURCE_DIR, TRAINING_Tumor_DIR, VALIDATION_Tumor_DIR),
]

# Run the function
# NOTE: Messages about zero-length images should be printed out
for class_label, class_source_dir, class_training_dir, class_validation_dir in class_splits:
    split_data(SOURCE_DIR=class_source_dir,
               TRAINING_DIR=class_training_dir,
               VALIDATION_DIR=class_validation_dir,
               SPLIT_SIZE=split_size)

# The function should copy rather than move images, so the original
# directories should still contain all of their images
for class_label, class_source_dir, class_training_dir, class_validation_dir in class_splits:
    print(f"Original {class_label} a directory has {len(os.listdir(class_source_dir))} images")

# Check the training and validation splits
for class_label, class_source_dir, class_training_dir, class_validation_dir in class_splits:
    print(f"There are {len(os.listdir(class_training_dir))} images of {class_label} for training")
# / Validation
for class_label, class_source_dir, class_training_dir, class_validation_dir in class_splits:
    print(f"There are {len(os.listdir(class_validation_dir))} images of {class_label} for validation")

Original Cyst a directory has 3709 images
Original Normal a directory has 5077 images

Original Stone a directory has 1377 images
Original Tumor a directory has 2283 images

There are 3338 images of Cyst for training
There are 4569 images of Normal for training
There are 1239 images of Stone for training
There are 2054 images of Tumor for training
There are 371 images of Cyst for validation
There are 508 images of Normal for validation
There are 138 images of Stone for validation
There are 229 images of Tumor for validation


# Train and Validation Generators

In [23]:
## Image shape: 512 x 512 x 3 
# FUNCTION: train_val_generators
def train_val_generators(TRAINING_DIR, VALIDATION_DIR, batch_size=16, target_size=(512, 512)):
  """
  Creates the training and validation data generators

  Both generators rescale pixel values by 1/255 and yield one-hot
  ('categorical') labels. Only the training generator augments its images
  (rotation, shifts, shear, zoom, horizontal flip).

  Args:
    TRAINING_DIR (string): directory path containing the training images
    VALIDATION_DIR (string): directory path containing the testing/validation images
    batch_size (int): number of images per batch, used for both generators
    target_size (tuple of int): (height, width) every image is resized to,
      used for both generators

  Returns:
    train_generator, validation_generator - tuple containing the generators
  """

  # Instantiate the ImageDataGenerator class (and set the arguments to augment the images)
  train_datagen = ImageDataGenerator(rescale=1./255,
                                     rotation_range=45,
                                     width_shift_range=0.2,
                                     height_shift_range=0.2,
                                     shear_range=0.2,
                                     zoom_range=0.2,
                                     horizontal_flip=True,
                                     fill_mode='nearest')

  # Pass the appropriate arguments to the flow_from_directory method for the training data
  train_generator = train_datagen.flow_from_directory(directory=TRAINING_DIR,
                                                      batch_size=batch_size,
                                                      class_mode='categorical',
                                                      target_size=target_size)

  # Instantiate the ImageDataGenerator class (with rescale only: validation images are not augmented)
  validation_datagen = ImageDataGenerator(rescale=1./255)

  # Pass the appropriate arguments to the flow_from_directory method for the validation data
  validation_generator = validation_datagen.flow_from_directory(directory=VALIDATION_DIR,
                                                                batch_size=batch_size,
                                                                class_mode='categorical',
                                                                target_size=target_size)

  # Return: Train, Validation
  return train_generator, validation_generator

# Test Generators
# Build both generators from the split directories created above
train_generator, validation_generator = train_val_generators(TRAINING_DIR, VALIDATION_DIR)

Found 11200 images belonging to 4 classes.
Found 1246 images belonging to 4 classes.


# Build Model 

A basic CNN made of stacked convolution and max-pooling layers, followed by a fully connected classifier.

In [None]:
# Create Function: create_model
def create_model(input_shape=(512, 512, 3), num_classes=4):
    """
    Build and compile a basic CNN classifier.

    The network stacks six convolution + max-pooling stages, flattens the
    feature maps, and classifies them with a 4096-unit fully connected layer
    (with dropout) followed by a softmax output layer.

    Args:
        input_shape (tuple of int): (height, width, channels) of the input
            images; must match the size of the images fed to the model
        num_classes (int): number of output classes (units of the softmax layer)

    Returns:
        The compiled tf.keras Sequential model (Adam optimizer,
        categorical cross-entropy loss, accuracy metric)
    """
    # Basic CNN
    model = tf.keras.models.Sequential([
        # Layer 1: Convolutional layer with 96 filters, kernel size 11x11, and ReLU activation
        tf.keras.layers.Conv2D(96, (11, 11), activation='relu', input_shape=input_shape),
        # Max pooling layer with pool size 2x2
        tf.keras.layers.MaxPool2D(2, 2),

        # Layer 2: Convolutional layer with 256 filters, kernel size 5x5, and ReLU activation
        tf.keras.layers.Conv2D(256, (5, 5), activation='relu'),
        # Max pooling layer with pool size 2x2
        tf.keras.layers.MaxPool2D(2, 2),

        # Layer 3: Convolutional layer with 384 filters, kernel size 3x3, and ReLU activation
        tf.keras.layers.Conv2D(384, (3, 3), activation='relu'),
        # Max pooling layer with pool size 2x2
        tf.keras.layers.MaxPool2D(2, 2),

        # Layer 4: Convolutional layer with 384 filters, kernel size 3x3, and ReLU activation
        tf.keras.layers.Conv2D(384, (3, 3), activation='relu'),
        # Max pooling layer with pool size 2x2
        tf.keras.layers.MaxPool2D(2, 2),

        # Layer 5: Convolutional layer with 384 filters, kernel size 3x3, and ReLU activation
        tf.keras.layers.Conv2D(384, (3, 3), activation='relu'),
        # Max pooling layer with pool size 2x2
        tf.keras.layers.MaxPool2D(2, 2),

        # Layer 6: Convolutional layer with 256 filters, kernel size 3x3, and ReLU activation
        tf.keras.layers.Conv2D(256, (3, 3), activation='relu'),
        # Max pooling layer with pool size 2x2
        tf.keras.layers.MaxPool2D(2, 2),

        # Flatten layer to convert 3D feature maps to 1D feature vectors
        tf.keras.layers.Flatten(),

        # Layer 7: Fully connected layer with 4096 units and ReLU activation
        tf.keras.layers.Dense(4096, activation='relu'),
        # Dropout layer with dropout rate 0.5 to reduce overfitting
        tf.keras.layers.Dropout(0.5),

        # Output layer: one unit per class with softmax activation
        tf.keras.layers.Dense(num_classes, activation='softmax'),
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,
                                         beta_1=0.9,
                                         beta_2=0.999,
                                         epsilon=1e-07)

    # categorical_crossentropy expects one-hot encoded labels
    model.compile(optimizer=optimizer,
                  loss="categorical_crossentropy",
                  metrics=['accuracy'])

    return model

# Train Model

In [None]:
# Build a fresh, untrained model
model = create_model()

# Fit it on the training generator and evaluate on the validation generator
# after every epoch. Note that this may take some time.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=100,
    verbose=1,
)

# Plot the Accuracy and Loss for Training and Validation

In [None]:
#-----------------------------------------------------------
# Retrieve the per-epoch results on the training and
# validation data from the training history
#-----------------------------------------------------------
acc=history.history['accuracy']
val_acc=history.history['val_accuracy']
loss=history.history['loss']
val_loss=history.history['val_loss']

epochs=range(len(acc)) # Get number of epochs

#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
# The legend text must be passed as label=...; a fourth positional argument
# would be interpreted by plt.plot as additional data to plot
plt.plot(epochs, acc, 'r', label="Training Accuracy")
plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
print("")

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Download History for model

In [None]:
def download_history():
    """Pickle the metrics dictionary of the global ``history`` object to 'history_augmented.pkl'."""
    import pickle

    output_file = 'history_augmented.pkl'

    # Write the history dictionary to the file in binary mode
    with open(output_file, 'wb') as pickle_file:
        pickle.dump(history.history, pickle_file)

    print("History saved to 'history_augmented.pkl'")

# Save the metrics recorded during training to 'history_augmented.pkl'
download_history()

## Conclusion:
We successfully built and trained a CNN using TensorFlow and Keras to classify CT kidney images into four classes (Cyst, Normal, Stone, and Tumor). The model achieved an accuracy of [insert accuracy here]% on the validation set. Further improvements could be made by experimenting with different architectures, hyperparameters, and augmentation techniques.
