Introduction:
1. Data Preprrocessing techniques:
    a. image resize
    b. Normalization 
    c. Data Augmentation (Example: Horizontal Flipping, rotation, scaling )
    d. Histogram Equalization
    e. Data Cleaning (Example: Removing images with low contrast)
    f. Noise Reduction (bluring and denoising)
    g. Splitting the Dataset (Example: 80-10-10 split)


Lets split the data first into training, validation and testing using the 70%, 15%, 15% respectively.

In [5]:
import os
import shutil
import random

# Get the current working directory
cwd = os.getcwd()

# Define paths relative to the working directory
data_dirOuter = os.path.join(cwd, 'data')
data_dir = os.path.join(data_dirOuter, 'data')  # data folder that contains all the images 
train_dir = os.path.join(data_dirOuter, 'train')  # Train directory inside the first data folder
val_dir = os.path.join(data_dirOuter, 'val')      # Validation directory inside the first data folder
test_dir = os.path.join(data_dirOuter, 'test')

# Define the split ratios
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# Create directories for train, val, and test sets
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Loop through each class folder
for class_name in os.listdir(data_dir):
    class_path = os.path.join(data_dir, class_name)
    if os.path.isdir(class_path):
        images = os.listdir(class_path)
        random.shuffle(images)
        
        # Calculate split indices
        train_split = int(train_ratio * len(images))
        val_split = int((train_ratio + val_ratio) * len(images))
        
        # Move images to respective directories
        for i, image in enumerate(images):
            src_path = os.path.join(class_path, image)
            if i < train_split:
                dst_dir = os.path.join(train_dir, class_name)
            elif i < val_split:
                dst_dir = os.path.join(val_dir, class_name)
            else:
                dst_dir = os.path.join(test_dir, class_name)
            
            os.makedirs(dst_dir, exist_ok=True)
            shutil.copy(src_path, dst_dir)


now we are going to use CNN for classifying the images 

In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
import numpy as np

Load the data 

In [12]:
import os
import shutil
import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import numpy as np

cwd = os.getcwd()
data_dirOuter = os.path.join(cwd, 'data')
train_dir = os.path.join(data_dirOuter, 'train')
val_dir = os.path.join(data_dirOuter, 'val')
test_dir = os.path.join(data_dirOuter, 'test')

# Define the classes
classes = ["bottle", "basket", "food", "cup", "jar", "can", "dish", "mug", "glass"]

from PIL import Image

def load_data(data_dir, image_size=(32, 32)):
    X = []
    y = []
    for class_id, class_dir in enumerate(sorted(os.listdir(data_dir)), 1):
        if os.path.isdir(os.path.join(data_dir, class_dir)):
            for image_name in os.listdir(os.path.join(data_dir, class_dir)):
                image_path = os.path.join(data_dir, class_dir, image_name)
                image = Image.open(image_path)
                image = image.resize(image_size)
                image = np.array(image) / 255.0  # Normalize the image
                X.append(image)
                y.append(class_id - 1)  # Subtract 1 to convert class_id to zero-based index
    return np.array(X), np.array(y)


# Load the data
X_train, y_train = load_data(train_dir)
X_val, y_val = load_data(val_dir)
X_test, y_test = load_data(test_dir)

# Normalize the images
X_train = X_train / 255.0
X_val = X_val / 255.0
X_test = X_test / 255.0

# Define and compile the model
cnn = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(X_train.shape[1:])),
    MaxPooling2D((2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(len(classes), activation='softmax')  # Use len(classes) as the number of output units
])

cnn.compile(optimizer=Adam(),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = cnn.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

# Evaluate the model
loss, accuracy = cnn.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# Plot training history
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (3586,) + inhomogeneous part.