In [1]:
import numpy as np
import os
import cv2
import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

In [2]:
def load_images_from_folder(folder, img_size):
    images = []
    labels = []
    
    img_files = glob.glob(os.path.join(folder, '*.png'))
    print(f"Found {len(img_files)} images.")
    
    for img_path in img_files:
        filename = os.path.basename(img_path)
        class_label = int(filename.split('_')[2].split('.')[0])  # Extract class from filename
        
        img = cv2.imread(img_path)
        if img is not None:
            img = cv2.resize(img, img_size)
            images.append(img)
            labels.append(class_label)
        else:
            print(f"Failed to load image: {img_path}")
    
    return np.array(images), np.array(labels)

In [3]:
base_folder = os.getcwd()
data_folder = os.path.join(base_folder, 'Decals_data_images')
img_size = (128, 128)  # Updated size

X, y = load_images_from_folder(data_folder, img_size)

X = X / 255.0

encoder = OneHotEncoder(sparse_output=False)
y = encoder.fit_transform(y.reshape(-1, 1))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

print(f'Training samples: {X_train.shape[0]}')
print(f'Testing samples: {X_test.shape[0]}')

Found 17736 images.
Training samples: 14188
Testing samples: 3548


In [4]:
class SimpleCNN:
    def __init__(self, input_shape, num_classes):
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.initialize_weights()

    def initialize_weights(self):
        np.random.seed(42)
        self.W1 = np.random.randn(3, 3, 3, 16).astype(np.float64) * 0.1
        self.b1 = np.zeros((16,), dtype=np.float64)
        self.W2 = np.random.randn(3, 3, 16, 32).astype(np.float64) * 0.1
        self.b2 = np.zeros((32,), dtype=np.float64)
        # Modified for 128x128 input: After two max-pooling layers, feature map will be 32x32
        self.W3 = np.random.randn(32 * 32 * 32, 128).astype(np.float64) * 0.1
        self.b3 = np.zeros((128,), dtype=np.float64)
        self.W4 = np.random.randn(128, self.num_classes).astype(np.float64) * 0.1
        self.b4 = np.zeros((self.num_classes,), dtype=np.float64)

    def relu(self, Z):
        return np.maximum(0, Z)

    def softmax(self, Z):
        expZ = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        return expZ / expZ.sum(axis=1, keepdims=True)

    def max_pool(self, A, pool_size=2, stride=2):
        n, h, w, c = A.shape
        h_out = (h - pool_size) // stride + 1
        w_out = (w - pool_size) // stride + 1
        
        output = np.zeros((n, h_out, w_out, c))
        self.max_pool_masks = {}  
        
        for i in range(h_out):
            for j in range(w_out):
                h_start = i * stride
                h_end = h_start + pool_size
                w_start = j * stride
                w_end = w_start + pool_size
                
                window = A[:, h_start:h_end, w_start:w_end, :]
                window_reshaped = window.reshape(n, -1, c)
                max_indices = window_reshaped.argmax(axis=1)

                mask = np.zeros_like(window)
                for batch in range(n):
                    for channel in range(c):
                        idx = max_indices[batch, channel]
                        h_idx, w_idx = np.unravel_index(idx, (pool_size, pool_size))
                        mask[batch, h_idx, w_idx, channel] = 1

                self.max_pool_masks[(i, j)] = mask
                output[:, i, j, :] = window_reshaped.max(axis=1)
        
        return output

    def max_pool_backward(self, dout, pool_size=2, stride=2):
        n, h, w, c = dout.shape
        h_out = h * stride
        w_out = w * stride
        dx = np.zeros((n, h_out, w_out, c))
        
        for i in range(h):
            for j in range(w):
                h_start = i * stride
                h_end = h_start + pool_size
                w_start = j * stride
                w_end = w_start + pool_size
                
                mask = self.max_pool_masks.get((i, j))
                if mask is None:
                    continue
                    
                # Ensure mask channels match output channels
                if mask.shape[-1] != c:
                    new_mask = np.zeros((n, pool_size, pool_size, c))
                    min_channels = min(mask.shape[-1], c)
                    new_mask[..., :min_channels] = mask[..., :min_channels]
                    mask = new_mask
                
                # Broadcast the gradient
                dx[:, h_start:h_end, w_start:w_end, :] += (
                    mask * dout[:, i:i+1, j:j+1, :].reshape(n, 1, 1, -1)
                )
        
        return dx

    def forward(self, X):
        X = X.astype(np.float64)
        
        # Modified for 128x128 input
        self.Z1 = np.zeros((X.shape[0], 128, 128, 16), dtype=np.float64)
        for i in range(X.shape[0]):
            for k in range(self.W1.shape[3]):
                conv_result = np.zeros((128, 128), dtype=np.float64)
                for c in range(3):
                    kernel = self.W1[:, :, c, k]
                    conv_result += cv2.filter2D(X[i, :, :, c], -1, kernel)
                self.Z1[i, :, :, k] = conv_result
        self.Z1 += self.b1
        self.A1 = self.relu(self.Z1)
        self.P1 = self.max_pool(self.A1)  # Output: 64x64
        
        # Modified dimensions for second conv layer
        self.Z2 = np.zeros((self.P1.shape[0], 64, 64, 32), dtype=np.float64)
        for i in range(self.P1.shape[0]):
            for k in range(self.W2.shape[3]):
                conv_result = np.zeros((64, 64), dtype=np.float64)
                for c in range(16):
                    kernel = self.W2[:, :, c, k]
                    conv_result += cv2.filter2D(self.P1[i, :, :, c], -1, kernel)
                self.Z2[i, :, :, k] = conv_result
        self.Z2 += self.b2
        self.A2 = self.relu(self.Z2)
        self.P2 = self.max_pool(self.A2)  # Output: 32x32
        
        self.F = self.P2.reshape(X.shape[0], -1)
        self.Z3 = np.dot(self.F, self.W3) + self.b3
        self.A3 = self.relu(self.Z3)
        self.Z4 = np.dot(self.A3, self.W4) + self.b4
        self.A4 = self.softmax(self.Z4)
        
        return self.A4

    def conv2d_backward(self, input_data, kernel_size, output_gradient):
        input_gradient = np.zeros_like(input_data)
        kernel_gradient = np.zeros(kernel_size)
        pad_size = (kernel_size[0] - 1) // 2
    
        padded_input = np.pad(input_data, 
                         ((0, 0), (pad_size, pad_size), 
                          (pad_size, pad_size), (0, 0)), 
                         mode='constant')

        for i in range(input_data.shape[0]):
            for c_out in range(output_gradient.shape[-1]):
                for c_in in range(input_data.shape[-1]):
                    kernel = np.flip(np.flip(self.W2[:, :, c_in, c_out], axis=0), axis=1)
                    input_gradient[i, :, :, c_in] += cv2.filter2D(
                        output_gradient[i, :, :, c_out].astype(np.float32),
                        -1, 
                        kernel.astype(np.float32),
                        borderType=cv2.BORDER_CONSTANT
                    )
    
        for i in range(input_data.shape[0]):
            for c_out in range(output_gradient.shape[-1]):
                for c_in in range(input_data.shape[-1]):
                    for h in range(kernel_size[0]):
                        for w in range(kernel_size[1]):
                            kernel_gradient[h, w, c_in, c_out] += np.sum(
                                padded_input[i, h:h+output_gradient.shape[1], w:w+output_gradient.shape[2], c_in] *
                                output_gradient[i, :, :, c_out]
                            )
    
        return input_gradient, kernel_gradient
    

    def backward(self, X, Y, learning_rate):
        m = X.shape[0]

        dZ4 = self.A4 - Y
        dW4 = np.dot(self.A3.T, dZ4) / m
        db4 = np.sum(dZ4, axis=0) / m

        dA3 = np.dot(dZ4, self.W4.T)
        dZ3 = dA3 * (self.A3 > 0)
        dW3 = np.dot(self.F.T, dZ3) / m
        db3 = np.sum(dZ3, axis=0) / m
        dF = np.dot(dZ3, self.W3.T).reshape(self.P2.shape)
        dP2 = dF
        dA2 = self.max_pool_backward(dP2)
        dZ2 = dA2 * (self.A2 > 0)
        
        dP1, dW2 = self.conv2d_backward(self.P1, self.W2.shape, dZ2)
        db2 = np.sum(dZ2, axis=(0, 1, 2)) / m
        
        dA1 = self.max_pool_backward(dP1)
        dZ1 = dA1 * (self.A1 > 0)
        
        _, dW1 = self.conv2d_backward(X, self.W1.shape, dZ1)
        db1 = np.sum(dZ1, axis=(0, 1, 2)) / m

        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W3 -= learning_rate * dW3
        self.b3 -= learning_rate * db3
        self.W4 -= learning_rate * dW4
        self.b4 -= learning_rate * db4

    def train(self, X, Y, epochs, learning_rate):
        for epoch in range(epochs):
            Y_pred = self.forward(X)
            loss = -np.mean(np.sum(Y * np.log(Y_pred + 1e-8), axis=1))
            print(f"Epoch {epoch + 1}, Loss: {loss}")
            self.backward(X, Y, learning_rate)

    def evaluate(self, X, Y_true):
        Y_pred = self.forward(X)
        y_pred_classes = np.argmax(Y_pred, axis=1)
        y_true_classes = np.argmax(Y_true, axis=1)
        print(classification_report(y_true_classes, y_pred_classes))

In [5]:
import numpy as np
from tqdm import tqdm
import time
import multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

def train_with_parallel_batches_fast(cnn, X_train, y_train, epochs, learning_rate, batch_size=256):
    """Faster training using parallel batch processing"""
    num_samples = X_train.shape[0]
    num_batches = num_samples // batch_size
    
    start_time = time.time()
    
    # Convert data to float32 for faster processing
    X_train = X_train.astype(np.float64)
    y_train = y_train.astype(np.float64)
    
    # Use multiple CPU cores for batch processing
    num_cores = mp.cpu_count() // 2  # Use half of available cores
    
    for epoch in range(epochs):
        epoch_start = time.time()
        print(f"Epoch {epoch + 1}/{epochs}")
        
        # Shuffle data
        indices = np.random.permutation(num_samples)
        X_shuffled = X_train[indices]
        y_shuffled = y_train[indices]
        
        epoch_losses = []
        
        # Process batches with progress bar
        with tqdm(total=num_batches, desc=f"Epoch {epoch + 1}", unit="batch") as pbar:
            for i in range(0, num_samples, batch_size):
                batch_end = min(i + batch_size, num_samples)
                X_batch = X_shuffled[i:batch_end]
                y_batch = y_shuffled[i:batch_end]
                
                # Forward pass
                Y_pred = cnn.forward(X_batch)
                loss = -np.mean(np.sum(y_batch * np.log(Y_pred + 1e-8), axis=1))
                
                # Backward pass
                cnn.backward(X_batch, y_batch, learning_rate)
                
                epoch_losses.append(loss)
                pbar.set_postfix(loss=loss)
                pbar.update(1)
        
        epoch_end = time.time()
        epoch_time = epoch_end - epoch_start
        avg_loss = np.mean(epoch_losses)
        print(f"Epoch {epoch + 1} completed in {epoch_time:.2f} seconds")
        print(f"Average loss: {avg_loss:.4f}")
        
        # Early stopping if loss is good enough
        if avg_loss < 0.1:  # Adjust this threshold based on your needs
            print("Loss threshold reached, stopping early")
            break
    
    total_time = time.time() - start_time
    print(f"Total training time: {total_time:.2f} seconds")

# Usage:
cnn = SimpleCNN(input_shape=(128, 128, 3), num_classes=y_train.shape[1])

# Train with faster implementation
train_with_parallel_batches_fast(
    cnn, 
    X_train, 
    y_train, 
    epochs=10,  # Reduced epochs
    learning_rate=0.001,
    batch_size=128  # Increased batch size
)

# Quick evaluation
y_pred = cnn.forward(X_test)
pred_classes = np.argmax(y_pred, axis=1)
true_classes = np.argmax(y_test, axis=1)
accuracy = np.mean(pred_classes == true_classes)
print(f"Test accuracy: {accuracy:.4f}")

Epoch 1/10


Epoch 1: 111batch [1:04:15, 34.73s/batch, loss=2.23]                      


Epoch 1 completed in 3870.20 seconds
Average loss: 2.3828
Epoch 2/10


Epoch 2: 111batch [1:00:33, 32.74s/batch, loss=2.27]                      


Epoch 2 completed in 3654.35 seconds
Average loss: 2.2439
Epoch 3/10


Epoch 3: 111batch [59:58, 32.42s/batch, loss=2.25]                      


Epoch 3 completed in 3621.21 seconds
Average loss: 2.2333
Epoch 4/10


Epoch 4: 111batch [59:58, 32.42s/batch, loss=2.25]                      


Epoch 4 completed in 3620.97 seconds
Average loss: 2.2211
Epoch 5/10


Epoch 5: 111batch [59:46, 32.31s/batch, loss=2.24]                      


Epoch 5 completed in 3607.49 seconds
Average loss: 2.2100
Epoch 6/10


Epoch 6: 111batch [59:43, 32.28s/batch, loss=2.17]                      


Epoch 6 completed in 3604.42 seconds
Average loss: 2.1999
Epoch 7/10


Epoch 7: 111batch [59:46, 32.31s/batch, loss=2.16]                      


Epoch 7 completed in 3606.03 seconds
Average loss: 2.1902
Epoch 8/10


Epoch 8: 111batch [1:01:19, 33.14s/batch, loss=2.17]                      


Epoch 8 completed in 3701.60 seconds
Average loss: 2.1821
Epoch 9/10


Epoch 9: 111batch [1:00:01, 32.44s/batch, loss=2.13]                      


Epoch 9 completed in 3626.86 seconds
Average loss: 2.1744
Epoch 10/10


Epoch 10: 111batch [1:00:03, 32.46s/batch, loss=2.15]                      


Epoch 10 completed in 3629.96 seconds
Average loss: 2.1663
Total training time: 36562.38 seconds


KeyboardInterrupt: 

In [5]:
y_pred = cnn.forward(X_test)
pred_classes = np.argmax(y_pred, axis=1)
true_classes = np.argmax(y_test, axis=1)
accuracy = np.mean(pred_classes == true_classes)
print(f"Test accuracy: {accuracy:.4f}")

NameError: name 'cnn' is not defined

In [6]:
import joblib
model = SimpleCNN(input_shape=(128, 128, 3), num_classes=10)

joblib.dump(model, 'cnn_model.pkl')
print("Model saved successfully!")

Model saved successfully!


In [7]:
loaded_model = joblib.load('cnn_model.pkl')
print("Model loaded successfully!")

Model loaded successfully!
