In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models
import pandas as pd
import numpy as np

# Read the training CSV; the 'label' column holds the targets and the
# remaining columns are reshaped into 28x28 single-channel images
train_data = pd.read_csv('../data/train.csv')
y_train = train_data['label'].values
x_train = train_data.drop(columns='label').values.reshape(-1, 28, 28, 1)

# Read the testing CSV and split it the same way
test_data = pd.read_csv('../data/test.csv')
y_test = test_data['label'].values
x_test = test_data.drop(columns='label').values.reshape(-1, 28, 28, 1)

# Divide by 255 so pixel values fall between 0 and 1
# (assumes 8-bit intensities in the CSV files)
x_train = x_train / 255.0
x_test = x_test / 255.0

# Define depthwise separable convolution block
def depthwise_separable_conv_block(x, filters, kernel_size, strides):
    """Apply a depthwise convolution followed by a pointwise 1x1 convolution.

    Each of the two convolutions is followed by batch normalization and a
    ReLU activation. Both convolutions use 'same' padding.

    Args:
        x: Input tensor the layers are applied to.
        filters: Number of filters of the pointwise (1x1) convolution.
        kernel_size: Kernel size of the depthwise convolution.
        strides: Strides of the depthwise convolution; the pointwise
            convolution always uses a stride of (1, 1).

    Returns:
        The output of the final ReLU layer.
    """
    # Layers are instantiated in the same order in which they are applied.
    stages = (
        # Depthwise stage: one spatial filter per input channel
        layers.DepthwiseConv2D(kernel_size=kernel_size, strides=strides, padding='same'),
        layers.BatchNormalization(),
        layers.ReLU(),
        # Pointwise stage: 1x1 convolution mixing the channels
        layers.Conv2D(filters, kernel_size=(1, 1), strides=(1, 1), padding='same'),
        layers.BatchNormalization(),
        layers.ReLU(),
    )

    features = x
    for stage in stages:
        features = stage(features)

    return features

# Define ShuffleNet-like model
def shufflenet(input_shape, num_classes):
    """Build a small ShuffleNet-like classifier with the Keras functional API.

    The network consists of a 3x3 stem convolution (24 filters), three
    depthwise separable convolution blocks (24 filters, stride 1), a channel
    shuffle with 3 groups, global average pooling, a 1024-unit ReLU dense
    layer with 50% dropout, and a softmax output layer.

    Args:
        input_shape: Shape of one input sample without the batch axis,
            e.g. ``(28, 28, 1)``.
        num_classes: Number of units of the softmax output layer.

    Returns:
        An uncompiled ``models.Model`` mapping the input tensor to the
        softmax output.

    Raises:
        ValueError: If the feature map entering the channel shuffle has an
            unknown static dimension, or if its channel count is not
            divisible by the number of shuffle groups.
    """

    def channel_shuffle(x, groups):
        """Interleave the channels of ``x`` across ``groups`` groups.

        Implemented with Keras ``Reshape``/``Permute`` layers rather than raw
        ``tf.reshape``/``tf.transpose`` calls: raw TensorFlow ops on symbolic
        Keras tensors are rejected by Keras 3, whereas these layers work on
        both Keras 2 and Keras 3 and compute the same permutation.
        """
        # Slicing works for both a TensorShape (Keras 2) and a tuple (Keras 3).
        height, width, channels = x.shape[1:]
        if None in (height, width, channels):
            raise ValueError(
                'channel_shuffle requires a fully defined (height, width, '
                'channels) shape, got {}'.format((height, width, channels))
            )
        if channels % groups != 0:
            raise ValueError(
                'Number of channels ({}) must be divisible by groups ({})'
                .format(channels, groups)
            )
        channels_per_group = channels // groups

        # Split the channel axis into (groups, channels_per_group) ...
        x = layers.Reshape((height, width, groups, channels_per_group))(x)
        # ... swap the two new axes (Permute dims are 1-based, batch excluded) ...
        x = layers.Permute((1, 2, 4, 3))(x)
        # ... and flatten them back into a single channel axis.
        x = layers.Reshape((height, width, channels))(x)

        return x

    # Input Layer
    input_tensor = layers.Input(shape=input_shape)

    # Convolution Block
    x = layers.Conv2D(24, kernel_size=(3, 3), strides=(1, 1), padding='same')(input_tensor)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    # Depthwise Separable Convolution Blocks
    x = depthwise_separable_conv_block(x, 24, (3, 3), (1, 1))
    x = depthwise_separable_conv_block(x, 24, (3, 3), (1, 1))
    x = depthwise_separable_conv_block(x, 24, (3, 3), (1, 1))

    # Channel Shuffle Operation (24 channels -> 3 groups of 8)
    x = channel_shuffle(x, groups=3)

    # Global Average Pooling
    x = layers.GlobalAveragePooling2D()(x)

    # Fully Connected Layer
    x = layers.Dense(1024, activation='relu')(x)
    x = layers.Dropout(0.5)(x)

    # Output Layer
    output_tensor = layers.Dense(num_classes, activation='softmax')(x)

    # Model
    model = models.Model(inputs=input_tensor, outputs=output_tensor)

    return model

# Configuration: input samples of shape (28, 28, 1) and 10 output classes
input_shape, num_classes = (28, 28, 1), 10

# Build the ShuffleNet-like network and configure it for training
shufflenet_model = shufflenet(input_shape, num_classes)
shufflenet_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer model summary
shufflenet_model.summary()

# Mini-batch size used for training
batch_size = 32

# Fit for 15 epochs on the full training set
# (no validation split is used; validation_split=0.2 is left disabled)
shufflenet_model.fit(x=x_train, y=y_train, batch_size=batch_size, epochs=15)

# Evaluate on the test set; left as a bare expression so a notebook
# displays the returned metrics
shufflenet_model.evaluate(x=x_test, y=y_test)






Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 28, 28, 24)        240       
                                                                 
 batch_normalization (Batch  (None, 28, 28, 24)        96        
 Normalization)                                                  
                                                                 
 re_lu (ReLU)                (None, 28, 28, 24)        0         
                                                                 
 depthwise_conv2d (Depthwis  (None, 28, 28, 24)        240       
 eConv2D)                                                        
                                                                 
 batch_normalization_1 (Bat  (None, 28, 28, 24)        96

[0.24499282240867615, 0.9261999726295471]