In [1]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from numpy import linalg as LA
from keras import backend as K
import matplotlib.pyplot as plt

In [2]:
# Dataset constants: ten target classes, 28x28 single-channel images.
num_classes = 10
input_shape = (28, 28, 1)

# Fetch MNIST, which arrives already divided into train and test portions.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
print(x_train.shape)
print(x_test.shape)

# Cast to float32 and divide by 255 so pixel values land in [0, 1], then
# append a trailing channel axis so every image has shape (28, 28, 1).
x_train = (x_train.astype("float32") / 255)[..., np.newaxis]
x_test = (x_test.astype("float32") / 255)[..., np.newaxis]
print("x_train shape:", x_train.shape)
print(len(x_train), "train samples")
print(len(x_test), "test samples")

# Turn the integer class labels into binary (one-hot) class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)




(60000, 28, 28)
(10000, 28, 28)
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [3]:
# Scratch check: pixels per image (28*28) divided by the square root of the
# 60000 training samples.
# NOTE(review): presumably a rough d/sqrt(n) scale estimate; the bound computed
# later in this notebook uses sqrt((d+1)/n) instead -- confirm this cell is still needed.
28*28/np.sqrt(60000)

3.200666597236686

In [3]:
# Assemble the CNN one layer at a time: two (sigmoid conv -> 2x2 max-pool)
# stages, then flatten, dropout and a softmax classifier.
model = keras.Sequential()
model.add(keras.Input(shape=input_shape))
model.add(layers.Conv2D(filters=32, kernel_size=(3, 3), activation="sigmoid"))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Conv2D(filters=64, kernel_size=(3, 3), activation="sigmoid"))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Flatten())
model.add(layers.Dropout(rate=0.5))
# Output layer: one softmax unit per class.
model.add(layers.Dense(units=num_classes, activation="softmax"))

model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 13, 13, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 5, 5, 64)          0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 1600)              0         
                                                                 
 dropout (Dropout)           (None, 1600)              0

2024-02-05 12:30:47.001343: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2024-02-05 12:30:47.001372: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-02-05 12:30:47.001387: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-02-05 12:30:47.001440: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-02-05 12:30:47.001464: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [6]:
# Sanity check of the Flatten width: 5 x 5 spatial positions x 64 channels,
# matching the (None, 5, 5, 64) -> (None, 1600) shapes in the model summary.
5*5*64

1600

In [4]:
# Training hyperparameters.
batch_size = 128
epochs = 15

# Adam optimizer with categorical cross-entropy loss; accuracy is tracked as a metric.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Fit on the training set with 10% of it held out for validation.
# The call is the cell's last expression, so the returned History object is displayed.
model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
)


Epoch 1/15


2024-02-05 12:30:55.300438: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2024-02-05 12:31:01.826018: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x17d20f4d0>

In [5]:
# Score the trained model on the test split; the result is indexed below as
# [0] = loss and [1] = accuracy.
test_score = model.evaluate(x_test, y_test, verbose=0)
# Error rate is the complement of accuracy; computed once and reused in the report.
test_err = 1 - test_score[1]
print("Test loss:", test_score[0])
print("Test accuracy:", test_score[1])
print("Test error:", test_err)

Test loss: 0.053885675966739655
Test accuracy: 0.9830000400543213
Test error: 0.01699995994567871


In [6]:
# Score the trained model on the training split; the result is indexed below as
# [0] = loss and [1] = accuracy.
train_score = model.evaluate(x_train, y_train, verbose=0)
# Training error (1 - accuracy); this value is added to the bound computed later.
train_err = 1 - train_score[1]
print("Train loss:", train_score[0])
print("Train accuracy:", train_score[1])
print("Train error:", train_err)

Train loss: 0.05284377560019493
Train accuracy: 0.98416668176651
Train error: 0.01583331823348999


In [7]:
# normg1: largest L1 norm among the first conv layer's spatial kernels.
# get_weights()[0] is the layer's kernel; Keras stores Conv2D kernels as
# (kernel_h, kernel_w, in_channels, out_channels).
conv1_kernel = np.asarray(model.get_layer(name="conv2d").get_weights()[0])
# Read the sizes from the kernel itself rather than hard-coding 3*3 and 32, so
# this cell keeps working if the layer's kernel size or filter count changes.
kernel_h, kernel_w = conv1_kernel.shape[:2]
# One column per (in_channel, out_channel) pair, each holding kernel_h*kernel_w weights.
conv1_columns = conv1_kernel.reshape(kernel_h * kernel_w, -1)
# Sum of absolute values per column, accumulated in float64 as before.
conv1_l1 = np.array(
    [np.sum(np.abs(column)) for column in conv1_columns.T],
    dtype=np.float64,
)
normg1 = np.max(conv1_l1)

In [8]:
# Show the first conv layer's norm computed in the previous cell.
print(normg1)

8.467000961303711


In [10]:
# normg2: largest L1 norm among the second conv layer's spatial kernel slices.
# get_weights()[0] is the layer's kernel; Keras stores Conv2D kernels as
# (kernel_h, kernel_w, in_channels, out_channels).
conv2_kernel = np.asarray(model.get_layer(name="conv2d_1").get_weights()[0])
# Read the sizes from the kernel itself rather than hard-coding 3*3 and 32*64,
# so this cell keeps working if the layer's configuration changes.
kernel_h, kernel_w = conv2_kernel.shape[:2]
# After the reshape each column is the kernel_h*kernel_w slice belonging to one
# (in_channel, out_channel) pair.
# NOTE(review): a complete filter spans all input channels
# (kernel_h*kernel_w*in_channels weights); confirm that the per-slice norm used
# here is the quantity the bound actually requires.
conv2_columns = conv2_kernel.reshape(kernel_h * kernel_w, -1)
# Sum of absolute values per column, accumulated in float64 as before.
conv2_l1 = np.array(
    [np.sum(np.abs(column)) for column in conv2_columns.T],
    dtype=np.float64,
)
normg2 = np.max(conv2_l1)

In [11]:
# Show the second conv layer's norm computed in the previous cell.
print(normg2)

2.31363844871521


In [12]:
# normg3: norm of the final Dense layer's kernel (get_weights()[0]).
dense_kernel = np.asarray(model.get_layer(name="dense").get_weights()[0])
# Matrix 1-norm of the transposed kernel, i.e. the largest absolute column sum
# of the transpose (equivalently the largest absolute row sum of the kernel).
# NOTE(review): assuming the Keras (inputs, units) kernel layout, this sums over
# the output units for each input feature -- confirm that is the intended norm.
normg3 = LA.norm(dense_kernel.T, ord=1)

In [13]:
# Show the dense layer's norm computed in the previous cell.
print(normg3)

2.6057014


In [14]:
# Quantities entering the Rademacher complexity bound.
d = 28 ** 2  # pixels per input image
n = x_train.shape[0]  # number of training samples

In [25]:
# Display the training-set size n.
n

60000

In [15]:

# Scale factor sqrt((d + 1) / n), built from the input dimension d and the
# training-set size n defined in an earlier cell.
Rvalue=np.sqrt((d+1)/n)

In [16]:
# Display the sqrt((d + 1) / n) factor.
Rvalue

0.11438239957848993

In [17]:
# Bound value: constant factors times the three layer norms and the
# sqrt((d + 1) / n) factor, plus the training error.
# NOTE(review): the literals are unexplained -- 10 presumably equals num_classes
# and the 1/4 and 1/16 factors presumably come from the sigmoid activations;
# confirm against the bound being implemented.
# NOTE(review): the value displayed in this notebook is about 3.66, which
# exceeds 1; if A1 is meant to bound an error rate, that bound is vacuous.
# The factors are multiplied in their original left-to-right order.
norm_term = 4 * 10 * (1 / 4) * normg1 * normg2 * normg3
complexity_term = norm_term * Rvalue * (1 / 16)
A1 = complexity_term + train_err

In [18]:
# Display the bound value computed in the previous cell.
A1

3.6649603036599716