In [5]:
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import Optimizer

class SAM(Optimizer):
    def __init__(self, lr=0.001, rho=0.5, epsilon=None, name='SAM', **kwargs):
        super(SAM, self).__init__(name=name, **kwargs)
        self.lr = K.variable(lr, name='lr')
        self.rho = K.variable(rho, name='rho')
        self.epsilon = epsilon or K.epsilon()

    
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        
        # Create empty list to hold modified parameters
        new_params = []
        
        # Iterate over all trainable parameters
        for p, g in zip(params, grads):
            # Create a new parameter variable that is a copy of the original parameter
            p_new = K.variable(p)
            
            # Compute the gradient of the loss with respect to the new parameter variable
            g_new = K.gradients(loss, p_new)[0]
            
            # Compute the sharpness-aware update
            r = K.sqrt(K.sum(K.square(p - p_new)))
            r = K.maximum(r, self.epsilon)
            p_new = p - self.lr * g / r
            
            # Apply the weight decay and update rule
            wd = p - p_new
            p_final = p_new + self.rho * wd
            
            # Add the modified parameter variable to the list of new parameters
            new_params.append(p_final)
            
            # Add the SAM update to the list of updates
            self.updates.append(K.update(p, p_final))
        
        # Add the update to increment the iterations variable
        self.weights = [self.iterations] + new_params
        return self.updates


In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.datasets import cifar100

# Load the CIFAR-100 dataset
(x_train, y_train), (x_test, y_test) = cifar100.load_data()


In [11]:
# Create a simple Keras model
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(32, 32, 3)))
model.add(Flatten())
model.add(Dense(100, activation='softmax', name='output'))

In [12]:
# Compile the model with the SAM optimizer
optimizer = SAM(lr=0.001, rho=0.5)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [13]:
# Train the model
model.fit(x_train, y_train, batch_size=32, epochs=10, validation_data=(x_test, y_test))

Epoch 1/10


ValueError: in user code:

    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 994, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1052, in compute_loss
        return self.compiled_loss(
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/engine/compile_utils.py", line 265, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/losses.py", line 152, in __call__
        losses = call_fn(y_true, y_pred)
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/losses.py", line 272, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/losses.py", line 1990, in categorical_crossentropy
        return backend.categorical_crossentropy(
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/backend.py", line 5529, in categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)

    ValueError: Shapes (None, 1) and (None, 100) are incompatible


In [15]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.utils import to_categorical


In [16]:

# Load the CIFAR-100 dataset
(x_train, y_train), (x_test, y_test) = cifar100.load_data()


In [17]:

# Convert the labels to one-hot encoding
num_classes = 100
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)


In [18]:

# Load the VGG16 model without the top layer
vgg_model = VGG16(weights='imagenet', include_top=False, input_shape=(32, 32, 3))


In [19]:

# Freeze the layers in the VGG16 model
for layer in vgg_model.layers:
    layer.trainable = False


In [20]:

# Create a new model by adding a new top layer to the VGG16 model
model = Sequential()
model.add(vgg_model)
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))


In [21]:

# Compile the model with the SAM optimizer
optimizer = SAM(lr=0.001, rho=0.5)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])


In [22]:

# Train the model
model.fit(x_train, y_train, batch_size=32, epochs=10, validation_data=(x_test, y_test))



Epoch 1/10


NotImplementedError: in user code:

    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 997, in train_step
        self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/optimizers/optimizer_v2/optimizer_v2.py", line 579, in minimize
        return self.apply_gradients(grads_and_vars, name=name)
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/optimizers/optimizer_v2/optimizer_v2.py", line 738, in apply_gradients
        return tf.__internal__.distribute.interim.maybe_merge_call(
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/optimizers/optimizer_v2/optimizer_v2.py", line 797, in _distributed_apply
        update_op = distribution.extended.update(
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/optimizers/optimizer_v2/optimizer_v2.py", line 776, in apply_grad_to_update_var  **
        update_op = self._resource_apply_dense(grad, var, **apply_kwargs)
    File "/Users/jeongdahye/opt/anaconda3/lib/python3.8/site-packages/keras/optimizers/optimizer_v2/optimizer_v2.py", line 1422, in _resource_apply_dense
        raise NotImplementedError(

    NotImplementedError: `_resource_apply_dense` must be implemented in subclasses.
