In [8]:
import math
import os
import pprint

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sn
import tensorflow as tf

# Log in to your W&B account
!pip install wandb
import wandb
wandb.login()
# API: 18fe909a998e642a33986bd03c61ee2c6922f72d


# Fetch MNIST (train/test images with their integer labels) through the Keras dataset helper
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Divide the pixel values by 255 and keep the scaled arrays under their own names
# (x_test_normalized is read again by the plotting cell further down), then
# reshape to (-1, 28, 28, 1) so every image carries a trailing channel axis.
x_train_normalized, x_test_normalized = x_train / 255, x_test / 255
x_train = x_train_normalized.reshape((-1, 28, 28, 1))
x_test = x_test_normalized.reshape((-1, 28, 28, 1))

# Report the resulting array shapes
for label, array in (('x_train:', x_train), ('y_train:', y_train),
                     ('x_test:', x_test), ('y_test:', y_test)):
    print(label, array.shape)





x_train: (60000, 28, 28, 1)
y_train: (60000,)
x_test: (10000, 28, 28, 1)
y_test: (10000,)


In [None]:
def Net(n1 = 28, n2 = 28):
    """Build the (uncompiled) convolutional classifier for 28x28x1 inputs.

    The network consists of two convolutional stages followed by two dense
    stages and a 10-way softmax output layer.

    Args:
        n1: number of filters of every convolution in the first stage.
        n2: number of filters of every convolution in the second stage.

    Returns:
        A `tf.keras.models.Sequential` model.
    """
    L = tf.keras.layers
    l2 = tf.keras.regularizers.l2

    def conv_stage(filters):
        # 3x3 conv (L2-regularised) -> 3x3 conv -> 5x5 conv with stride 2 and
        # 'same' padding; each conv is followed by batch norm, and dropout
        # closes the stage.
        return [
            L.Conv2D(filters=filters, kernel_size=3, activation='relu', kernel_regularizer=l2(0.005)),
            L.BatchNormalization(),
            L.Conv2D(filters=filters, kernel_size=3, activation='relu'),
            L.BatchNormalization(),
            L.Conv2D(filters=filters, kernel_size=5, strides=2, padding='same', activation='relu', use_bias=False),
            L.BatchNormalization(),
            L.Dropout(0.4),
        ]

    def dense_stage(units, activation):
        # L2-regularised dense layer -> batch norm -> dropout -> Gaussian noise.
        return [
            L.Dense(units, kernel_regularizer=l2(0.07), activation=activation),
            L.BatchNormalization(),
            L.Dropout(0.2),
            L.GaussianNoise(0.75),
        ]

    # Layers are instantiated strictly in network order.
    stack = [L.InputLayer(input_shape=(28, 28, 1))]
    stack += conv_stage(n1)
    stack += conv_stage(n2)
    stack.append(L.Flatten())
    stack += dense_stage(82, 'relu')
    stack += dense_stage(54, 'sigmoid')
    stack.append(L.Dense(10, activation='softmax', use_bias=False))

    return tf.keras.models.Sequential(stack)

#model = Net()
#model.summary()

def get_optimizer(lr=1e-3, optimizer="adam"):
    """Select an optimizer: Adam, or SGD with momentum 0.1.

    Args:
        lr: learning rate handed to the optimizer.
        optimizer: optimizer name, matched case-insensitively; "adam" or "sgd".

    Returns:
        A `tf.keras.optimizers.Adam` or `tf.keras.optimizers.SGD` instance.

    Raises:
        ValueError: if `optimizer` names anything other than "adam" or "sgd".
    """
    name = optimizer.lower()
    if name == "adam":
        return tf.keras.optimizers.Adam(learning_rate=lr)
    if name == "sgd":
        return tf.keras.optimizers.SGD(learning_rate=lr, momentum=0.1)
    # Fail loudly here instead of returning None and letting compile() fail
    # later with a less helpful message.
    raise ValueError(
        f"Unknown optimizer {optimizer!r}; expected 'adam' or 'sgd'."
    )

def train(model,
          batch_size=64,
          epochs=10,
          lr=1e-3,
          optimizer='adam',
          log_freq='epoch',
          rotate = 10,
          zoom = 0.05,
          shear = 0.05,
          shift = 0.05,
          rescale = 0.05):
    """Compile `model` and train it on augmented MNIST with W&B logging.

    Reads the notebook-level arrays `x_train`, `y_train`, `x_test`, `y_test`.
    The un-augmented test split is used as validation data, and the model with
    the best `val_accuracy` so far is written to `tmp/model.<val_accuracy>.h5`.

    Args:
        model: Keras model to compile and fit.
        batch_size: number of samples per augmented training batch.
        epochs: number of training epochs.
        lr: learning rate passed to `get_optimizer`.
        optimizer: optimizer name passed to `get_optimizer`.
        log_freq: logging frequency passed to the W&B Keras callback.
        rotate: `rotation_range` of the augmentation generator.
        zoom: `zoom_range` of the augmentation generator.
        shear: `shear_range` of the augmentation generator.
        shift: `height_shift_range` of the augmentation generator
            (the width shift is fixed to 0).
        rescale: `rescale` factor of the augmentation generator.

    Returns:
        The `History` object returned by `model.fit`.
    """
    # NOTE(review): this resets Keras' global state after `model` has already
    # been built by the caller; kept where the original had it.
    tf.keras.backend.clear_session()

    # The network ends in a softmax, so the loss works on probabilities
    # (from_logits stays at its default).
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    model.compile(loss=loss_fn,
                  optimizer=get_optimizer(lr, optimizer),
                  metrics=['accuracy'])

    # Callback setup
    wandb_callback = wandb.keras.WandbCallback(log_freq=log_freq, save_model=False)

    checkpoint_filepath = 'tmp/model.{val_accuracy:.4f}.h5'
    # Make sure the checkpoint directory exists before the first save.
    os.makedirs(os.path.dirname(checkpoint_filepath), exist_ok=True)
    model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_filepath,
        monitor='val_accuracy',
        mode='max',
        save_best_only=True,
        save_freq='epoch'
    )

    # NOTE(review): `rescale` is a multiplicative factor applied to every
    # augmented training image, while the validation arrays below are not
    # rescaled - confirm that a non-zero value is really intended.
    # NOTE(review): only the vertical shift is driven by `shift`; confirm that
    # the horizontal shift is meant to stay at 0.
    datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=rotate,       # random rotation range in degrees
        zoom_range=zoom,             # random zoom
        shear_range=shear,           # random shear
        width_shift_range=0,         # no horizontal shift
        height_shift_range=shift,    # random vertical shift (fraction of total height)
        rescale=rescale,
        horizontal_flip=False,       # digits must not be mirrored
        vertical_flip=False)
    # No datagen.fit() call: it is only required for featurewise statistics or
    # ZCA whitening, none of which are enabled above.

    # Train the model. With generator input the batch size has to be given to
    # flow(); fit(batch_size=...) must not be specified for generators.
    history = model.fit(
        datagen.flow(x_train, y_train, batch_size=batch_size),
        epochs=epochs,
        validation_data=(x_test, y_test),
        callbacks=[wandb_callback, model_checkpoint],
    )
    return history

# Configure Sweep
def _fixed(value):
    """Sweep parameter pinned to a single value."""
    return {'value': value}


def _choice(*options):
    """Sweep parameter picked from a discrete list of options."""
    return {'values': list(options)}


# Search space: training set-up, network width and augmentation strengths.
parameters_dict = {
    'optimizer': _fixed('adam'),
    'epochs': _fixed(50),
    'batch_size': _choice(32, 256),
    'learning_rate': _choice(1e-5, 1e-4, 1e-3),
    'n1': _fixed(28),
    'n2': _choice(28, 56),
    'rotate': _choice(0, 5, 10, 20, 45),
    'zoom': _choice(0, 0.05, 0.1, 0.15),
    'shear': _choice(0, 0.05, 0.1, 0.15),
    'shift': _choice(0, 0.05, 0.1, 0.15),
    'rescale': _choice(0, 0.05),
}

# Bayesian search that maximises the validation accuracy.
sweep_config = {
    'method': 'bayes',
    'metric': {'goal': 'maximize', 'name': 'val_accuracy'},
    'parameters': parameters_dict,
}
pprint.pprint(sweep_config)

def sweep_train(config_defaults=None):
    """Run a single sweep trial: start a W&B run, build the model, train it.

    Args:
        config_defaults: default configuration handed to `wandb.init`; inside
            a sweep these values are overwritten by the sweep's parameters.
    """
    # Initialise the W&B run; the context manager closes it when training ends.
    with wandb.init(config=config_defaults):
        config = wandb.config

        # Extra metadata recorded alongside the swept hyperparameters.
        config.architecture_name = "V06"
        config.dataset_name = "MNIST"

        # Build a fresh model for this trial.
        model = Net(config.n1, config.n2)

        # Keyword arguments keep every value tied to the parameter it is meant
        # for, independent of the order of train()'s signature.
        train(model,
              batch_size=config.batch_size,
              epochs=config.epochs,
              lr=config.learning_rate,
              optimizer=config.optimizer,
              log_freq='epoch',
              rotate=config.rotate,
              zoom=config.zoom,
              shear=config.shear,
              shift=config.shift,
              rescale=config.rescale)

# Register the sweep defined by sweep_config under the W&B project "V06_Datagen"
# and keep the returned sweep id.
sweep_id = wandb.sweep(sweep_config, project="V06_Datagen")
# Start an agent for that sweep which calls sweep_train for each trial (count=30).
wandb.agent(sweep_id, function=sweep_train, count=30)

{'method': 'bayes',
 'metric': {'goal': 'maximize', 'name': 'val_accuracy'},
 'parameters': {'batch_size': {'values': [32, 256]},
                'epochs': {'value': 50},
                'learning_rate': {'values': [1e-05, 0.0001, 0.001]},
                'n1': {'value': 28},
                'n2': {'values': [28, 56]},
                'optimizer': {'value': 'adam'},
                'rescale': {'values': [0, 0.05]},
                'rotate': {'values': [0, 5, 10, 20, 45]},
                'shear': {'values': [0, 0.05, 0.1, 0.15]},
                'shift': {'values': [0, 0.05, 0.1, 0.15]},
                'zoom': {'values': [0, 0.05, 0.1, 0.15]}}}




Create sweep with ID: dixr5d7a
Sweep URL: https://wandb.ai/luchalt/V06_Datagen/sweeps/dixr5d7a


In [None]:
# Once training has finished, save the model in Keras' HDF5 format so it does not have to be
# trained again in every new session. Changing model_name lets you keep several
# configurations side by side; the name must be a string.
# NOTE(review): no notebook-level `model` is assigned in the cells visible here (the only
# `model` is local to sweep_train) - confirm where this variable is expected to come from.
# NOTE(review): the name carries no '.h5' suffix; the format is selected by save_format below.
model_name = 'pepsi_p62k_a9915'
model.save(model_name, save_format='h5')

# It is best practice to encode the configuration changes in the name, so the right model
# can be identified from its file name alone. For example, a run with a learning rate of
# 0.1 instead of 0.001 could be called:
# 'marvin_lr01.h5'

print('Success! You saved Marvin as: ', model_name)

In [None]:
# Load the saved model configuration that should be evaluated.
# NOTE(review): this name differs from the model_name used by the cell that
# saves the model - confirm that a file called 'pepsi_lr03' actually exists.
model_name = 'pepsi_lr03'
pepsi_reloaded = tf.keras.models.load_model(model_name)

# Predict on the test set so there is data to evaluate the performance on.
predictions = pepsi_reloaded.predict(x_test)

# Each prediction is a probability distribution over the ten digit classes;
# keep the class with the highest probability for every sample.
predictions = np.argmax(predictions, axis=1)

# Plot for the intuitive approach: a square grid of test digits, drawn in green
# when the prediction matches the label and in red otherwise.
numbers_to_display = 196
num_cells = math.ceil(math.sqrt(numbers_to_display))
# squeeze=False keeps `axes` a 2-D array even for a 1x1 grid.
fig, axes = plt.subplots(num_cells, num_cells, figsize=(15, 15), squeeze=False)

for plot_index, ax in enumerate(axes.flat):
    if plot_index >= numbers_to_display:
        # Grid cells beyond the requested number of digits stay empty.
        ax.axis('off')
        continue
    predicted_label = predictions[plot_index]
    # Style the axes that actually holds this digit. Calling plt.xticks() and
    # friends before plt.subplot() would modify the previously active axes.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    color_map = 'Greens' if predicted_label == y_test[plot_index] else 'Reds'
    ax.imshow(x_test_normalized[plot_index].reshape((28, 28)), cmap=color_map)
    ax.set_xlabel(predicted_label)

fig.subplots_adjust(hspace=1, wspace=0.5)
plt.show()

In [None]:
# Confusion matrix of the test labels against the predicted classes
# (tf.math.confusion_matrix puts the true labels on the rows and the
# predictions on the columns).
confusion_matrix = tf.math.confusion_matrix(y_test, predictions)

# Heat-map appearance: integer annotations in square cells separated by thin lines.
heatmap_options = dict(
    annot=True,
    linewidths=.7,
    fmt="d",
    square=True,
    cmap="viridis",
)

f, ax = plt.subplots(figsize=(9, 7))
sn.heatmap(confusion_matrix, ax=ax, **heatmap_options)
plt.show()