In [None]:
from IPython.display import display, clear_output
import numpy as np
import time
import math
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd
from scipy.spatial import distance

import tensorflow as tf
print('Tensorflow version:', tf.__version__, '\n')

# Keras is a model-level library, meaning that it is built upon tensorflow (using it as a backend) - allowing for
# high-level building blocks. Making it even easier to design neural networks.
# We will access it as tf.keras

# The tf and k abbreviations are best practice (same for numpy np and pandas pd),
# since you do not want to type T E N S O R F L O W all over your code.
# They are prevalent all over the industry and academia in a way that you'll risk a fight if you import them differently.

# Loading the MNIST dataset in one line
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

x_train_normalized = x_train/255
x_test_normalized = x_test/255

# Berechnung der ähnlichsten Ziffern für jede Klasse von 0 bis 9
for digit in range(10):
    # Filtern der Ziffern nach ihrer Klasse
    class_images = train_images[train_labels == digit]

    # Berechnung der durchschnittlichen Cosinus-Ähnlichkeit für jede Ziffer zu anderen Ziffern derselben Klasse
    similarities = []
    for i, image in enumerate(class_images):
        avg_similarity = 0
        for other_image in class_images:
            if not np.array_equal(image, other_image):
                # Umwandlung von 28x28 Bildern in Vektoren für Cosinus-Ähnlichkeit
                image_vector = image.flatten()
                other_image_vector = other_image.flatten()
                # Berechnung der Cosinus-Ähnlichkeit
                cosine_similarity = 1 - distance.cosine(image_vector, other_image_vector)
                avg_similarity += cosine_similarity
        avg_similarity /= len(class_images) - 1  # Durchschnittliche Ähnlichkeit zu allen anderen Ziffern der Klasse außer sich selbst
        similarities.append((i, avg_similarity))

    # Sortieren nach der durchschnittlichen Ähnlichkeit und Auswahl der ähnlichsten Ziffer
    similarities.sort(key=lambda x: x[1], reverse=True)
    most_similar_index = similarities[0][0]

    most_similar_digit = class_images[most_similar_index]

    # Anzeige der ähnlichsten Ziffer für jede Klasse
    plt.subplot(2, 5, digit + 1)
    plt.imshow(most_similar_digit, cmap='gray')
    plt.title(f'Most similar {digit}')
    plt.axis('off')

plt.tight_layout()
plt.show()

# in the next step, we also need to reshape our input to fit our input layer later on.
# This is due to keras expecting a definition for how many channels your input sample has, as we
# deal with gray scale this is 1.
x_train= x_train_normalized.reshape(-1, 28, 28, 1)
x_test = x_test_normalized.reshape(-1, 28, 28, 1)

# Printing the shape
print('x_train:', x_train.shape)
print('y_train:', y_train.shape)
print('x_test:', x_test.shape)
print('y_test:', y_test.shape)

Tensorflow version: 2.13.0 



In [36]:
# This is the moment where you define your model's architecture

pepsi = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=(28,28,1)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.GaussianNoise(0.75),
    tf.keras.layers.Dense(10, activation='softmax', use_bias=False)
    ])

pepsi.summary()

# Define your loss
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# print(-tf.math.log(1/10))

sampleID = 100
loss_fn(y_train[:1], pepsi(x_train[sampleID-1:sampleID]).numpy()).numpy()

# Compiling basically means to prepare the training routine for your model which consists of the optimizer,
# the loss, and the metrics which are to be reported during training

pepsi.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.0001),
              loss= loss_fn,
              metrics=['accuracy'])

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_6 (Flatten)         (None, 4)                 0         
                                                                 
 dropout_13 (Dropout)        (None, 4)                 0         
                                                                 
 gaussian_noise_6 (Gaussian  (None, 4)                 0         
 Noise)                                                          
                                                                 
 dense_10 (Dense)            (None, 10)                40        
                                                                 
Total params: 40 (160.00 Byte)
Trainable params: 40 (160.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


ValueError: Input 0 of layer "sequential_10" is incompatible with the layer: expected shape=(None, 2, 2, 1), found shape=(1, 28, 28, 1)

In [34]:
vlr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor = 0.8, patience =2, min_lr=0.00001)
vlr2 = tf.keras.callbacks.LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x)

er = tf.keras.callbacks.EarlyStopping(
        monitor="val_accuracy",
        patience=1,
        restore_best_weights=True
)
checkpoint_filepath = 'tmp/model.{val_accuracy:.4f}.h5'
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_freq= 'epoch'
)

datagen = tf.keras.preprocessing.image.ImageDataGenerator(
            rotation_range=15,  
            zoom_range = 0.15,
            shear_range=0.05,
            width_shift_range=0.1, 
            height_shift_range=0.1,
            rescale=0,
            fill_mode = 'nearest',
            horizontal_flip=False,
            vertical_flip=False)
datagen.fit(x_train)

history = pepsi.fit(
    datagen.flow(x_train, y_train, batch_size = 32),
   # x_train,
    #y_train,
   # batchsize = 256,
    epochs=200,
    validation_data=(x_test, y_test),
    callbacks=[model_checkpoint]#, vlr]
)
(test_loss, test_acc) = pepsi.evaluate(x_test, y_test)
print("Loss: ", test_loss)
print("Accuracy: ", test_acc)

model_name = 'tmpt/model.{test_acc:.4f}.h5'
pepsi.save(model_name, save_format='h5')

plt.figure(figsize=(13, 5))
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(['Train','Test'])
plt.grid()
plt.show()

ValueError: `x` (images tensor) and `y` (labels) should have the same length. Found: x.shape = (11760000, 2, 2, 1), y.shape = (60000,)

In [None]:
# This line would start up tensorboard for you
%tensorboard --logdir logs --host localhost

In [None]:
# after the training finishes, we will also save Marvin in Keras style (HDF5), so we do not have to
# train him again
# every time we start our computer. Obviously, by changing the model_name, you can also save different
# configurations of Marvin. The name has to be a string, like this: 'name.h5'
model_name = 'pepsi_V03e_99.6'
pepsi.save(model_name, save_format='h5')

# It is best practice to indicate what configuration changes you did within the name, so you know
# which model you need to load already from its name
# Let's say instead of a learning rate of 0.001 you used 0.1, your naming could then look like:
# 'marvin_lr01.h5'

print('Success! You saved Marvin as: ', model_name)

In [None]:
# Plot for the intuitive approach

numbers_to_display = 196
num_cells = math.ceil(math.sqrt(numbers_to_display))
plt.figure(figsize=(15, 15))

for plot_index in range(numbers_to_display):
    predicted_label = predictions[plot_index]
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    color_map = 'Greens' if predicted_label == y_test[plot_index] else 'Reds'
    plt.subplot(num_cells, num_cells, plot_index + 1)
    plt.imshow(x_test_normalized[plot_index].reshape((28, 28)), cmap=color_map)
    plt.xlabel(predicted_label)

plt.subplots_adjust(hspace=1, wspace=0.5)
plt.show()

In [None]:
# load a saved marvin configuration you want to evaluate
model_name = 'model.77-0.9975.h5'
pepsi_reloaded = tf.keras.models.load_model(model_name)

# Let Marvin predict on the test set, so we have some data to evaluate his performance.
predictions = pepsi_reloaded.predict([x_test])
# predictions = pepsi.predict([x_test])

# Remember that the prediction of Marvin is a probability distribution over all ten-digit classes
# We want him to assign the digit class with the highest probability to the sample.
predictions = np.argmax(predictions, axis=1)
#pd.DataFrame(predictions)

In [None]:

(test_loss, test_acc) = pepsi_reloaded.evaluate(x_test, y_test)
print("Loss: ", test_loss)
print("Accuracy: ", test_acc)

confusion_matrix = tf.math.confusion_matrix(y_test, predictions)

f, ax = plt.subplots(figsize=(9, 7))
sn.heatmap(
    confusion_matrix,
    annot=True,
    linewidths=.7,
    fmt="d",
    square=True,
    ax=ax,
    cmap="viridis",
)
plt.show()