# Notebook for model development
This notebook should be used to develop the model submodules locally. Note that model-relevant changes have to be made in the model submodules themselves. The additional code in this notebook is only used to get a better understanding of the data flow and to debug some outputs.

## 1: Set up environment
### Install dependencies
This runs the automation script `automation/train_model/install_dependencies.sh` to install the necessary packages. Additional packages for development can be installed afterwards.

In [1]:
# Change to the parent directory and run the dependency installer from there,
# because the script path is given relative to that directory.
# NOTE(review): the recorded output of this cell shows the script failing with
# "python: command not found" — confirm the interpreter name it expects.
! cd .. && bash automation/train_model/install_dependencies.sh

automation/train_model/install_dependencies.sh: line 1: python: command not found

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


### Import packages

In [2]:
import numpy as np
import matplotlib.pyplot as plt

## 2: Get the data ready
In this step, the data will be imported and preprocessed.

### Download Dataset
The dataset has to be imported using the submodule `model/import_data.py`.

In [3]:
from model.import_data import import_data

# Load the dataset. The result is unpacked into training inputs/labels and
# test inputs/labels, in that order; later cells display the inputs as images
# and use the labels as class ids.
x_train, y_train, x_test, y_test = import_data()

### Inspect dataset

In [None]:
# Summarise and preview the raw training and test splits.
#
# The statistics are stored under descriptive names (`pixel_max`, `pixel_min`,
# ...) instead of `max`/`min`: rebinding those names at notebook scope would
# shadow the Python builtins for every cell that runs later in the same kernel.
for heading, images, labels in (
    ("\nTrainingsdaten: ", x_train, y_train),
    ("\nTestdaten: ", x_test, y_test),
):
    # Pixel statistics are computed over the whole array of the split.
    print(heading)
    pixel_mean = np.mean(images)
    pixel_std = np.std(images)
    pixel_max = np.max(images)
    pixel_min = np.min(images)
    print(f"Anzahl: {images.shape[0]}")
    print(f"Durchschnittlicher Pixelwert: {pixel_mean:.2f}")
    print(f"Standardabweichung der Pixelwerte: {pixel_std:.2f}")
    print(f"Höchster Pixelwert: {pixel_max:.2f}")
    print(f"Niedrigster Pixelwert: {pixel_min:.2f}")
    print(f"Bildgröße (Höhe x Breite): {images.shape[1]} x {images.shape[2]}")
    print(f"Anzahl der Klassen: {len(np.unique(labels))}")

    # Show the first 25 images of the split in a 5x5 grid, each titled with its label.
    plt.figure(figsize=(10, 10))
    for i in range(25):
        plt.subplot(5, 5, i + 1)
        plt.imshow(images[i], cmap='gray')
        plt.title(f"{labels[i]}")
        plt.axis('off')
    plt.tight_layout()
    plt.show()

### Preprocess dataset
The dataset is preprocessed using the submodule `model/preprocess_data.py`.

In [4]:
from model.preprocess_data import preprocess_data

# Preprocess both splits with the project's preprocess_data() helper.
# NOTE(review): the inputs are overwritten by the result, so re-running this
# cell passes already-preprocessed arrays back into preprocess_data() —
# confirm that this is harmless, or restart the kernel before re-running.
x_train, x_test = preprocess_data(x_train, x_test)

In [None]:
# Summarise and preview the training and test splits after preprocessing.
#
# The statistics are stored under descriptive names (`pixel_max`, `pixel_min`,
# ...) instead of `max`/`min`: rebinding those names at notebook scope would
# shadow the Python builtins for every cell that runs later in the same kernel.
for heading, images, labels in (
    ("\nTrainingsdaten: ", x_train, y_train),
    ("\nTestdaten: ", x_test, y_test),
):
    # Pixel statistics are computed over the whole array of the split.
    print(heading)
    pixel_mean = np.mean(images)
    pixel_std = np.std(images)
    pixel_max = np.max(images)
    pixel_min = np.min(images)
    print(f"Anzahl: {images.shape[0]}")
    print(f"Durchschnittlicher Pixelwert: {pixel_mean:.2f}")
    print(f"Standardabweichung der Pixelwerte: {pixel_std:.2f}")
    print(f"Höchster Pixelwert: {pixel_max:.2f}")
    print(f"Niedrigster Pixelwert: {pixel_min:.2f}")
    print(f"Bildgröße (Höhe x Breite): {images.shape[1]} x {images.shape[2]}")
    print(f"Anzahl der Klassen: {len(np.unique(labels))}")

    # Show the first 25 images of the split in a 5x5 grid, each titled with its label.
    plt.figure(figsize=(10, 10))
    for i in range(25):
        plt.subplot(5, 5, i + 1)
        plt.imshow(images[i], cmap='gray')
        plt.title(f"{labels[i]}")
        plt.axis('off')
    plt.tight_layout()
    plt.show()

## 3: Model

### Create model
The model is created using `model/create_model.py`

In [5]:
from model.create_model import create_model

# Build the model through the project's create_model() helper, which is called
# without arguments here.
model = create_model()

  super().__init__(**kwargs)


### Inspect model

In [None]:
# Show the summary of the model created above to inspect its structure.
model.summary()

### Model training
The model is trained using `model/fit_model.py`.
This should ideally be done using the workflow.

In [6]:
from model.fit_model import fit_model

# Train the model on the training split. The returned object exposes the
# per-epoch metrics through `history.history`, which the following cell reads.
history = fit_model(model, x_train, y_train)

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.8552 - loss: 0.4931
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9545 - loss: 0.1498
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9687 - loss: 0.1032
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9739 - loss: 0.0867
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9784 - loss: 0.0725


In [None]:
# Report the metrics recorded for the last training epoch.
metrics = history.history
print(f"Final loss: {metrics['loss'][-1]:.4f}")
print(f"Final accuracy: {metrics['accuracy'][-1]:.4f}")

# Plot every training metric per epoch in its own numbered figure. The matching
# validation curve is drawn only when the history contains a `val_<metric>` entry.
curves = (
    ("loss", "Loss", "Loss per epoch"),
    ("accuracy", "Accuracy", "Accuracy per Epoch"),
)
for figure_number, (metric, label, title) in enumerate(curves, start=1):
    plt.figure(figure_number)
    plt.plot(metrics[metric], label=f"Training {label}")
    validation_key = f"val_{metric}"
    if validation_key in metrics:
        plt.plot(metrics[validation_key], label=f"Validation {label}")
    plt.title(title)
    plt.xlabel('Epoch')  # same x-axis label for both figures
    plt.ylabel(label)
    plt.legend()

plt.show()

### Evaluate model
The model is evaluated using `model/evaluate_model.py`

In [7]:
from model.evaluate_model import evaluate_model

# Evaluate the trained model on the test split; the result is unpacked as
# (loss, accuracy).
test_loss, test_acc = evaluate_model(model, x_test, y_test)

313/313 - 0s - 601us/step - accuracy: 0.9776 - loss: 0.0709


In [9]:
# Report both test metrics produced by the evaluation cell above.
print(f'Test loss: {test_loss:.4f}')
print(f'Test accuracy: {test_acc:.4f}')


Test accuracy: 0.9776
Test accuracy: 0.9776


### Save model

In [None]:
# Save the trained model as `model.keras`; the path is relative, so the file
# lands in the kernel's current working directory.
model.save('model.keras')