# Laboratory 5

## Task 1
Create dummy input and target data as follows:
```python
import numpy as np
title_data = np.random.randint(0, 2, size=(num_samples, vocabulary_size))
text_body_data = np.random.randint(0, 2, size=(num_samples, vocabulary_size))
tags_data = np.random.randint(0, 2, size=(num_samples, num_tags))

priority_data = np.random.random(size=(num_samples, 1))
department_data = np.random.randint(0, 2, size=(num_samples, num_departments))
```

- Next, compile the model with:
    - the `rmsprop` optimizer,
    - the `mean_squared_error` loss function for priority, the `categorical_crossentropy` loss function for department, 
    - the `mean_absolute_error` metric for priority and the `accuracy` metric for department.
- Fit the model choosing the number of epochs=5.
- Evaluate the model. What are the values of loss functions and metrics for target data?
- Make predictions using the model. Which department was predicted by the model?
- What are the model layers?
- Print inputs and outputs for all layers.

Add another output to the previous model — we want to estimate how long a given issue ticket will take to resolve. Do this via a classification layer over three categories: `quick`, `medium`, and `difficult`. Don’t recreate a model from scratch but start from the intermediate features of the previous model:
```python
features = model.layers[4].output                                            
difficulty = layers.Dense(3, activation="softmax", name="difficulty")(features)
```

In [None]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Dataset dimensions shared by the dummy data and the model definition.
num_samples, vocabulary_size = 1000, 100
num_tags, num_departments = 10, 5


def _random_binary(width):
    """Return a (num_samples, width) integer matrix of random 0/1 entries."""
    return np.random.randint(0, 2, size=(num_samples, width))


# Dummy inputs: 0/1 matrices for the title, the text body and the tags.
title_data = _random_binary(vocabulary_size)
text_body_data = _random_binary(vocabulary_size)
tags_data = _random_binary(num_tags)

# Dummy targets: one random float in [0, 1) per sample for the priority,
# and a 0/1 matrix with one column per department.
priority_data = np.random.random(size=(num_samples, 1))
department_data = _random_binary(num_departments)

# Symbolic model inputs, one named entry point per ticket field.
# The title and the text body share the vocabulary-sized shape.
title_input, text_body_input = (
    keras.Input(shape=(vocabulary_size,), name=field_name)
    for field_name in ("title", "text_body")
)
tags_input = keras.Input(shape=(num_tags,), name="tags")

In [None]:
# Merge the three inputs into a single feature vector, then pass it through
# the hidden stack: a 64-unit and a 32-unit ReLU layer, in that order.
x = layers.concatenate([title_input, text_body_input, tags_input])
for hidden_units in (64, 32):
    x = layers.Dense(hidden_units, activation="relu")(x)

In [None]:
# Output heads, both fed by the shared features `x`.
# Priority: a single unit with no activation.
priority_output = layers.Dense(units=1, name="priority")(x)
# Department: a softmax over `num_departments` classes.
department_output = layers.Dense(
    units=num_departments,
    activation="softmax",
    name="department",
)(x)

In [None]:
# Assemble the functional model: three inputs, two outputs.
model_inputs = [title_input, text_body_input, tags_input]
model_outputs = [priority_output, department_output]
model = keras.Model(inputs=model_inputs, outputs=model_outputs)

In [None]:
# Configure training. Losses and metrics are keyed by output layer name.
output_losses = {
    "priority": "mean_squared_error",
    "department": "categorical_crossentropy",
}
output_metrics = {
    "priority": "mean_absolute_error",
    "department": "accuracy",
}
model.compile(optimizer="rmsprop", loss=output_losses, metrics=output_metrics)

In [None]:
# Train for 5 epochs. Inputs and targets are passed as dicts keyed by the
# names of the input and output layers.
train_inputs = {"title": title_data, "text_body": text_body_data, "tags": tags_data}
train_targets = {"priority": priority_data, "department": department_data}
history = model.fit(train_inputs, train_targets, epochs=5, batch_size=32)

In [None]:
# Evaluate on the same dummy data the model was trained on and print the
# values returned by `evaluate`.
eval_inputs = {"title": title_data, "text_body": text_body_data, "tags": tags_data}
eval_targets = {"priority": priority_data, "department": department_data}
results = model.evaluate(eval_inputs, eval_targets)
print("Evaluation results:", results)

In [None]:
# Make predictions. `preds[1]` is read as the department output, i.e. the
# second entry of the `outputs` list the model was built with.
preds = model.predict({"title": title_data, "text_body": text_body_data, "tags": tags_data})
# The predicted department is the index of the largest score in each row.
predicted_departments = np.argmax(preds[1], axis=1)
print("Predicted departments:", predicted_departments[:10])

# Print model summary
model.summary()

# Print the symbolic input and output tensors of every layer.
# `layer.input` / `layer.output` are used instead of `input_shape` /
# `output_shape`: the shape attributes are not available on layers in
# Keras 3, where the previous lookup silently printed "N/A" for every layer.
# `getattr` with a default still covers a layer that exposes neither.
for layer in model.layers:
    layer_input = getattr(layer, "input", "N/A")
    layer_output = getattr(layer, "output", "N/A")
    print(f"Layer: {layer.name}, Input: {layer_input}, Output: {layer_output}")

In [None]:
# Add a third output head to the existing model without rebuilding it.
# `model.layers` is ordered: three input layers, concatenate, Dense(64),
# Dense(32), then the output heads. Index 5 is therefore the Dense(32) layer
# whose output feeds the existing heads; index 4 (used previously) is the
# Dense(64) layer and would bypass the last hidden layer.
features = model.layers[5].output  # layer Dense(32)
difficulty = layers.Dense(3, activation="softmax", name="difficulty")(features)

# Reuse the original inputs and heads; only the difficulty layer is new.
model2 = keras.Model(
    inputs=model.inputs,
    outputs=[priority_output, department_output, difficulty]
)
model2.summary()

## Task 2
Implement a custom metric that measures the mean absolute error (MAE). Next, create a model using `get_mnist_model()` and evaluate it on the test data. What is the value of your MAE metric?

In [None]:
import tensorflow as tf

In [None]:
def custom_mae(y_true, y_pred):
    """Mean absolute error between true class labels and predicted classes.

    Args:
        y_true: Integer class labels. They may arrive with shape (batch,) or
            (batch, 1) depending on how Keras feeds the targets.
        y_pred: Per-class scores; the argmax over the last axis is taken as
            the predicted label.

    Returns:
        A scalar float32 tensor: the mean of |label - predicted label|.
    """
    # Flatten both operands to 1-D before subtracting. Without this, labels
    # shaped (batch, 1) broadcast against the (batch,) predictions into a
    # (batch, batch) matrix and the mean is taken over mismatched pairs.
    y_true = tf.reshape(tf.cast(y_true, tf.float32), [-1])
    y_pred_labels = tf.reshape(
        tf.cast(tf.argmax(y_pred, axis=-1), tf.float32), [-1]
    )
    return tf.reduce_mean(tf.abs(y_true - y_pred_labels))

In [None]:
# Load MNIST data and model
def _flatten_and_scale(images):
    """Reshape images to rows of 28*28 float32 values and divide by 255."""
    return images.reshape((-1, 28*28)).astype("float32") / 255


(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()
train_images = _flatten_and_scale(train_images)
test_images = _flatten_and_scale(test_images)

def get_mnist_model():
    """Build an uncompiled dense classifier for flattened 28x28 images.

    Returns:
        A `keras.Model` that maps a 784-feature input through a 64-unit ReLU
        layer to a 10-way softmax output.
    """
    pixels = keras.Input(shape=(28*28,))
    hidden = layers.Dense(64, activation="relu")(pixels)
    class_probabilities = layers.Dense(10, activation="softmax")(hidden)
    return keras.Model(pixels, class_probabilities)

# Train a fresh MNIST model with the custom MAE as its only metric, then
# evaluate it on the test split.
model = get_mnist_model()
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=[custom_mae],
)
model.fit(train_images, train_labels, epochs=2, batch_size=64)

# `evaluate` returns the loss first, so the metric value is at index 1.
results = model.evaluate(test_images, test_labels)
print("Custom MAE:", results[1])

## Task 3
Define a function `scheduler(epoch, lr)` which, for the epoch number `epoch` and a learning rate `lr`, updates `lr` as follows:
$lr\cdot\exp\left(-\frac{epoch}{10^2}\right)$. Modify `callbacks_list` from the lecture by adding `keras.callbacks.LearningRateScheduler(scheduler)` and monitoring only `val_loss`. Next, create a model using the `get_mnist_model()` function, then compile and fit the model with the same parameters as in the lecture. Compare the evaluations of the model on the validation and test data.

In [None]:
import math

In [None]:
def scheduler(epoch, lr):
    """Return the decayed learning rate `lr * exp(-epoch / 10**2)`.

    Args:
        epoch: Epoch number used in the exponent.
        lr: Learning rate to be scaled.

    Returns:
        `lr` multiplied by `exp(-epoch / 100)`.
    """
    decay_factor = math.exp(-epoch / 100)
    return lr * decay_factor

# Hold out the last 10,000 training samples for validation so that the test
# set stays unseen during training. The previous version validated on the
# test set and never evaluated the model, so the validation/test comparison
# asked for in the task could not be made.
# NOTE(review): assumes train_images/train_labels still hold the full MNIST
# training split loaded earlier in this file - confirm.
val_images, val_labels = train_images[-10000:], train_labels[-10000:]
partial_train_images = train_images[:-10000]
partial_train_labels = train_labels[:-10000]

callback = keras.callbacks.LearningRateScheduler(scheduler)

# Every monitoring callback watches `val_loss` only.
# NOTE(review): the lecture's `callbacks_list` is assumed to consist of an
# EarlyStopping and a ModelCheckpoint callback - confirm against the lecture.
# ModelCheckpoint writes the best model to `checkpoint_path.keras`.
callbacks_list = [
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=2),
    keras.callbacks.ModelCheckpoint(
        filepath="checkpoint_path.keras",
        monitor="val_loss",
        save_best_only=True,
    ),
    callback,
]

model = get_mnist_model()
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
history = model.fit(
    partial_train_images, partial_train_labels,
    epochs=5,
    validation_data=(val_images, val_labels),
    callbacks=callbacks_list
)

# Compare the model on the held-out validation data and on the test data.
val_results = model.evaluate(val_images, val_labels)
test_results = model.evaluate(test_images, test_labels)
print("Validation [loss, accuracy]:", val_results)
print("Test [loss, accuracy]:", test_results)

## Task 4
Modify the callback from the lecture so that it saves a list of per-batch accuracy values during training and creates a data frame of these values at the end of each epoch. Moreover, your callback should plot a graph of per-batch accuracy values for the first epoch at the end of the first epoch, and plot a graph of per-batch accuracy values for all epochs apart from the first epoch at the end of the training. For the following code:
```python
model = get_mnist_model()
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
model.fit(train_images, train_labels,
          epochs=5,
          callbacks=[MetricsHistory()],
          validation_data=(val_images, val_labels))
```
you should obtain the following results 
<img src="../lectures/Lecture 5-20250525/2.png"/>
<img src="../lectures/Lecture 5-20250525/3.png"/>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
class MetricsHistory(keras.callbacks.Callback):
    """Record per-batch training accuracy and plot it.

    During training the callback collects the "accuracy" value found in the
    logs after every batch. At the end of each epoch it builds a DataFrame of
    that epoch's values and keeps it in `epoch_frames`. The first epoch is
    plotted as soon as it finishes; all later epochs are plotted together
    when training ends.
    """

    def on_train_begin(self, logs=None):
        """Reset all recorded state at the start of a training run."""
        # Accuracy values of the epoch currently in progress.
        self.batch_accuracies = []
        # One list of per-batch values per finished epoch.
        self.epoch_accuracies = []
        # One DataFrame per finished epoch (column "batch_accuracy").
        self.epoch_frames = []

    def on_batch_end(self, batch, logs=None):
        """Store the accuracy reported in `logs` for the finished batch."""
        # `logs` defaults to None in the signature; guard so `.get` is safe.
        logs = logs or {}
        self.batch_accuracies.append(logs.get("accuracy"))

    def on_epoch_end(self, epoch, logs=None):
        """Build the epoch's DataFrame, plot epoch 0, and start a new epoch."""
        df = pd.DataFrame({"batch_accuracy": self.batch_accuracies})
        # Keep the frame: it was previously created and then discarded.
        self.epoch_frames.append(df)
        self.epoch_accuracies.append(self.batch_accuracies.copy())
        if epoch == 0:
            plt.figure()
            plt.plot(self.batch_accuracies)
            plt.title("Per-batch accuracy (epoch 1)")
            plt.show()
        # Start collecting from scratch for the next epoch.
        self.batch_accuracies = []

    def on_train_end(self, logs=None):
        """Plot one curve per epoch for every epoch after the first."""
        if len(self.epoch_accuracies) > 1:
            plt.figure()
            for i, acc in enumerate(self.epoch_accuracies[1:]):
                # Slice starts at the second epoch, so labels start at 2.
                plt.plot(acc, label=f"Epoch {i+2}")
            plt.title("Per-batch accuracy (kolejne epoki)")
            plt.legend()
            plt.show()

In [None]:
# Train a fresh MNIST model with the MetricsHistory callback attached.
model = get_mnist_model()
model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
metrics_history = MetricsHistory()
model.fit(
    train_images,
    train_labels,
    epochs=5,
    callbacks=[metrics_history],
    validation_data=(test_images, test_labels),
)