# BatchNormalization

In [1]:
import tensorflow as tf

In [5]:
model=tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=[28,28]),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(300,'relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(100,'relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(10,'softmax')])

In [6]:
model.summary()

# Transfer Learning

### 1- Buid the pretrained model (A)

In [8]:
import tensorflow as tf
from tensorflow import keras

# ------------------ Task A: MNIST ------------------
(X_train_A_full, y_train_A_full), (X_test_A, y_test_A) = keras.datasets.mnist.load_data()

# Scale pixels [0,255] → [0,1]
X_train_A_full = X_train_A_full / 255.0
X_test_A = X_test_A / 255.0

# Split train into training & validation
X_train_A, X_valid_A = X_train_A_full[:-5000], X_train_A_full[-5000:]
y_train_A, y_valid_A = y_train_A_full[:-5000], y_train_A_full[-5000:]


print("MNIST shapes:", X_train_A.shape, X_valid_A.shape, X_test_A.shape)



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 3us/step
MNIST shapes: (55000, 28, 28) (5000, 28, 28) (10000, 28, 28)


In [9]:

# Example source model (simple dense net for Task A)
model_A = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.Dense(300, activation="relu"),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dense(10, activation="softmax")  # output for 10 classes
])

model_A.compile(loss="sparse_categorical_crossentropy",
                optimizer="adam",
                metrics=["accuracy"])

# Train on Task A dataset (e.g., MNIST)
history_A = model_A.fit(X_train_A, y_train_A, epochs=5,
                        validation_data=(X_valid_A, y_valid_A))


Epoch 1/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9339 - loss: 0.2197 - val_accuracy: 0.9688 - val_loss: 0.1029
Epoch 2/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9720 - loss: 0.0875 - val_accuracy: 0.9762 - val_loss: 0.0782
Epoch 3/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9809 - loss: 0.0592 - val_accuracy: 0.9804 - val_loss: 0.0722
Epoch 4/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9856 - loss: 0.0445 - val_accuracy: 0.9804 - val_loss: 0.0714
Epoch 5/5
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9891 - loss: 0.0341 - val_accuracy: 0.9830 - val_loss: 0.0696


### 2-The model the B


In [10]:
# ------------------ Task B: Fashion MNIST ------------------
(X_train_B_full, y_train_B_full), (X_test_B, y_test_B) = keras.datasets.fashion_mnist.load_data()

# Scale pixels
X_train_B_full = X_train_B_full / 255.0
X_test_B = X_test_B / 255.0

# Split train into training & validation
X_train_B, X_valid_B = X_train_B_full[:-5000], X_train_B_full[-5000:]
y_train_B, y_valid_B = y_train_B_full[:-5000], y_train_B_full[-5000:]

print("Fashion MNIST shapes:", X_train_B.shape, X_valid_B.shape, X_test_B.shape)

Fashion MNIST shapes: (55000, 28, 28) (5000, 28, 28) (10000, 28, 28)


In [11]:
# Clone the model architecture (without last layer)
model_B_on_A = keras.models.Sequential(model_A.layers[:-1])

# Add new output layer for Task B (binary classification here)
model_B_on_A.add(keras.layers.Dense(1, activation="sigmoid"))


In [12]:
for layer in model_B_on_A.layers[:-1]:  # freeze all except new output layer
    layer.trainable = False

model_B_on_A.compile(loss="binary_crossentropy",
                     optimizer="adam",
                     metrics=["accuracy"])


In [13]:
history = model_B_on_A.fit(X_train_B, y_train_B, epochs=4,
                           validation_data=(X_valid_B, y_valid_B))


Epoch 1/4
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.1004 - loss: -252.3466 - val_accuracy: 0.0994 - val_loss: -491.5898
Epoch 2/4
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.1001 - loss: -754.0504 - val_accuracy: 0.0994 - val_loss: -981.5413
Epoch 3/4
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.1001 - loss: -1256.1771 - val_accuracy: 0.0994 - val_loss: -1472.0323
Epoch 4/4
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.1001 - loss: -1758.5029 - val_accuracy: 0.0994 - val_loss: -1963.0740


In [14]:
# Unfreeze all layers
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = True

# Recompile with smaller learning rate
optimizer = keras.optimizers.SGD(learning_rate=0.001)
model_B_on_A.compile(loss="binary_crossentropy",
                     optimizer=optimizer,
                     metrics=["accuracy"])

# Continue training (fine-tuning)
history_fine = model_B_on_A.fit(X_train_B, y_train_B, epochs=16,
                                validation_data=(X_valid_B, y_valid_B))


Epoch 1/16
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.0998 - loss: nan - val_accuracy: 0.1042 - val_loss: nan
Epoch 2/16
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.0996 - loss: nan - val_accuracy: 0.1042 - val_loss: nan
Epoch 3/16
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.0996 - loss: nan - val_accuracy: 0.1042 - val_loss: nan
Epoch 4/16
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.0996 - loss: nan - val_accuracy: 0.1042 - val_loss: nan
Epoch 5/16
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.0996 - loss: nan - val_accuracy: 0.1042 - val_loss: nan
Epoch 6/16
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.0996 - loss: nan - val_accuracy: 0.1042 - val_loss: nan
Epoch 7/16
[1m1719/1719[0m [32m━━━━━━━━━━━━━━━━━━

In [15]:
test_loss, test_acc = model_B_on_A.evaluate(X_test_B, y_test_B)
print(f"Final Test Accuracy: {test_acc:.4f}")


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.1000 - loss: nan 
Final Test Accuracy: 0.1000


# regularzation (how to use regularization with layers)

In [16]:
layer = tf.keras.layers.Dense(100, activation="relu",kernel_initializer="he_normal",kernel_regularizer=tf.keras.regularizers.l2(0.01)) #0.01 lambda value

# Dropout
In practice, you can usually apply dropout only to the neurons in
 the top one to three layers (excluding the output layer).

In [20]:
from keras import Sequential
from keras.layers import Dense,Flatten,Dropout

In [23]:
model1=Sequential([
    Flatten(input_shape=[28,28]),
    Dropout(0.2), ## is applied for the previous layer (input)
    Dense(100,'relu'),
    Dropout(0.2),  ## is applied for the previous layer (input)
    Dense(100,'relu'),
    Dropout(0.2),
    Dense(10,'softmax')])