In [None]:
# Import necessary libraries
import os
import cv2
import numpy as np
!pip install opendatasets --upgrade --quiet
!pip install kaggle --quiet
import opendatasets as od
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications import DenseNet169  # Updated to DenseNet169
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Conv2D, MaxPooling2D, Flatten, Activation
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow.keras.backend as K

# Download the dataset from Kaggle
dataset_url = 'https://www.kaggle.com/datasets/abdulhasibuddin/uc-merced-land-use-dataset'
od.download(dataset_url)

# Set the path to the downloaded image folder
data_path = 'uc-merced-land-use-dataset/UCMerced_LandUse/Images'

# Initialize image data and labels lists
image_data = []
labels = []

# Load the image dataset
for root, dirs, files in os.walk(data_path):
    for file in files:
        if file.endswith(".tif"):  # Ensure to only load .tif files
            # Read the image using OpenCV
            image = cv2.imread(os.path.join(root, file))
            if image is not None:  # Check if the image was loaded successfully
                # Resize the image to 128x128 and convert to array
                image = cv2.resize(image, (128, 128))
                image = img_to_array(image)  # Convert the image to array
                image_data.append(image)

                # Use the folder name as the label (e.g., 'agricultural', 'airplane', etc.)
                label = root.split(os.path.sep)[-1]
                labels.append(label)

# Convert the image data to a numpy array and normalize the pixel values to [0, 1]
image_data = np.array(image_data, dtype="float32") / 255.0
labels = np.array(labels)

# Binarize the labels (one-hot encode the class labels)
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

# Split the dataset into training and testing sets (80% train, 20% test)
x_train, x_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)

# Print the number of training and testing samples
print(f'Training samples: {len(x_train)}, Testing samples: {len(x_test)}')

# # Knowledge distillation loss function (distills the knowledge from teacher to student)
# def distillation_loss(y_true, y_pred, teacher_preds, temperature=5):
#     """
#     Custom loss function for knowledge distillation.
#     Combines true label loss with teacher predictions (soft labels).
#     """
#     # If teacher_preds is a tuple, unpack it
#     if isinstance(teacher_preds, tuple):
#         teacher_preds = teacher_preds[0]

#     # True label loss
#     loss_true = K.categorical_crossentropy(y_true, y_pred)

#     # Teacher's soft labels (predictions) at a higher temperature
#     soft_teacher = K.softmax(teacher_preds / temperature)
#     soft_student = K.softmax(y_pred / temperature)

#     # Distillation loss (soft labels loss)
#     loss_distillation = K.categorical_crossentropy(soft_teacher, soft_student)

#     # Combine the two losses
#     return loss_true + loss_distillation * temperature ** 2

# Build the DenseNet169 Teacher Model (replace VGG16)
def build_teacher_model(input_shape, num_classes):
    base_model = DenseNet169(weights='imagenet', include_top=False, input_shape=input_shape)

    # Add custom layers on top of the base model
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.5)(x)
    predictions = Dense(num_classes, activation='softmax')(x)

    # Create the teacher model
    teacher_model = Model(inputs=base_model.input, outputs=predictions)
    return teacher_model

# Build a smaller CNN Student Model
def build_student_model(input_shape, num_classes):
    model = Sequential()

    # Simple CNN layers
    model.add(Conv2D(32, (3, 3), padding="same", input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    return model

# Define input shape and number of classes
input_shape = (128, 128, 3)
num_classes = len(lb.classes_)

# Build and compile the teacher model (DenseNet169)
teacher_model = build_teacher_model(input_shape, num_classes)
teacher_model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the teacher model
teacher_model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=5, batch_size=32)

# Evaluate the teacher model on the test set
teacher_test_loss, teacher_test_accuracy = teacher_model.evaluate(x_test, y_test)
print(f"Test Accuracy (Teacher Model): {teacher_test_accuracy * 100:.2f}%")

# # Use the teacher model to predict on the training set (these will be the soft labels for the student)
# teacher_preds = teacher_model.predict(x_train)

# # Ensure teacher_preds is a numpy array (convert it if needed)
# teacher_preds = np.array(teacher_preds)

# # Build and compile the student model
# student_model = build_student_model(input_shape, num_classes)
# student_model.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
#                       loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher_preds),
#                       metrics=['accuracy'])

# # Set early stopping to avoid overfitting
# early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# # Train the student model using both the true labels and teacher's soft labels
# student_model.fit(x_train, y_train,
#                   validation_data=(x_test, y_test),
#                   epochs=5,
#                   batch_size=32,
#                   callbacks=[early_stopping])

# # Evaluate the student model on the test set
# test_loss, test_accuracy = student_model.evaluate(x_test, y_test)
# print(f"Test Accuracy (Student Model): {test_accuracy * 100:.2f}%")

# Custom distillation loss function
def distillation_loss(teacher_preds, temperature=5):
    """
    Custom loss function for knowledge distillation.
    Combines true label loss with teacher predictions (soft labels).
    """
    # If teacher_preds is a tuple, extract the predictions
    if isinstance(teacher_preds, tuple):
        teacher_preds = teacher_preds[0]

    def loss(y_true, y_pred):
        # True label loss
        loss_true = K.categorical_crossentropy(y_true, y_pred)

        # Teacher's soft labels (predictions) at a higher temperature
        soft_teacher = K.softmax(teacher_preds / temperature)
        soft_student = K.softmax(y_pred / temperature)

        # Distillation loss (soft labels loss)
        loss_distillation = K.categorical_crossentropy(soft_teacher, soft_student)

        # Combine the two losses
        return loss_true + loss_distillation * temperature ** 2

    return loss

# Use the teacher model to predict on the training set (these will be the soft labels for the student)
teacher_preds = teacher_model.predict(x_train)

# Ensure teacher_preds is a numpy array (convert it if needed)
teacher_preds = np.array(teacher_preds)

# Build and compile the student model
student_model = build_student_model(input_shape, num_classes)

# Compile the student model with distillation loss
student_model.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
                      loss=distillation_loss(teacher_preds),
                      metrics=['accuracy'])

# Set early stopping to avoid overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the student model using both the true labels and teacher's soft labels
student_model.fit(x_train, y_train,
                  validation_data=(x_test, y_test),
                  epochs=5,
                  batch_size=32,
                  callbacks=[early_stopping])

# Evaluate the student model on the test set
test_loss, test_accuracy = student_model.evaluate(x_test, y_test)
print(f"Test Accuracy (Student Model): {test_accuracy * 100:.2f}%")


Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username:Your Kaggle Key:Dataset URL: https://www.kaggle.com/datasets/abdulhasibuddin/uc-merced-land-use-dataset
Downloading uc-merced-land-use-dataset.zip to ./uc-merced-land-use-dataset


100%|██████████| 317M/317M [00:04<00:00, 68.8MB/s]



Training samples: 1680, Testing samples: 420
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m51877672/51877672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m702s[0m 11s/step - accuracy: 0.3190 - loss: 2.6309 - val_accuracy: 0.6952 - val_loss: 1.1862
Epoch 2/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m598s[0m 11s/step - accuracy: 0.9274 - loss: 0.2901 - val_accuracy: 0.8881 - val_loss: 0.4117
Epoch 3/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m560s[0m 10s/step - accuracy: 0.9682 - loss: 0.1177 - val_accuracy: 0.8952 - val_loss: 0.2980
Epoch 4/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m544s[0m 10s/step - accuracy: 0.9871 - loss: 0.0631 - val_accuracy: 0.9310 - val_loss: 0.2110
Epoch 5/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m575s[0m 1

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5


AttributeError: 'tuple' object has no attribute 'rank'

In [None]:
# Import necessary libraries
import os
import cv2
import numpy as np
!pip install opendatasets --upgrade --quiet
!pip install kaggle --quiet
import opendatasets as od
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications import DenseNet169  # Updated to DenseNet169
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Conv2D, MaxPooling2D, Flatten, Activation
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow.keras.backend as K

# Download the dataset from Kaggle
dataset_url = 'https://www.kaggle.com/datasets/abdulhasibuddin/uc-merced-land-use-dataset'
od.download(dataset_url)

# Set the path to the downloaded image folder
data_path = 'uc-merced-land-use-dataset/UCMerced_LandUse/Images'

# Initialize image data and labels lists
image_data = []
labels = []

# Load the image dataset
for root, dirs, files in os.walk(data_path):
    for file in files:
        if file.endswith(".tif"):  # Ensure to only load .tif files
            # Read the image using OpenCV
            image = cv2.imread(os.path.join(root, file))
            if image is not None:  # Check if the image was loaded successfully
                # Resize the image to 128x128 and convert to array
                image = cv2.resize(image, (128, 128))
                image = img_to_array(image)  # Convert the image to array
                image_data.append(image)

                # Use the folder name as the label (e.g., 'agricultural', 'airplane', etc.)
                label = root.split(os.path.sep)[-1]
                labels.append(label)

# Convert the image data to a numpy array and normalize the pixel values to [0, 1]
image_data = np.array(image_data, dtype="float32") / 255.0
labels = np.array(labels)

# Binarize the labels (one-hot encode the class labels)
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

# Split the dataset into training and testing sets (80% train, 20% test)
x_train, x_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)

# Print the number of training and testing samples
print(f'Training samples: {len(x_train)}, Testing samples: {len(x_test)}')

# Build the DenseNet169 Teacher Model
def build_teacher_model(input_shape, num_classes):
    base_model = DenseNet169(weights='imagenet', include_top=False, input_shape=input_shape)

    # Add custom layers on top of the base model
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.5)(x)
    predictions = Dense(num_classes, activation='softmax')(x)

    # Create the teacher model
    teacher_model = Model(inputs=base_model.input, outputs=predictions)
    return teacher_model

# Build a smaller CNN Student Model
def build_student_model(input_shape, num_classes):
    model = Sequential()

    # Simple CNN layers
    model.add(Conv2D(32, (3, 3), padding="same", input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    return model

# Define input shape and number of classes
input_shape = (128, 128, 3)
num_classes = len(lb.classes_)

# Build and compile the teacher model (DenseNet169)
teacher_model = build_teacher_model(input_shape, num_classes)
teacher_model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the teacher model
teacher_model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=5, batch_size=32)

# Evaluate the teacher model on the test set
teacher_test_loss, teacher_test_accuracy = teacher_model.evaluate(x_test, y_test)
print(f"Test Accuracy (Teacher Model): {teacher_test_accuracy * 100:.2f}%")

# # Custom distillation loss function
# def distillation_loss(teacher_preds, temperature=5):
#     """
#     Custom loss function for knowledge distillation.
#     Combines true label loss with teacher predictions (soft labels).
#     """
#     # Ensure teacher_preds is a numpy array (convert it if needed)
#     if isinstance(teacher_preds, tuple):
#         teacher_preds = np.array(teacher_preds[0])
#     else:
#         teacher_preds = np.array(teacher_preds)

#     def loss(y_true, y_pred):
#         # True label loss
#         loss_true = K.categorical_crossentropy(y_true, y_pred)

#         # Teacher's soft labels (predictions) at a higher temperature
#         soft_teacher = K.softmax(teacher_preds / temperature)
#         soft_student = K.softmax(y_pred / temperature)

#         # Distillation loss (soft labels loss)
#         loss_distillation = K.categorical_crossentropy(soft_teacher, soft_student)

#         # Combine the two losses
#         return loss_true + loss_distillation * temperature ** 2

#     return loss

# # Use the teacher model to predict on the training set (these will be the soft labels for the student)
# teacher_preds = teacher_model.predict(x_train)

# # Build and compile the student model
# student_model = build_student_model(input_shape, num_classes)

# # Compile the student model with distillation loss
# student_model.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
#                       loss=distillation_loss(teacher_preds),
#                       metrics=['accuracy'])

# # Set early stopping to avoid overfitting
# early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# # Train the student model using both the true labels and teacher's soft labels
# student_model.fit(x_train, y_train,
#                   validation_data=(x_test, y_test),
#                   epochs=5,
#                   batch_size=32,
#                   callbacks=[early_stopping])

# # Evaluate the student model on the test set
# test_loss, test_accuracy = student_model.evaluate(x_test, y_test)
# print(f"Test Accuracy (Student Model): {test_accuracy * 100:.2f}%")


Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: akshayanand2002
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/abdulhasibuddin/uc-merced-land-use-dataset
Downloading uc-merced-land-use-dataset.zip to ./uc-merced-land-use-dataset


100%|██████████| 317M/317M [00:02<00:00, 143MB/s]



Training samples: 1680, Testing samples: 420
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m51877672/51877672[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m623s[0m 9s/step - accuracy: 0.3101 - loss: 2.6424 - val_accuracy: 0.6405 - val_loss: 1.2486
Epoch 2/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m536s[0m 9s/step - accuracy: 0.9200 - loss: 0.3638 - val_accuracy: 0.8571 - val_loss: 0.5684
Epoch 3/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m514s[0m 9s/step - accuracy: 0.9767 - loss: 0.1276 - val_accuracy: 0.9238 - val_loss: 0.3011
Epoch 4/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m480s[0m 9s/step - accuracy: 0.9893 - loss: 0.0651 - val_accuracy: 0.9548 - val_loss: 0.1954
Epoch 5/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m486s[0m 9s/st

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5


AttributeError: 'tuple' object has no attribute 'rank'

In [None]:
teacher_model.summary()

In [None]:
# # Distillation Loss Function
# def distillation_loss(teacher_preds, temperature=5):
#     # Ensure teacher_preds is correctly shaped
#     teacher_preds = np.array(teacher_preds)
#     def loss(y_true, y_pred):
#         # True label loss
#         loss_true = K.categorical_crossentropy(y_true, y_pred)
#         # Soft labels loss
#         soft_teacher = K.softmax(teacher_preds / temperature)
#         soft_student = K.softmax(y_pred / temperature)
#         loss_distillation = K.categorical_crossentropy(soft_teacher, soft_student)
#         return loss_true + loss_distillation * temperature ** 2
#     return loss

# # Generate soft labels for the student model using the teacher's predictions
# teacher_preds = teacher_model.predict(x_train)

teacher_preds = np.array(teacher_model.predict(x_train))
import numpy as np
from tensorflow.keras import backend as K
from tensorflow.keras.losses import categorical_crossentropy

def distillation_loss(teacher_preds, temperature=5):
    teacher_preds = K.constant(teacher_preds)  # Ensure teacher_preds is a tensor

    def loss(y_true, y_pred):
        # True label loss
        loss_true = categorical_crossentropy(y_true, y_pred)
        # Soft labels loss
        soft_teacher = K.softmax(teacher_preds / temperature)
        soft_student = K.softmax(y_pred / temperature)
        loss_distillation = categorical_crossentropy(soft_teacher, soft_student)
        return loss_true + loss_distillation * temperature ** 2
    return loss


# Build and compile the student model
student_model = build_student_model(input_shape, num_classes)
student_model.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
                      loss=distillation_loss(teacher_preds),
                      metrics=['accuracy'])
student_model.summary()

# Set early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the student model
student_model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=5, batch_size=32, callbacks=[early_stopping])
test_loss, test_accuracy = student_model.evaluate(x_test, y_test)
print(f"Test Accuracy (Student Model): {test_accuracy * 100:.2f}%")

[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 2s/step


Epoch 1/5


InvalidArgumentError: Graph execution error:

Detected at node sequential_2_1/dropout_3_1/stateless_dropout/SelectV2 defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start

  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-4-6796827fd4dc>", line 48, in <cell line: 48>

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 318, in fit

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 108, in one_step_on_data

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 51, in train_step

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py", line 882, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/models/sequential.py", line 209, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/models/functional.py", line 175, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/function.py", line 171, in _run_through_graph

  File "/usr/local/lib/python3.10/dist-packages/keras/src/models/functional.py", line 556, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py", line 882, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/regularization/dropout.py", line 58, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/random.py", line 80, in dropout

condition [1680,128], then [32,128], and else [] must be broadcastable
	 [[{{node sequential_2_1/dropout_3_1/stateless_dropout/SelectV2}}]] [Op:__inference_one_step_on_iterator_136083]

In [None]:
print("x_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)
print("x_test shape:", x_test.shape)
print("y_test shape:", y_test.shape)


x_train shape: (1680, 128, 128, 3)
y_train shape: (1680, 21)
x_test shape: (420, 128, 128, 3)
y_test shape: (420, 21)


In [None]:
import numpy as np
from tensorflow.keras import backend as K
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import EarlyStopping

# Ensure teacher_preds has the correct shape for distillation
teacher_preds = np.array(teacher_model.predict(x_train))
if teacher_preds.shape[0] != y_train.shape[0] or teacher_preds.shape[1] != y_train.shape[1]:
    teacher_preds = np.reshape(teacher_preds, (y_train.shape[0], y_train.shape[1]))

# Define Distillation Loss Function
def distillation_loss(teacher_preds, temperature=5):
    teacher_preds = K.constant(teacher_preds)  # Convert teacher_preds to a constant tensor

    def loss(y_true, y_pred):
        # Calculate true label loss
        loss_true = categorical_crossentropy(y_true, y_pred)

        # Calculate soft labels loss
        soft_teacher = K.softmax(teacher_preds / temperature)
        soft_student = K.softmax(y_pred / temperature)
        loss_distillation = categorical_crossentropy(soft_teacher, soft_student)

        return loss_true + loss_distillation * (temperature ** 2)

    return loss

# Build and compile the student model
student_model = build_student_model(input_shape=(128, 128, 3), num_classes=21)
student_model.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
                      loss=distillation_loss(teacher_preds),
                      metrics=['accuracy'])
student_model.summary()

# Set early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the student model
student_model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=5, batch_size=32, callbacks=[early_stopping])

# Evaluate the student model
test_loss, test_accuracy = student_model.evaluate(x_test, y_test)
print(f"Test Accuracy (Student Model): {test_accuracy * 100:.2f}%")


[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 2s/step


Epoch 1/5


InvalidArgumentError: Graph execution error:

Detected at node sequential_3_1/dropout_4_1/stateless_dropout/SelectV2 defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start

  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-6-ff3efa00231b>", line 40, in <cell line: 40>

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 318, in fit

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 108, in one_step_on_data

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 51, in train_step

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py", line 882, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/models/sequential.py", line 209, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/models/functional.py", line 175, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/function.py", line 171, in _run_through_graph

  File "/usr/local/lib/python3.10/dist-packages/keras/src/models/functional.py", line 556, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py", line 882, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/regularization/dropout.py", line 58, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/random.py", line 80, in dropout

condition [1680,128], then [32,128], and else [] must be broadcastable
	 [[{{node sequential_3_1/dropout_4_1/stateless_dropout/SelectV2}}]] [Op:__inference_one_step_on_iterator_137830]

In [None]:
import numpy as np
from tensorflow.keras import backend as K
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import EarlyStopping

# Ensure teacher_preds has the correct shape for distillation
teacher_preds = np.array(teacher_model.predict(x_train))
if teacher_preds.shape[0] != y_train.shape[0] or teacher_preds.shape[1] != y_train.shape[1]:
    teacher_preds = np.reshape(teacher_preds, (y_train.shape[0], y_train.shape[1]))

# Define Distillation Loss Function
def distillation_loss(teacher_preds, temperature=5):
    teacher_preds = K.constant(teacher_preds)  # Convert teacher_preds to a constant tensor

    def loss(y_true, y_pred):
        # Calculate true label loss
        loss_true = categorical_crossentropy(y_true, y_pred)

        # Calculate soft labels loss
        soft_teacher = K.softmax(teacher_preds / temperature)
        soft_student = K.softmax(y_pred / temperature)
        loss_distillation = categorical_crossentropy(soft_teacher, soft_student)

        return loss_true + loss_distillation * (temperature ** 2)

    return loss

# Build and compile the student model
student_model = build_student_model(input_shape=(128, 128, 3), num_classes=21)
student_model.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
                      loss=distillation_loss(teacher_preds),
                      metrics=['accuracy'])
student_model.summary()

# Set early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the student model
student_model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=5, batch_size=32, callbacks=[early_stopping])

# Evaluate the student model
test_loss, test_accuracy = student_model.evaluate(x_test, y_test)
print(f"Test Accuracy (Student Model): {test_accuracy * 100:.2f}%")


[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 2s/step


Epoch 1/5


InvalidArgumentError: Graph execution error:

Detected at node sequential_4_1/dropout_5_1/stateless_dropout/SelectV2 defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start

  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-7-ff3efa00231b>", line 40, in <cell line: 40>

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 318, in fit

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 108, in one_step_on_data

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 51, in train_step

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py", line 882, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/models/sequential.py", line 209, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/models/functional.py", line 175, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/function.py", line 171, in _run_through_graph

  File "/usr/local/lib/python3.10/dist-packages/keras/src/models/functional.py", line 556, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py", line 882, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/regularization/dropout.py", line 58, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/random.py", line 80, in dropout

condition [1680,128], then [32,128], and else [] must be broadcastable
	 [[{{node sequential_4_1/dropout_5_1/stateless_dropout/SelectV2}}]] [Op:__inference_one_step_on_iterator_139577]

In [None]:
# Import necessary libraries
import os
import cv2
import numpy as np
import opendatasets as od
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications import DenseNet169
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Conv2D, MaxPooling2D, Flatten, Activation
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow.keras.backend as K

# Download the dataset from Kaggle
dataset_url = 'https://www.kaggle.com/datasets/abdulhasibuddin/uc-merced-land-use-dataset'
od.download(dataset_url)

# Set the path to the downloaded image folder
data_path = 'uc-merced-land-use-dataset/UCMerced_LandUse/Images'

# Initialize image data and labels lists
image_data = []
labels = []

# Load the image dataset
for root, dirs, files in os.walk(data_path):
    for file in files:
        if file.endswith(".tif"):  # Ensure to only load .tif files
            # Read the image using OpenCV
            image = cv2.imread(os.path.join(root, file))
            if image is not None:  # Check if the image was loaded successfully
                # Resize the image to 128x128 and convert to array
                image = cv2.resize(image, (128, 128))
                image = img_to_array(image)  # Convert the image to array
                image_data.append(image)

                # Use the folder name as the label (e.g., 'agricultural', 'airplane', etc.)
                label = root.split(os.path.sep)[-1]
                labels.append(label)

# Convert the image data to a numpy array and normalize the pixel values to [0, 1]
image_data = np.array(image_data, dtype="float32") / 255.0
labels = np.array(labels)

# Binarize the labels (one-hot encode the class labels)
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

# Split the dataset into training and testing sets (80% train, 20% test)
x_train, x_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)

# Print the number of training and testing samples
print(f'Training samples: {len(x_train)}, Testing samples: {len(x_test)}')

# Build the DenseNet169 Teacher Model
def build_teacher_model(input_shape, num_classes):
    base_model = DenseNet169(weights='imagenet', include_top=False, input_shape=input_shape)

    # Add custom layers on top of the base model
    x = base_model.output
    x = GlobalAveragePooling2D()(x)  # Ensures output shape matches (batch_size, num_classes)
    x = Dropout(0.5)(x)
    predictions = Dense(num_classes, activation='softmax')(x)

    # Create the teacher model
    teacher_model = Model(inputs=base_model.input, outputs=predictions)
    return teacher_model

# Build a smaller CNN Student Model
def build_student_model(input_shape, num_classes):
    model = Sequential()

    # Simple CNN layers
    model.add(Conv2D(32, (3, 3), padding="same", input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    return model

# Define input shape and number of classes
input_shape = (128, 128, 3)
num_classes = len(lb.classes_)

# Build and compile the teacher model (DenseNet169)
teacher_model = build_teacher_model(input_shape, num_classes)
teacher_model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the teacher model
teacher_model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=5, batch_size=32)

# Evaluate the teacher model on the test set
teacher_test_loss, teacher_test_accuracy = teacher_model.evaluate(x_test, y_test)
print(f"Test Accuracy (Teacher Model): {teacher_test_accuracy * 100:.2f}%")

# Distillation Loss Function
def distillation_loss(teacher_preds, temperature=5):
    """
    Custom loss function for knowledge distillation.
    Combines true label loss with teacher predictions (soft labels).
    """
    # Define teacher predictions as a constant tensor for the loss function
    teacher_preds_tensor = K.constant(teacher_preds)

    def loss(y_true, y_pred):
        # True label loss
        loss_true = K.categorical_crossentropy(y_true, y_pred)

        # Teacher's soft labels (predictions) at a higher temperature
        soft_teacher = K.softmax(teacher_preds_tensor / temperature)
        soft_student = K.softmax(y_pred / temperature)

        # Distillation loss (soft labels loss)
        loss_distillation = K.categorical_crossentropy(soft_teacher, soft_student)

        # Combine the two losses
        return loss_true + loss_distillation * (temperature ** 2)

    return loss

# Use the teacher model to predict on the training set (these will be the soft labels for the student)
teacher_preds = teacher_model.predict(x_train)

# Build and compile the student model
student_model = build_student_model(input_shape, num_classes)
student_model.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
                      loss=distillation_loss(teacher_preds),
                      metrics=['accuracy'])

# Set early stopping to avoid overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the student model using both the true labels and teacher's soft labels
student_model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=15, batch_size=32, callbacks=[early_stopping])

# Evaluate the student model on the test set
test_loss, test_accuracy = student_model.evaluate(x_test, y_test)
print(f"Test Accuracy (Student Model): {test_accuracy * 100:.2f}%")


Skipping, found downloaded files in "./uc-merced-land-use-dataset" (use force=True to force download)
Training samples: 1680, Testing samples: 420
Epoch 1/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m617s[0m 9s/step - accuracy: 0.3085 - loss: 2.6271 - val_accuracy: 0.7214 - val_loss: 0.9943
Epoch 2/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m490s[0m 9s/step - accuracy: 0.9164 - loss: 0.3577 - val_accuracy: 0.8976 - val_loss: 0.4448
Epoch 3/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m522s[0m 9s/step - accuracy: 0.9849 - loss: 0.1087 - val_accuracy: 0.9381 - val_loss: 0.2579
Epoch 4/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m504s[0m 9s/step - accuracy: 0.9898 - loss: 0.0663 - val_accuracy: 0.9548 - val_loss: 0.1809
Epoch 5/5
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m501s[0m 9s/step - accuracy: 0.9957 - loss: 0.0424 - val_accuracy: 0.9619 - val_loss: 0.1369
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/15


InvalidArgumentError: Graph execution error:

Detected at node gradient_tape/compile_loss/loss/mul_1/BroadcastGradientArgs defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start

  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-8-9e273515d5d3>", line 147, in <cell line: 147>

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 318, in fit

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 108, in one_step_on_data

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 70, in train_step

Incompatible shapes: [1680,21] vs. [32,21]
	 [[{{node gradient_tape/compile_loss/loss/mul_1/BroadcastGradientArgs}}]] [Op:__inference_one_step_on_iterator_274382]

In [None]:
# Generate teacher predictions (soft labels) for the student model
teacher_preds = teacher_model.predict(x_train)

# Define a generator to feed both true labels and teacher's soft labels to the student model
def data_generator(x, y_true, teacher_preds, batch_size=32):
    batch_count = len(x) // batch_size
    while True:
        for i in range(batch_count):
            x_batch = x[i * batch_size:(i + 1) * batch_size]
            y_batch = y_true[i * batch_size:(i + 1) * batch_size]
            teacher_pred_batch = teacher_preds[i * batch_size:(i + 1) * batch_size]
            yield x_batch, {'student_output': y_batch, 'teacher_output': teacher_pred_batch}

# Distillation Loss Function
def distillation_loss(y_true, y_pred, teacher_pred, temperature=5):
    """
    Custom loss function for knowledge distillation.
    Combines true label loss with teacher predictions (soft labels).
    """
    # True label loss
    loss_true = K.categorical_crossentropy(y_true, y_pred)

    # Teacher's soft labels (predictions) at a higher temperature
    soft_teacher = K.softmax(teacher_pred / temperature)
    soft_student = K.softmax(y_pred / temperature)

    # Distillation loss (soft labels loss)
    loss_distillation = K.categorical_crossentropy(soft_teacher, soft_student)

    # Combine the two losses
    return loss_true + loss_distillation * (temperature ** 2)

# Build and compile the student model
student_model = build_student_model(input_shape, num_classes)
student_model.compile(optimizer=SGD(learning_rate=1e-3, momentum=0.9),
                      loss=distillation_loss,
                      metrics=['accuracy'])

# Set early stopping to avoid overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the student model using the generator
student_model.fit(data_generator(x_train, y_train, teacher_preds, batch_size=32),
                  validation_data=(x_test, y_test),
                  steps_per_epoch=len(x_train) // 32,
                  epochs=15,
                  callbacks=[early_stopping])

# Evaluate the student model on the test set
test_loss, test_accuracy = student_model.evaluate(x_test, y_test)
print(f"Test Accuracy (Student Model): {test_accuracy * 100:.2f}%")

[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 2s/step
Epoch 1/15


TypeError: distillation_loss() missing 1 required positional argument: 'teacher_pred'

In [None]:
############################################################

In [None]:
# Install the required libraries
!pip install opendatasets --upgrade --quiet
!pip install kaggle --quiet


In [None]:
# Import Kaggle and OpenDatasets to download datasets from Kaggle
import opendatasets as od

# Download the dataset from Kaggle
dataset_url = 'https://www.kaggle.com/datasets/abdulhasibuddin/uc-merced-land-use-dataset'
od.download(dataset_url)


Skipping, found downloaded files in "./uc-merced-land-use-dataset" (use force=True to force download)


In [None]:
import os
import cv2
import numpy as np
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

# Set the path to the image folder
data_path = 'uc-merced-land-use-dataset/UCMerced_LandUse/Images'

# Initialize image data and labels
image_data = []
labels = []

# Load the image dataset
for root, dirs, files in os.walk(data_path):
    for file in files:
        if file.endswith(".tif"):
            # Read the image
            image = cv2.imread(os.path.join(root, file))
            image = cv2.resize(image, (128, 128))
            image = img_to_array(image)
            image_data.append(image)

            # Use the folder name as the label
            label = root.split(os.path.sep)[-1]
            labels.append(label)

# Convert to numpy arrays and normalize the images
image_data = np.array(image_data, dtype="float") / 255.0
labels = np.array(labels)

# Binarize the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)

print(f'Training samples: {len(x_train)}, Testing samples: {len(x_test)}')


Training samples: 1680, Testing samples: 420


In [None]:
# from keras.models import Model
# from keras.layers import Input, Dense, Activation, Dropout, GlobalAveragePooling2D, BatchNormalization, ZeroPadding2D, AveragePooling2D, MaxPooling2D, Conv2D
# import keras.backend as K
# from keras.layers import Input, Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, Dropout, BatchNormalization, Concatenate

# def DenseNet(nb_dense_block=4, growth_rate=32, nb_filter=64, reduction=0.0, dropout_rate=0.0, weight_decay=1e-4,  num_classes=None, weights_path=None):
#     eps = 1.1e-5
#     compression = 1.0 - reduction
#     global concat_axis
#     if tf.keras.backend.image_data_format() == 'tf':
#         concat_axis = 3
#         img_input = Input(shape=(224, 224, 3), name='data')
#     else:
#         concat_axis = 1
#         img_input = Input(shape=(3, 224, 224), name='data')

#     # DenseNet-169 architecture
#     nb_layers = [6, 12, 32, 32]  # For DenseNet-169
#     x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input)
#     x = Conv2D(nb_filter, (7, 7), strides=(2, 2), name='conv1', use_bias=False)(x)
#     x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv1_bn')(x)
#     x = Activation('relu', name='relu1')(x)
#     x = ZeroPadding2D((1, 1), name='pool1_zeropadding')(x)
#     x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x)

#     for block_idx in range(nb_dense_block - 1):
#         stage = block_idx + 2
#         x, nb_filter = dense_block(x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate)
#         x = transition_block(x, stage, nb_filter, compression=compression, dropout_rate=dropout_rate)
#         nb_filter = int(nb_filter * compression)

#     final_stage = stage + 1
#     x, nb_filter = dense_block(x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate)

#     x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv' + str(final_stage) + '_blk_bn')(x)
#     x = Activation('relu', name='relu' + str(final_stage) + '_blk')(x)
#     x = GlobalAveragePooling2D(name='pool' + str(final_stage))(x)

#     x = Dense(classes, name='fc6')(x)
#     x = Activation('softmax', name='prob')(x)

#     model = Model(img_input, x, name='densenet')

#     if weights_path is not None:
#         model.load_weights(weights_path)

#     return model

# def conv_block(x, stage, branch, nb_filter, dropout_rate=None):
#     inter_channel = nb_filter * 4
#     x = BatchNormalization(axis=concat_axis)(x)
#     x = Activation('relu')(x)
#     x = Conv2D(inter_channel, (1, 1), use_bias=False)(x)

#     if dropout_rate:
#         x = Dropout(dropout_rate)(x)

#     x = BatchNormalization(axis=concat_axis)(x)
#     x = Activation('relu')(x)
#     x = ZeroPadding2D((1, 1))(x)
#     x = Conv2D(nb_filter, (3, 3), use_bias=False)(x)

#     if dropout_rate:
#         x = Dropout(dropout_rate)(x)

#     return x

# def transition_block(x, stage, nb_filter, compression=1.0, dropout_rate=None):
#     x = BatchNormalization(axis=concat_axis)(x)
#     x = Activation('relu')(x)
#     x = Conv2D(int(nb_filter * compression), (1, 1), use_bias=False)(x)

#     if dropout_rate:
#         x = Dropout(dropout_rate)(x)

#     x = AveragePooling2D((2, 2), strides=(2, 2))(x)
#     return x

# def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, grow_nb_filters=True):
#     concat_feat = x

#     for i in range(nb_layers):
#         branch = i + 1
#         x = conv_block(concat_feat, stage, branch, growth_rate, dropout_rate)
#         concat_feat = Concatenate(axis=concat_axis)([concat_feat, x])

#         if grow_nb_filters:
#             nb_filter += growth_rate

#     return concat_feat, nb_filter


In [None]:
# def student_model(img_rows, img_cols, color_type=1, num_classes=None):
#     nb_dense_block = 3  # Fewer dense blocks than the teacher model
#     growth_rate = 16  # Smaller growth rate
#     nb_filter = 32  # Fewer filters
#     return DenseNet(nb_dense_block=nb_dense_block, growth_rate=growth_rate, nb_filter=nb_filter, num_classes=num_classes)


In [None]:
# import tensorflow as tf

# def distillation_loss(y_true, y_pred, teacher_preds, temperature=3.0, alpha=0.1):
#     """
#     Compute the distillation loss combining both:
#     - Soft target loss (KL divergence)
#     - Hard target loss (standard cross-entropy)
#     """
#     # Get the current batch size
#     batch_size = tf.shape(y_pred)[0]

#     # Use tf.gather to select the relevant teacher predictions for the current batch
#     teacher_batch_preds = tf.gather(teacher_preds, tf.range(batch_size))

#     # Softmax for soft labels and student predictions with temperature scaling
#     y_true = tf.keras.activations.softmax(y_true / temperature)
#     y_pred = tf.keras.activations.softmax(y_pred / temperature)
#     teacher_batch_preds = tf.keras.activations.softmax(teacher_batch_preds / temperature)

#     # Cross-entropy between the student predictions and the teacher predictions (soft targets)
#     soft_loss = tf.keras.losses.categorical_crossentropy(teacher_batch_preds, y_pred)

#     # Standard cross-entropy loss with hard targets
#     hard_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)

#     # Weighted sum of the distillation loss and the hard target loss
#     return alpha * soft_loss + (1. - alpha) * hard_loss


In [None]:
# # Build the teacher and student models
# teacher = DenseNet(nb_dense_block=4, growth_rate=32, nb_filter=64, num_classes=21)
# student = student_model(img_rows=128, img_cols=128, color_type=3, num_classes=21)

# # Compile the teacher model
# teacher.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])

# # Precompute teacher's predictions for the training set
# teacher_preds = teacher.predict(x_train)

# # Compile the student model with the distillation loss (using precomputed teacher predictions)
# student.compile(optimizer='adam', loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher_preds), metrics=['accuracy'])


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Activation, Dropout, GlobalAveragePooling2D, BatchNormalization, ZeroPadding2D
from tensorflow.keras.layers import AveragePooling2D, MaxPooling2D, Conv2D, Concatenate
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Global variable for axis
concat_axis = -1 if tf.keras.backend.image_data_format() == 'channels_last' else 1

def conv_block(x, stage, branch, nb_filter, dropout_rate=None):
    inter_channel = nb_filter * 4
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = Conv2D(inter_channel, (1, 1), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D((1, 1))(x)
    x = Conv2D(nb_filter, (3, 3), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    return x

def transition_block(x, stage, nb_filter, compression=1.0, dropout_rate=None):
    x = BatchNormalization(axis=concat_axis)(x)
    x = Activation('relu')(x)
    x = Conv2D(int(nb_filter * compression), (1, 1), use_bias=False)(x)

    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    x = AveragePooling2D((2, 2), strides=(2, 2))(x)
    return x

def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, grow_nb_filters=True):
    concat_feat = x

    for i in range(nb_layers):
        branch = i + 1
        x = conv_block(concat_feat, stage, branch, growth_rate, dropout_rate)
        concat_feat = Concatenate(axis=concat_axis)([concat_feat, x])

        if grow_nb_filters:
            nb_filter += growth_rate

    return concat_feat, nb_filter

def DenseNet(nb_dense_block=4, growth_rate=32, nb_filter=64, reduction=0.0, dropout_rate=0.0, weight_decay=1e-4, num_classes=None, weights_path=None):
    eps = 1.1e-5
    compression = 1.0 - reduction
    global concat_axis

    if tf.keras.backend.image_data_format() == 'channels_last':
        concat_axis = 3
        img_input = Input(shape=(224, 224, 3), name='data')
    else:
        concat_axis = 1
        img_input = Input(shape=(3, 224, 224), name='data')

    # DenseNet-169 architecture
    nb_layers = [6, 12, 32, 32]  # For DenseNet-169
    x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input)
    x = Conv2D(nb_filter, (7, 7), strides=(2, 2), name='conv1', use_bias=False)(x)
    x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv1_bn')(x)
    x = Activation('relu', name='relu1')(x)
    x = ZeroPadding2D((1, 1), name='pool1_zeropadding')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x)

    for block_idx in range(nb_dense_block - 1):
        stage = block_idx + 2
        x, nb_filter = dense_block(x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate)
        x = transition_block(x, stage, nb_filter, compression=compression, dropout_rate=dropout_rate)
        nb_filter = int(nb_filter * compression)

    final_stage = stage + 1
    x, nb_filter = dense_block(x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate)

    x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv' + str(final_stage) + '_blk_bn')(x)
    x = Activation('relu', name='relu' + str(final_stage) + '_blk')(x)
    x = GlobalAveragePooling2D(name='pool' + str(final_stage))(x)

    x = Dense(num_classes, name='fc6')(x)
    x = Activation('softmax', name='prob')(x)

    model = Model(img_input, x, name='densenet')

    if weights_path is not None:
        model.load_weights(weights_path)

    return model

def student_model(img_rows, img_cols, color_type=1, num_classes=None):
    nb_dense_block = 3  # Fewer dense blocks than the teacher model
    growth_rate = 16  # Smaller growth rate
    nb_filter = 32  # Fewer filters
    return DenseNet(nb_dense_block=nb_dense_block, growth_rate=growth_rate, nb_filter=nb_filter, num_classes=num_classes)

def distillation_loss(y_true, y_pred, teacher_preds, temperature=3.0, alpha=0.1):
    batch_size = tf.shape(y_pred)[0]
    teacher_batch_preds = tf.gather(teacher_preds, tf.range(batch_size))
    y_true = tf.keras.activations.softmax(y_true / temperature)
    y_pred = tf.keras.activations.softmax(y_pred / temperature)
    teacher_batch_preds = tf.keras.activations.softmax(teacher_batch_preds / temperature)
    soft_loss = tf.keras.losses.categorical_crossentropy(teacher_batch_preds, y_pred)
    hard_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
    return alpha * soft_loss + (1. - alpha) * hard_loss

# Example usage
# Assuming you have defined x_train and y_train with your training data

# Create an ImageDataGenerator for training
datagen = ImageDataGenerator(rescale=1./255)

# Define a generator that will resize images and feed them into the model
train_generator = datagen.flow(x_train, y_train, batch_size=32)

# Build the teacher and student models
teacher = DenseNet(nb_dense_block=4, growth_rate=32, nb_filter=64, num_classes=21)
student = student_model(img_rows=128, img_cols=128, color_type=3, num_classes=21)

# Compile the teacher model
teacher.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the teacher model using the generator
teacher.fit(train_generator, steps_per_epoch=len(x_train) // 32, epochs=10)

# Precompute teacher's predictions for the training set
teacher_preds = teacher.predict(train_generator, steps=len(x_train) // 32)

# Compile the student model with the distillation loss (using precomputed teacher predictions)
student.compile(optimizer='adam', loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher_preds), metrics=['accuracy'])

# Train the student model using the generator
student.fit(train_generator, steps_per_epoch=len(x_train) // 32, epochs=10)


Epoch 1/10


ValueError: Input 0 of layer "densenet" is incompatible with the layer: expected shape=(None, 224, 224, 3), found shape=(None, 128, 128, 3)