In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks, Input
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import keras_tuner as kt

# Load and preprocess data
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# Scale pixels to [0, 1] in float32. Dividing the integer image arrays by a
# Python float directly would promote them to float64 and double the memory
# needed to hold the dataset.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Enhanced data augmentation: small random rotations, shifts, shears, zooms and
# horizontal flips; pixels exposed by a transform are filled from the nearest edge.
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)
datagen.fit(x_train)

# Define the student model
def create_student_model(conv1_filters, conv2_filters, conv3_filters, conv4_filters, conv5_filters, conv6_filters, dense_units, dropout_rate):
    """Build the (uncompiled) student CNN for 32x32x3 images and 10 classes.

    The network is six Conv2D(3x3, ReLU) + BatchNormalization stages, with 2x2
    max-pooling after stages 1, 2, 4 and 5, followed by global average pooling,
    one hidden Dense + BatchNormalization + Dropout block and a 10-way softmax.

    Args:
        conv1_filters..conv6_filters: Number of filters for each conv stage,
            in order. (The sixth parameter is named ``conv6_filters`` to match
            the name the body uses.)
        dense_units: Width of the hidden dense layer.
        dropout_rate: Dropout rate applied before the output layer.

    Returns:
        A Keras ``Model`` mapping the image input to class probabilities.
    """
    # (filters, pool_after) for each convolutional stage, in forward order.
    conv_stages = (
        (conv1_filters, True),
        (conv2_filters, True),
        (conv3_filters, False),
        (conv4_filters, True),
        (conv5_filters, True),
        (conv6_filters, False),
    )

    inputs = Input(shape=(32, 32, 3))
    x = inputs
    for filters, pool_after in conv_stages:
        # Every stage consumes the previous stage's output. padding='same'
        # keeps the 3x3 convolutions from shrinking the feature map, so the
        # four poolings take 32 -> 16 -> 8 -> 4 -> 2 and no convolution ever
        # sees an input smaller than its kernel.
        x = layers.Conv2D(filters, (3, 3), padding='same', activation='relu')(x)
        x = layers.BatchNormalization()(x)
        if pool_after:
            x = layers.MaxPooling2D((2, 2))(x)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(dense_units, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(10, activation='softmax')(x)

    return models.Model(inputs=inputs, outputs=outputs)

def distillation_loss(y_true, y_pred, teacher_logits, temperature=3, alpha=0.1):
    """Knowledge-distillation loss mixing a hard-label and a soft-target term.

    Args:
        y_true: Integer class labels (passed to sparse categorical cross-entropy).
        y_pred: Student class *probabilities* (a softmax output); they are used
            as probabilities by the hard-label term below.
        teacher_logits: Teacher outputs, divided by ``temperature`` and
            softmaxed to form the soft targets.
            NOTE(review): assumed to be pre-softmax logits - confirm against
            the teacher model's final layer.
        temperature: Softening temperature applied to both distributions.
        alpha: Weight of the hard-label term; the soft term gets ``1 - alpha``.

    Returns:
        Scalar tensor ``alpha * hard_loss + (1 - alpha) * soft_loss``.
    """
    y_true = tf.cast(y_true, tf.float32)
    teacher_logits = tf.cast(teacher_logits, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    # Compute soft targets
    soft_targets = tf.nn.softmax(teacher_logits / temperature)

    # y_pred already went through a softmax, so dividing it by the temperature
    # and softmaxing again would flatten it to a near-uniform distribution.
    # log(p) equals the logits up to an additive constant and softmax ignores
    # additive constants, so softmax(log(p) / T) is the correctly softened
    # student distribution. Clipping keeps log() finite for zero probabilities.
    student_log_probs = tf.math.log(tf.clip_by_value(y_pred, 1e-7, 1.0))
    soft_prob = tf.nn.softmax(student_log_probs / temperature)

    # Soft targets loss
    soft_targets_loss = tf.reduce_mean(
        tf.keras.losses.categorical_crossentropy(soft_targets, soft_prob)
    )

    # Hard targets loss
    student_loss = tf.reduce_mean(
        tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
    )

    # Combined loss
    return alpha * student_loss + (1 - alpha) * soft_targets_loss

# Define the hyperparameter tuning function
def build_model(hp):
    """KerasTuner model-building function for the student network.

    Registers the hyperparameters on ``hp``, builds the student with
    ``create_student_model`` and compiles it with Adam and sparse categorical
    cross-entropy. The filter counts and dense width use single-value ranges
    (min == max); only ``dropout_rate`` and ``learning_rate`` are searched.

    Args:
        hp: The KerasTuner ``HyperParameters`` object for the current trial.

    Returns:
        The compiled student model.
    """
    conv_filters = [
        hp.Int('conv1_filters', 64, 64),
        hp.Int('conv2_filters', 192, 192),
        hp.Int('conv3_filters', 384, 384),
        # create_student_model takes six filter counts; stages 4-6 were never
        # supplied, which made the call fail with a TypeError.
        # NOTE(review): 256 is a placeholder width for these stages - confirm.
        hp.Int('conv4_filters', 256, 256),
        hp.Int('conv5_filters', 256, 256),
        hp.Int('conv6_filters', 256, 256),
    ]
    dense_units = hp.Int('dense_units', 768, 768)
    dropout_rate = hp.Float('dropout_rate', 0.3, 0.6, step=0.1)

    # Passed positionally so the call matches the builder's parameter order.
    student_model = create_student_model(*conv_filters, dense_units, dropout_rate)

    optimizer = optimizers.Adam(hp.Float('learning_rate', 1e-4, 1e-2, sampling='log'))
    student_model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return student_model

# Bayesian-optimization tuner over build_model, selecting on validation accuracy.
# overwrite=True discards any previous results stored under kt_dir/student_model_tuning.
tuner = kt.BayesianOptimization(
    build_model,
    objective='val_accuracy',
    num_initial_points=3,
    max_trials=6,  # at most 6 trials in total
    directory='kt_dir',
    project_name='student_model_tuning',
    overwrite=True
)

# Perform the hyperparameter search on augmented training batches.
# NOTE(review): validation_data is the CIFAR-10 test split, which is also the
# data the final model is evaluated on, so the reported test accuracy is
# optimistically biased - consider holding out part of x_train instead.
tuner.search(
    datagen.flow(x_train, y_train, batch_size=64),
    epochs=10,  # 10 epochs for each trial
    validation_data=(x_test, y_test)
)

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build the best model through the tuner's own hypermodel. Calling
# create_student_model by hand with five values does not match its
# eight-parameter signature and raises a TypeError; going through build_model
# guarantees the final model is built the same way as in the winning trial.
best_student_model = tuner.hypermodel.build(best_hps)

# Re-compile with the distillation loss (replaces the plain cross-entropy used
# during tuning).
# NOTE(review): teacher_model is not defined in this cell and must already
# exist in the kernel. best_student_model.input is the model's symbolic input,
# not the current training batch, so this lambda probably does not produce
# per-batch teacher outputs - confirm, and consider a custom train_step that
# runs the teacher on each batch.
best_student_model.compile(
    optimizer=optimizers.Adam(best_hps.get('learning_rate')),
    loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher_model(best_student_model.input)),
    metrics=['accuracy']
)

# Callbacks for student model training
# Save weights only, and only when validation loss improves.
checkpoint_callback_student = callbacks.ModelCheckpoint(
    'student_model.weights.h5',
    save_weights_only=True,
    save_best_only=True,
    monitor='val_loss',
    mode='min',
    verbose=1
)
# Stop after 5 epochs without val_loss improvement and roll the model back to
# its best epoch; without restore_best_weights the model keeps the weights of
# the last (worse) epoch, and any evaluation that follows would measure those.
early_stopping_student = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)
# Halve the learning rate after 2 stagnant epochs; the monitored quantity is
# spelled out so all three callbacks visibly track the same metric.
lr_scheduler_student = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2)

# Train the student model with knowledge distillation on augmented batches of
# 128 images, for at most 50 epochs, validating on the test split.
student_callbacks = [
    checkpoint_callback_student,
    early_stopping_student,
    lr_scheduler_student,
]
train_batches = datagen.flow(x_train, y_train, batch_size=128)
best_student_model.fit(
    train_batches,
    epochs=50,
    validation_data=(x_test, y_test),
    callbacks=student_callbacks,
)

# Evaluate the student model: the first two entries of the result are reported
# as loss and accuracy.
eval_results = best_student_model.evaluate(x_test, y_test)
test_loss, test_accuracy = eval_results[0], eval_results[1]
print(f'Test loss: {test_loss}')
print(f'Test accuracy: {test_accuracy}')


In [7]:
import tensorflow as tf
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.initializers import HeNormal
import keras_tuner as kt

# Load and preprocess data
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# Cast to float32 before scaling to [0, 1]; a bare `/ 255.0` on the integer
# image arrays yields float64 and doubles the dataset's memory footprint.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Enhanced data augmentation: random rotation (up to 15 degrees), 10% shifts,
# shear and zoom, plus horizontal flips; gaps are filled from the nearest pixel.
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)
datagen.fit(x_train)

def create_student_model(conv1_filters, conv2_filters, conv3_filters, conv4_filters, conv5_filters, conv6_filters, dense_units, dropout_rate):
    """Build the (uncompiled) He-initialised student CNN for 32x32x3 inputs.

    Six Conv2D(3x3, ReLU) + BatchNormalization stages (2x2 max-pooling after
    stages 1, 2, 4 and 5), then global average pooling, a hidden Dense +
    BatchNormalization + Dropout block and a 10-way softmax output.

    Args:
        conv1_filters..conv6_filters: Filter count of each conv stage, in order.
        dense_units: Width of the hidden dense layer.
        dropout_rate: Dropout rate applied before the output layer.

    Returns:
        A Keras ``Model`` mapping the image input to class probabilities.
    """
    # (filters, pool_after) for each convolutional stage, in forward order.
    conv_stages = (
        (conv1_filters, True),
        (conv2_filters, True),
        (conv3_filters, False),
        (conv4_filters, True),
        (conv5_filters, True),
        (conv6_filters, False),
    )

    # layers.Input is used because this cell imports `layers` but not `Input`.
    inputs = layers.Input(shape=(32, 32, 3))
    x = inputs
    for filters, pool_after in conv_stages:
        # padding='same' stops the 3x3 convolutions from shrinking the map;
        # with unpadded convolutions it is already 1x1 when stage 5 is reached
        # and the model cannot be built. The initializer is given by name so
        # each layer gets its own instance rather than sharing one object.
        x = layers.Conv2D(
            filters, (3, 3),
            padding='same',
            activation='relu',
            kernel_initializer='he_normal',
        )(x)
        x = layers.BatchNormalization()(x)
        if pool_after:
            x = layers.MaxPooling2D((2, 2))(x)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(dense_units, activation='relu', kernel_initializer='he_normal')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(10, activation='softmax', kernel_initializer='he_normal')(x)

    return models.Model(inputs=inputs, outputs=outputs)

def distillation_loss(y_true, y_pred, teacher_logits, temperature=3, alpha=0.1):
    """Weighted sum of hard-label cross-entropy and soft-target cross-entropy.

    Args:
        y_true: Integer class labels for the hard-label term.
        y_pred: Student softmax probabilities (they are consumed as
            probabilities by the hard-label cross-entropy below).
        teacher_logits: Teacher outputs that are temperature-scaled and
            softmaxed into soft targets.
            NOTE(review): presumed to be raw logits - verify with the teacher.
        temperature: Divisor used to soften both distributions.
        alpha: Weight on the hard-label loss; ``1 - alpha`` weights the soft loss.

    Returns:
        Scalar tensor holding the combined loss.
    """
    y_true = tf.cast(y_true, tf.float32)
    teacher_logits = tf.cast(teacher_logits, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    # Compute soft targets
    soft_targets = tf.nn.softmax(teacher_logits / temperature)

    # The student output is already a probability vector, so it must not be
    # fed to softmax as if it were logits (values in [0, 1] divided by T give
    # an almost uniform result). Work in log space instead: log-probabilities
    # differ from the logits only by a constant, which softmax cancels out.
    # The clip guards against log(0).
    student_log_probs = tf.math.log(tf.clip_by_value(y_pred, 1e-7, 1.0))
    soft_prob = tf.nn.softmax(student_log_probs / temperature)

    # Soft targets loss
    soft_targets_loss = tf.reduce_mean(
        tf.keras.losses.categorical_crossentropy(soft_targets, soft_prob)
    )

    # Hard targets loss
    student_loss = tf.reduce_mean(
        tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
    )

    # Combined loss
    return alpha * student_loss + (1 - alpha) * soft_targets_loss

# Define the hyperparameter tuning function
def build_model(hp):
    """Create and compile one student model for a KerasTuner trial.

    All architecture hyperparameters are registered with single-value ranges
    (min == max); ``dropout_rate`` and ``learning_rate`` are the ones actually
    searched. The model is compiled with Adam, sparse categorical
    cross-entropy and an accuracy metric.

    Args:
        hp: KerasTuner ``HyperParameters`` instance for the trial.

    Returns:
        The compiled student model.
    """
    conv_filters = [
        hp.Int('conv1_filters', 64, 64),
        hp.Int('conv2_filters', 192, 192),
        hp.Int('conv3_filters', 384, 384),
        # The builder needs six filter counts; only three were being passed,
        # so the call raised a TypeError before any trial could run.
        # NOTE(review): 256 for stages 4-6 is a placeholder - confirm.
        hp.Int('conv4_filters', 256, 256),
        hp.Int('conv5_filters', 256, 256),
        hp.Int('conv6_filters', 256, 256),
    ]
    dense_units = hp.Int('dense_units', 768, 768)
    dropout_rate = hp.Float('dropout_rate', 0.3, 0.6, step=0.1)

    # Positional call: six filter counts, then dense width, then dropout rate.
    student_model = create_student_model(*conv_filters, dense_units, dropout_rate)

    optimizer = optimizers.Adam(hp.Float('learning_rate', 1e-4, 1e-2, sampling='log'))
    student_model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return student_model

# Bayesian-optimization tuner over build_model, ranked by validation accuracy.
# overwrite=True starts from scratch instead of resuming kt_dir/student_model_tuning.
tuner = kt.BayesianOptimization(
    build_model,
    objective='val_accuracy',
    num_initial_points=3,
    max_trials=6,  # at most 6 trials in total
    directory='kt_dir',
    project_name='student_model_tuning',
    overwrite=True
)

# Perform the hyperparameter search on augmented training batches.
# NOTE(review): the test split doubles as validation data here and is later
# used for the final evaluation, so that score will be optimistic - consider
# a validation split carved out of x_train.
tuner.search(
    datagen.flow(x_train, y_train, batch_size=64),
    epochs=10,  # 10 epochs for each trial
    validation_data=(x_test, y_test)
)

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build the best model via the tuner's hypermodel rather than calling
# create_student_model directly: the direct call passed five values to an
# eight-parameter function (TypeError), whereas build_model always supplies
# the full argument list that was used during the search.
best_student_model = tuner.hypermodel.build(best_hps)

# Swap the tuning loss for the distillation loss by compiling again.
# NOTE(review): teacher_model is not created anywhere in this cell; it has to
# be present in the kernel already. best_student_model.input is a symbolic
# input rather than the batch being trained on, so the teacher is likely not
# evaluated per batch here - confirm, and consider a custom train_step.
best_student_model.compile(
    optimizer=optimizers.Adam(best_hps.get('learning_rate')),
    loss=lambda y_true, y_pred: distillation_loss(y_true, y_pred, teacher_model(best_student_model.input)),
    metrics=['accuracy']
)

# Callbacks for student model training
# Checkpoint: write weights to disk whenever validation loss reaches a new minimum.
checkpoint_callback_student = callbacks.ModelCheckpoint(
    'student_model.weights.h5',
    save_weights_only=True,
    save_best_only=True,
    monitor='val_loss',
    mode='min',
    verbose=1
)
# Early stopping: give up after 5 epochs without improvement and restore the
# best epoch's weights, so whatever is evaluated afterwards is the best model
# rather than the one from the final, non-improving epoch.
early_stopping_student = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)
# LR schedule: halve the learning rate after 2 epochs without improvement;
# the monitor is written explicitly to match the other two callbacks.
lr_scheduler_student = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2)

# Train the student model with knowledge distillation: augmented batches of
# 128 images, up to 50 epochs, validated against the test split.
student_callbacks = [
    checkpoint_callback_student,
    early_stopping_student,
    lr_scheduler_student,
]
train_batches = datagen.flow(x_train, y_train, batch_size=128)
best_student_model.fit(
    train_batches,
    epochs=50,
    validation_data=(x_test, y_test),
    callbacks=student_callbacks,
)

# Evaluate the student model and report the first two returned values
# (loss, then accuracy).
eval_results = best_student_model.evaluate(x_test, y_test)
test_loss, test_accuracy = eval_results[0], eval_results[1]
print(f'Test loss: {test_loss}')
print(f'Test accuracy: {test_accuracy}')


AttributeError: partially initialized module 'pandas' has no attribute '_pandas_parser_CAPI' (most likely due to a circular import)