# Experiment 2: Last residual block removed, unfrozen layers

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar100
from tensorflow.keras.models import Model, clone_model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.layers import Input, Concatenate
import matplotlib.pyplot as plt
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
import numpy as np

In [None]:
# Report how many GPU devices TensorFlow can see in this runtime.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


# Dataset Acquisition

In [None]:
# Load the CIFAR-100 train and test splits (downloads the archive on first use).
(x_train, y_train), (x_test, y_test) = cifar100.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
[1m127516672/169001437[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m3s[0m 0us/step

KeyboardInterrupt: 

In [None]:
import tensorflow as tf
from tensorflow.keras import datasets

# Load CIFAR-100 dataset; these arrays are fed unmodified into the
# tf.data preprocessing pipeline built further down in this cell.
(x_train_, y_train_), (x_test_, y_test_) = datasets.cifar100.load_data()

# Spatial size every image is resized to by preprocess_image
target_size = (224, 224)

# Function to preprocess a single image
def preprocess_image(image, label):
    """Resize one image to the module-level ``target_size`` and divide it by 255.

    The label is returned unchanged so the function can be used directly
    with ``tf.data.Dataset.map`` on ``(image, label)`` pairs.
    """
    resized = tf.image.resize(image, target_size)
    return resized / 255.0, label

# Function to process data in batches
def preprocess_in_batches(x_data, y_data, batch_size):
    """Build a ``tf.data`` pipeline over ``(x_data, y_data)``.

    Each element is passed through ``preprocess_image`` (in parallel), then
    the stream is grouped into batches of ``batch_size`` and prefetched.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    return (
        tf.data.Dataset.from_tensor_slices((x_data, y_data))
        .map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )

# Wrap both splits in preprocessing pipelines that share one batch size
batch_size = 64
train_ds, test_ds = (
    preprocess_in_batches(split_x, split_y, batch_size)
    for split_x, split_y in ((x_train_, y_train_), (x_test_, y_test_))
)

# Sanity check: pull a single batch and report the tensor shapes
for images, labels in train_ds.take(1):
    for caption, batch in (("Image batch shape:", images), ("Label batch shape:", labels)):
        print(caption, batch.shape)


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz
[1m169001437/169001437[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 0us/step
Image batch shape: (64, 224, 224, 3)
Label batch shape: (64, 1)


In [None]:
def generate_blocks(image, block_size=4, num_target_patches=4, rng=None):
    """
    Generate masked target patches and a context image with patches removed.

    The image is treated as a grid of ``block_size`` x ``block_size`` cells.
    For every target patch a grid-aligned top-left corner is drawn at random,
    together with a height and a width of 2 to 7 blocks; the patch is clipped
    at the image border, so patches near the bottom/right edge can be smaller.
    Patches may overlap each other.

    Args:
        image (np.ndarray): Input image of shape (H, W) or (H, W, C). It is
            not modified.
        block_size (int): Size of each grid block in pixels. H and W must be
            divisible by it.
        num_target_patches (int): Number of target patches to generate.
        rng: Optional random source exposing ``choice`` (for example
            ``np.random.default_rng(seed)``). Defaults to the global
            ``np.random`` state, which keeps ``np.random.seed`` working.

    Returns:
        tuple: A tuple containing:
            - context (np.ndarray): Copy of the image with every target patch
              region set to zero.
            - target_patches (list[np.ndarray]): One array per patch, with the
              same shape as ``image``, zero everywhere except the patch region.
            - target_patch_coords (list[tuple[int, int, int, int]]):
              ``(top_left_y, top_left_x, bottom_right_y, bottom_right_x)`` per
              patch, with exclusive bottom/right bounds.

    Raises:
        AssertionError: If the image height or width is not divisible by
            ``block_size``.
    """
    # Ensure image dimensions are divisible by block_size
    image_height, image_width = image.shape[0], image.shape[1]
    assert image_height % block_size == 0, "Image height must be divisible by block_size"
    assert image_width % block_size == 0, "Image width must be divisible by block_size"

    # Candidate top-left corners (grid aligned) and candidate patch extents
    patch_row_indices = np.arange(0, image_height, block_size)
    patch_col_indices = np.arange(0, image_width, block_size)
    patch_extents = np.arange(2 * block_size, 8 * block_size, block_size)

    sampler = np.random if rng is None else rng

    # The context starts as a full copy; patch regions are blanked below
    context = image.copy()

    target_patches = []
    target_patch_coords = []
    for _ in range(num_target_patches):
        # Draw order (y, x, height, width) is kept stable so that seeded runs
        # are reproducible. Values are converted to plain ints so the
        # returned coordinates print and compare cleanly.
        top_left_y = int(sampler.choice(patch_row_indices))
        top_left_x = int(sampler.choice(patch_col_indices))
        bottom_right_y = min(top_left_y + int(sampler.choice(patch_extents)), image_height)
        bottom_right_x = min(top_left_x + int(sampler.choice(patch_extents)), image_width)

        # Full-size zero canvas holding only the pixels of this patch
        target_patch = np.zeros_like(image)
        target_patch[top_left_y:bottom_right_y, top_left_x:bottom_right_x] = \
            image[top_left_y:bottom_right_y, top_left_x:bottom_right_x]

        target_patches.append(target_patch)
        target_patch_coords.append((top_left_y, top_left_x, bottom_right_y, bottom_right_x))

        # Remove the patch from the context image
        context[top_left_y:bottom_right_y, top_left_x:bottom_right_x] = 0

    return context, target_patches, target_patch_coords


# visualizing x_train and train_ds


In [None]:
import cv2
import numpy as np
from tensorflow.keras import datasets

# Load CIFAR-100 dataset (raw arrays, resized with OpenCV later in this cell)
(x_train_, y_train_), (x_test_, y_test_) = datasets.cifar100.load_data()

# Output size handed to cv2.resize via resize_images_with_opencv
target_size = (224, 224)

# Function to resize images using OpenCV
def resize_images_with_opencv(images, target_size, interpolation=None):
    """Resize every image in ``images`` to ``target_size`` using OpenCV.

    Args:
        images: Iterable of image arrays shaped (H, W) or (H, W, C).
        target_size: ``(width, height)`` pair, passed straight to
            ``cv2.resize``.
        interpolation: OpenCV interpolation flag. When ``None`` (default) the
            flag is chosen per image: ``cv2.INTER_AREA`` when the image is
            not enlarged, ``cv2.INTER_LINEAR`` when it is. ``INTER_AREA`` is
            meant for shrinking and gives blocky results when upscaling.

    Returns:
        np.ndarray: The resized images stacked along a new first axis.
    """
    target_width, target_height = target_size
    resized_images = []
    for img in images:
        flag = interpolation
        if flag is None:
            src_height, src_width = img.shape[:2]
            enlarging = target_width > src_width or target_height > src_height
            flag = cv2.INTER_LINEAR if enlarging else cv2.INTER_AREA
        resized_images.append(cv2.resize(img, target_size, interpolation=flag))
    return np.array(resized_images)

# Resize train and test images
x_train_resized = resize_images_with_opencv(x_train_, target_size)
x_test_resized = resize_images_with_opencv(x_test_, target_size)

# Normalize resized images to [0, 1].
# Cast to float32 first: dividing the integer image arrays by a Python float
# would promote them to float64, doubling the memory needed for the
# (50000, 224, 224, 3) training array. Even in float32 this array is large;
# prefer a streaming tf.data pipeline when memory is tight.
x_train_resized = x_train_resized.astype(np.float32) / 255.0
x_test_resized = x_test_resized.astype(np.float32) / 255.0

# Check the shapes
print("Resized x_train shape:", x_train_resized.shape)  # (50000, 224, 224, 3)
print("Resized x_test shape:", x_test_resized.shape)    # (10000, 224, 224, 3)


In [None]:
# Example context and target blocks

# Pick a random training image. x_train must be an indexable array here;
# a tf.data pipeline cannot be subscripted. The upper bound comes from the
# data itself rather than a hard-coded 50000.
random_int = np.random.randint(0, len(x_train))

# Generate blocks
context, target_patches, target_patch_coords = generate_blocks(x_train[random_int])

# One column for the original, one for the context, one per target patch
num_columns = 2 + len(target_patches)
fig, axs = plt.subplots(1, num_columns, figsize=(15, 5))

# Show original in the first column
axs[0].imshow(x_train[random_int])
axs[0].set_title('Original')

# Show context in the second column
axs[1].imshow(context)
axs[1].set_title('Context')

# Show the targets in the rest of the columns
for i, target_patch in enumerate(target_patches, start=1):
    axs[i + 1].imshow(target_patch)
    axs[i + 1].set_title(f'Target {i}')

# Print target coords
print("Target coords:")
for coord in target_patch_coords:
    print(coord)

# Show the plot
plt.show()

TypeError: '_PrefetchDataset' object is not subscriptable

## Encoder Decoder

In [None]:
def init_encoder():
    """Build a fully trainable, truncated ResNet50 encoder for 32x32 RGB images.

    The ResNet50 backbone (without its classification top) is cut at
    ``layers[-32]`` and followed by global average pooling, so the returned
    model maps a batch of images to one feature vector per image.

    Returns:
        Model: The encoder, with every backbone layer marked trainable.
    """
    # Load the ResNet50 model without the top classification layers
    backbone = ResNet50(include_top=False, input_shape=(32, 32, 3))

    # NOTE(review): the negative index depends on the layer ordering of the
    # installed Keras ResNet50; confirm it selects the intended cut point.
    truncated = Model(inputs=backbone.input, outputs=backbone.layers[-32].output)

    # All layers stay trainable in this experiment. The previous
    # freeze-then-unfreeze pair was a no-op and has been removed.
    for layer in truncated.layers:
        layer.trainable = True

    # Collapse the spatial dimensions into a single feature vector
    pooled = GlobalAveragePooling2D()(truncated.output)

    return Model(inputs=truncated.input, outputs=pooled)

# Initialize the encoders
context_encoder = init_encoder()

# clone_model only replicates the architecture: the clone is built from new
# layers with freshly initialized weights. Copy the weights explicitly so the
# target encoder starts as an exact copy of the context encoder.
target_encoder = clone_model(context_encoder)
target_encoder.set_weights(context_encoder.get_weights())

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [None]:
# Print the layer-by-layer structure of the context encoder.
context_encoder.summary()

In [None]:
# Print the layer-by-layer structure of the target encoder.
target_encoder.summary()

# Predictor Model

In [None]:
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.models import Model

# Width of the encoder embedding, read from the context encoder so the
# predictor cannot drift out of sync with the encoder definition.
embedding_dim = context_encoder.output_shape[-1]

# Input layers
context_input = Input(shape=(embedding_dim,), name='context_input')  # Context encoding
coords_input = Input(shape=(4,), name='coords_input')                # Target patch coordinates

# Concatenate the inputs
x = Concatenate(name='concat_features')([context_input, coords_input])

# Dense layers to process the combined input
x = Dense(1024, activation='relu', name='dense1')(x)
x = Dense(512, activation='relu', name='dense2')(x)

# Output layer: linear, because the predictor regresses an encoder embedding
# and those features are not guaranteed to be non-negative. A ReLU here would
# clamp every prediction to >= 0.
output = Dense(embedding_dim, activation=None, name='output_vector')(x)

# Create the model
predictor_model = Model(inputs=[context_input, coords_input], outputs=output, name='predictor_model')

# Model summary
predictor_model.summary()


In [None]:
# Training hyperparameters
epochs, batch_size = 10, 32
optimizer = Adam(learning_rate=1e-4)
loss_fn = MeanSquaredError()

# EMA momentum: starting and final values
initial_momentum, final_momentum = 0.996, 1.0

In [None]:
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model

# Freeze the target encoder: mark each of its layers non-trainable so that
# fitting the probe built from it does not update the encoder weights.
for layer in target_encoder.layers:
    layer.trainable = False

# Build linear probing model
def build_linear_probe_model(encoder, num_classes, input_shape=(32, 32, 3)):
    """Stack a softmax classification head on top of an encoder.

    The function does not freeze ``encoder`` itself; the caller is expected
    to have marked its layers non-trainable for a true linear probe.

    Args:
        encoder: Keras model mapping an image batch to feature vectors.
        num_classes (int): Number of output classes.
        input_shape (tuple): Shape of a single input image. Defaults to
            ``(32, 32, 3)``.

    Returns:
        Model: Image -> class-probability model (uncompiled).
    """
    # Input for image
    input_layer = Input(shape=input_shape, name="image_input")

    # Run the encoder in inference mode so layers such as BatchNormalization
    # use their stored statistics, whatever the caller's trainable flags are.
    x = encoder(input_layer, training=False)

    # Add linear classification head
    output_layer = Dense(num_classes, activation="softmax", name="classification_head")(x)

    return Model(inputs=input_layer, outputs=output_layer)

# Create the linear probing model on top of the target encoder
num_classes = 100  # CIFAR-100 has 100 classes
linear_probe_model = build_linear_probe_model(target_encoder, num_classes)


In [None]:
# Configure the probe for training: Adam optimizer, cross-entropy on
# integer class labels, accuracy as the reported metric
linear_probe_model.compile(
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
    optimizer=Adam(learning_rate=1e-3),
)


In [None]:
# Fetch the CIFAR-100 train/test splits
(x_train, y_train), (x_test, y_test) = cifar100.load_data()

# Convert pixels to float32 and divide by 255
x_train, x_test = (split.astype("float32") / 255.0 for split in (x_train, x_test))

# Fit the linear probe, validating against the test split after each epoch
history = linear_probe_model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    batch_size=64,
    validation_data=(x_test, y_test),
)


Epoch 1/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 18ms/step - accuracy: 0.0153 - loss: 4.5805 - val_accuracy: 0.0303 - val_loss: 4.5158
Epoch 2/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - accuracy: 0.0423 - loss: 4.4991 - val_accuracy: 0.0448 - val_loss: 4.4564
Epoch 3/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - accuracy: 0.0521 - loss: 4.4402 - val_accuracy: 0.0568 - val_loss: 4.4099
Epoch 4/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - accuracy: 0.0601 - loss: 4.3947 - val_accuracy: 0.0599 - val_loss: 4.3743
Epoch 5/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.0668 - loss: 4.3578 - val_accuracy: 0.0688 - val_loss: 4.3422
Epoch 6/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.0748 - loss: 4.3258 - val_accuracy: 0.0684 - val_loss: 4.3137
Epoch 7/20
[1m782/782

In [None]:
# Evaluate on the test set; evaluate() yields the loss followed by the
# metric configured at compile time, unpacked here as (loss, accuracy).
test_loss, test_accuracy = linear_probe_model.evaluate(x_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.1090 - loss: 4.0890
Test Accuracy: 10.65%


# Test