<a href="https://colab.research.google.com/github/Prarthana-10/GPT-Mastery-SOC/blob/main/22B0327_Week3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1st ques

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.losses import CategoricalCrossentropy

# Assume we have 10 classes (e.g., MNIST dataset)
num_classes = 10

# Create uniform predictions
uniform_predictions = np.full((num_classes,), 1 / num_classes)

# Create dummy labels (true labels for a batch of 100 samples, for example)
true_labels = np.random.randint(0, num_classes, size=(100,))

# Convert true labels to one-hot encoded format
one_hot_labels = tf.one_hot(true_labels, depth=num_classes)

# Define the loss function
loss_fn = CategoricalCrossentropy()

# Calculate the loss for uniform predictions
uniform_loss = loss_fn(one_hot_labels, np.tile(uniform_predictions, (100, 1)))
print(f"Loss for uniform predictions: {uniform_loss.numpy()}")


Loss for uniform predictions: 2.302585092994045


2nd ques

In [None]:


from tensorflow.keras import layers, models, initializers

# Define the model
def create_model():
    model = models.Sequential([
        layers.Flatten(input_shape=(28, 28)),  # Assuming MNIST input shape
        layers.Dense(10, activation='softmax', kernel_initializer=initializers.Zeros())
    ])
    return model

# Create and compile the model
model = create_model()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Create dummy data (e.g., MNIST-like data)
x_dummy = np.random.rand(100, 28, 28)
y_dummy = np.random.randint(0, 10, 100)

# Evaluate the model before training to get the initial loss
initial_loss, _ = model.evaluate(x_dummy, y_dummy, verbose=0)
print(f"Initial loss: {initial_loss}")

# Now, let's train the model for one epoch to check if the loss improves
model.fit(x_dummy, y_dummy, epochs=1, verbose=2)

# Evaluate the model after one epoch of training
trained_loss, _ = model.evaluate(x_dummy, y_dummy, verbose=0)
print(f"Loss after one epoch of training: {trained_loss}")


Initial loss: 2.3025851249694824
4/4 - 0s - loss: 2.3338 - accuracy: 0.1100 - 377ms/epoch - 94ms/step
Loss after one epoch of training: 2.1911497116088867


Pre-Training Using Autoencoders
We'll use the MNIST dataset for this example.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, initializers
from tensorflow.keras.datasets import mnist

# Load and preprocess the data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = x_train.reshape((x_train.shape[0], -1))  # Flatten the images
x_test = x_test.reshape((x_test.shape[0], -1))  # Flatten the images

# Define autoencoder model
def build_autoencoder(input_dim, encoding_dim):
    input_layer = layers.Input(shape=(input_dim,))
    encoded = layers.Dense(encoding_dim, activation='relu', kernel_initializer=initializers.Zeros(), bias_initializer=initializers.Zeros())(input_layer)
    decoded = layers.Dense(input_dim, activation='sigmoid', kernel_initializer=initializers.Zeros(), bias_initializer=initializers.Zeros())(encoded)
    autoencoder = models.Model(inputs=input_layer, outputs=decoded)
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
    return autoencoder

# Train autoencoders layer by layer
encoding_dims = [128, 64, 32]
autoencoders = []
input_dim = x_train.shape[1]

for encoding_dim in encoding_dims:
    autoencoder = build_autoencoder(input_dim, encoding_dim)
    print(f"Training autoencoder with input_dim={input_dim} and encoding_dim={encoding_dim}")
    autoencoder.fit(x_train, x_train, epochs=10, batch_size=256, shuffle=True, validation_data=(x_test, x_test))
    autoencoders.append(autoencoder)
    # Update input_dim for the next autoencoder
    input_dim = encoding_dim

# Create a model to encode data using the trained autoencoders
def encode_data(autoencoders, data):
    for autoencoder in autoencoders:
        data = autoencoder.predict(data)
    return data

# Encode training and test data
x_train_encoded = encode_data(autoencoders, x_train)
x_test_encoded = encode_data(autoencoders, x_test)

# Define the supervised model
def build_supervised_model(input_dim):
    model = models.Sequential([
        layers.Dense(128, activation='relu', input_shape=(input_dim,), kernel_initializer=initializers.Zeros(), bias_initializer=initializers.Zeros()),
        layers.Dense(64, activation='relu', kernel_initializer=initializers.Zeros(), bias_initializer=initializers.Zeros()),
        layers.Dense(10, activation='softmax', kernel_initializer=initializers.Zeros(), bias_initializer=initializers.Zeros())
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Create and train the supervised model
supervised_model = build_supervised_model(encoding_dims[-1])
supervised_model.fit(x_train_encoded, y_train, epochs=10, batch_size=256, validation_data=(x_test_encoded, y_test))

# Evaluate the supervised model
test_loss, test_acc = supervised_model.evaluate(x_test_encoded, y_test)
print(f"Test accuracy: {test_acc}")

# Inspect gradients and activations
from tensorflow.keras import backend as K

# Function to get gradients
def get_gradients(model, x, y):
    with tf.GradientTape() as tape:
        predictions = model(x, training=True)
        loss = model.compiled_loss(y, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    return gradients

# Function to get activations
def get_activations(model, x):
    activations = []
    for layer in model.layers:
        x = layer(x)
        activations.append(x)
    return activations

# Get gradients and activations for a batch of data
batch_x = x_train_encoded[:10]
batch_y = y_train[:10]
gradients = get_gradients(supervised_model, batch_x, batch_y)
activations = get_activations(supervised_model, batch_x)

# Print shapes of gradients and activations
for i, grad in enumerate(gradients):
    print(f"Gradient {i}: {grad.shape}")

for i, act in enumerate(activations):
    print(f"Activation {i}: {act.shape}")

# Check the loss values from history
import matplotlib.pyplot as plt

history = supervised_model.history

plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()


Training autoencoder with input_dim=784 and encoding_dim=128
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training autoencoder with input_dim=128 and encoding_dim=64
Epoch 1/10


ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1150, in train_step
        y_pred = self(x, training=True)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "model_9" is incompatible with the layer: expected shape=(None, 128), found shape=(None, 784)


4th ques

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, initializers
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt

# Load and preprocess the data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = x_train.reshape((x_train.shape[0], -1))
x_test = x_test.reshape((x_test.shape[0], -1))

# Define a model with zero initialization
def build_model():
    model = models.Sequential([
        layers.Dense(128, activation='relu', kernel_initializer=initializers.Zeros(), bias_initializer=initializers.Zeros(), input_shape=(784,)),
        layers.Dense(64, activation='relu', kernel_initializer=initializers.Zeros(), bias_initializer=initializers.Zeros()),
        layers.Dense(10, activation='softmax', kernel_initializer=initializers.Zeros(), bias_initializer=initializers.Zeros())
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Create and train the model
model = build_model()

# Train the model
history = model.fit(x_train, y_train, epochs=10, batch_size=256, validation_data=(x_test, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test accuracy: {test_acc}")

# Function to get gradients
def get_gradients(model, x, y):
    with tf.GradientTape() as tape:
        predictions = model(x, training=True)
        loss = model.compiled_loss(y, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    return gradients

# Create a model to extract activations
class ActivationModel(tf.keras.models.Model):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.layer_outputs = [layer.output for layer in model.layers]
        self.activations_model = tf.keras.models.Model(inputs=model.input, outputs=self.layer_outputs)

    def call(self, inputs):
        return self.activations_model(inputs)

# Get activations
activation_model = ActivationModel(model)
def get_activations(model, x):
    activations = activation_model(x)
    return activations

# Get gradients and activations for a batch of data
batch_x = x_train[:10]
batch_y = y_train[:10]
gradients = get_gradients(model, batch_x, batch_y)
activations = get_activations(model, batch_x)

# Print shapes of gradients and activations
for i, grad in enumerate(gradients):
    print(f"Gradient {i}: {grad.shape}")

for i, act in enumerate(activations):
    print(f"Activation {i}: {act.shape}")

# Plot loss values from history
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.11349999904632568


AttributeError: 'tuple' object has no attribute 'rank'

5th ques

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, initializers
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt

# Load and preprocess the data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = x_train.reshape((x_train.shape[0], -1))
x_test = x_test.reshape((x_test.shape[0], -1))

# Define a model with Batch Normalization
def build_model():
    model = models.Sequential([
        layers.Dense(128, input_shape=(784,)),
        layers.BatchNormalization(),
        layers.ReLU(),
        layers.Dense(64),
        layers.BatchNormalization(),
        layers.ReLU(),
        layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Create and train the model
model = build_model()
history = model.fit(x_train, y_train, epochs=10, batch_size=256, validation_data=(x_test, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test accuracy: {test_acc}")

# Fold Batch Normalization into the preceding Dense layer
def fold_batch_norm(model):
    new_model = models.Sequential()
    prev_layer_output_shape = None

    for layer in model.layers:
        if isinstance(layer, layers.BatchNormalization):
            continue  # Skip BatchNorm layers
        if isinstance(layer, layers.Dense):
            # Extract weights and biases
            weights, biases = layer.get_weights()
            if prev_layer_output_shape:
                # Incorporate BatchNorm parameters into Dense layer weights
                gamma, beta = prev_layer_bn_params
                mean, variance = prev_layer_bn_moments
                scale = gamma / tf.sqrt(variance + 1e-5)
                offset = beta - (gamma * mean) / tf.sqrt(variance + 1e-5)
                weights = weights * scale
                biases = biases - (offset * scale) + beta

            # Add Dense layer to the new model
            new_model.add(layers.Dense(layer.units,
                                       activation=layer.activation,
                                       kernel_initializer=layer.kernel_initializer,
                                       bias_initializer=layer.bias_initializer,
                                       input_shape=(prev_layer_output_shape,) if prev_layer_output_shape else None))
            new_model.layers[-1].set_weights([weights, biases])

        elif isinstance(layer, layers.ReLU):
            new_model.add(layer)
        elif isinstance(layer, layers.InputLayer):
            new_model.add(layer)

        # Save BatchNorm parameters from previous layer if applicable
        if isinstance(layer, layers.BatchNormalization):
            prev_layer_bn_params = [layer.gamma, layer.beta]
            prev_layer_bn_moments = [layer.moving_mean, layer.moving_variance]

        prev_layer_output_shape = layer.units if isinstance(layer, layers.Dense) else prev_layer_output_shape

    return new_model

# Fold the Batch Normalization layers
new_model = fold_batch_norm(model)

# Verify that the forward pass is consistent
def verify_models(model1, model2, x_data):
    preds1 = model1.predict(x_data)
    preds2 = model2.predict(x_data)
    return np.allclose(preds1, preds2)

# Compare predictions from the original and new model
is_same = verify_models(model, new_model, x_test[:10])
print(f"Forward pass is consistent: {is_same}")

# Plot loss values from history
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.9775999784469604


TypeError: 'NoneType' object is not iterable