## Working with TensorFlow
In this assignment, you will become familiar with the TensorFlow library and learn how to build a model for the MNIST dataset in two ways:
*   using the built-in layers in TensorFlow
*   using custom layers to replicate the same result



In [7]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.utils import to_categorical

## Loading and preprocessing the Data
We will use the MNIST dataset that ships with the TensorFlow library directly.
A detailed description of the data is given at <https://www.tensorflow.org/api_docs/python/tf/keras/datasets/mnist/load_data>.


In [14]:
# Fetch the MNIST train/test splits bundled with Keras
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale the pixel values by 1/255 for both splits
x_train, x_test = x_train / 255.0, x_test / 255.0

# One-hot encode the digit labels into 10 classes
y_train, y_test = to_categorical(y_train, 10), to_categorical(y_test, 10)

Here's how the data looks:

In [18]:
# Render the first training image with the 'Greys' colormap
plt.imshow(x_train[0], cmap='Greys')

In [17]:
# Label of the first training sample, as stored in y_train
# (one-hot encoded by to_categorical in the preprocessing cell)
y_train[0]

In [19]:
# Render the second training image with the 'Greys' colormap
plt.imshow(x_train[1], cmap='Greys')

In [20]:
# Render the third training image with the 'Greys' colormap
plt.imshow(x_train[2], cmap='Greys')

# Making a simple feedforward network
As you saw in the second week, a simple feedforward network works well for solving MNIST.<br/>
The following is a simple feedforward model with three layers:
* a flatten layer to convert our 28x28 images into a single array of length 784
* a dense layer of 128 neurons with the relu activation function
* finally a dense layer of 10 neurons with the softmax activation to get a distribution between the digits

In [1]:
# Assemble the network one layer at a time
model = Sequential()
model.add(Flatten(input_shape=(28, 28)))      # 28x28 image -> flat vector of 784 values
model.add(Dense(128, activation='relu'))      # hidden layer
model.add(Dense(10, activation='softmax'))    # one probability per digit class

# Configure training: Adam optimizer, cross-entropy on the one-hot labels,
# and accuracy as the reported metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 5 epochs, reserving 20% of the training data for validation
model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.2,
)

# Score the trained model on the test split
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc}')

## Making Custom Layers
You can go through this
<a href="https://www.tensorflow.org/tutorials/customization/custom_layers">documentation</a> to get a feel for how to implement a custom layer.

* Create a custom dense layer with a ReLU activation
* Create a custom dense layer with a softmax activation
* Create a custom flatten layer

In [None]:
class CustomDenseReluLayer(tf.keras.layers.Layer):
    """Fully connected layer that computes ``relu(inputs @ w + b)``.

    Args:
        units: Number of output neurons (size of the last output axis).
        **kwargs: Standard ``tf.keras.layers.Layer`` keyword arguments
            (for example ``name`` or ``dtype``), forwarded to the base class.
    """

    def __init__(self, units, **kwargs):
        # Forward base-class options so the layer can be named/configured
        # like any built-in Keras layer.
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the kernel and bias once the input feature size is known."""
        # Kernel maps the last input axis onto `units` outputs
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer='glorot_uniform',
            trainable=True,
            name='kernel'
        )
        # One bias term per output unit, starting at zero
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
            name='bias'
        )

    def call(self, inputs):
        """Apply the linear transformation followed by ReLU."""
        z = tf.matmul(inputs, self.w) + self.b
        return tf.nn.relu(z)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({'units': self.units})
        return config


class CustomDenseSoftmaxLayer(tf.keras.layers.Layer):
    """Fully connected layer that computes ``softmax(inputs @ w + b)``.

    The softmax is taken over the last axis, so each output row is a
    probability distribution over ``units`` entries.

    Args:
        units: Number of output neurons (size of the last output axis).
        **kwargs: Standard ``tf.keras.layers.Layer`` keyword arguments
            (for example ``name`` or ``dtype``), forwarded to the base class.
    """

    def __init__(self, units, **kwargs):
        # Forward base-class options so the layer can be named/configured
        # like any built-in Keras layer.
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the kernel and bias once the input feature size is known."""
        # Kernel maps the last input axis onto `units` outputs
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer='glorot_uniform',
            trainable=True,
            name='kernel'
        )
        # One bias term per output unit, starting at zero
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
            name='bias'
        )

    def call(self, inputs):
        """Apply the linear transformation followed by softmax."""
        z = tf.matmul(inputs, self.w) + self.b
        return tf.nn.softmax(z, axis=-1)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({'units': self.units})
        return config


class CustomFlattenLayer(tf.keras.layers.Layer):
    """Collapse every non-batch axis of the input into one feature axis."""

    def call(self, inputs):
        """Reshape ``inputs`` from ``(batch, d1, ..., dn)`` to ``(batch, d1*...*dn)``."""
        static_shape = inputs.shape
        if static_shape.rank is not None and static_shape.rank >= 1:
            feature_dims = static_shape.as_list()[1:]
            if all(dim is not None for dim in feature_dims):
                # All non-batch dimensions are statically known: spell out the
                # flattened size and let reshape infer the batch axis instead.
                # This keeps the feature size in the static output shape, which
                # downstream layers read in build() via input_shape[-1].
                flat_size = 1
                for dim in feature_dims:
                    flat_size *= dim
                return tf.reshape(inputs, (-1, flat_size))

        # Static shape is incomplete: keep the batch dimension taken at run
        # time and let reshape infer the flattened size.
        batch_size = tf.shape(inputs)[0]
        return tf.reshape(inputs, (batch_size, -1))

## Using our custom layers to build a model for MNIST

In [None]:
# Same architecture as the built-in version, assembled from the custom layers
model = Sequential()
model.add(CustomFlattenLayer())
model.add(CustomDenseReluLayer(128))
model.add(CustomDenseSoftmaxLayer(10))

# Configure training: Adam optimizer, cross-entropy on the one-hot labels,
# and accuracy as the reported metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 5 epochs, reserving 20% of the training data for validation
model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.2,
)

# Score the trained model on the test split
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc}')

# Assignment
* Load and preprocess the Boston housing dataset
* Build a linear regression model for it and optimize it using TensorFlow (it's basically a neural network with a single neuron and no activation)
* Build a feedforward network for it; you can experiment with the number of layers, the number of neurons in each layer, and different activation functions <br/>





# Bonus Assignment 
* Try solving one more dataset of your choice from Kaggle or TensorFlow Datasets

In [None]:
## Boston Housing Dataset - Linear Regression and Feedforward Network
Below we load a housing dataset (the code uses California Housing as a stand-in, since Boston Housing is deprecated in newer scikit-learn versions), build a simple linear regression model (single neuron, no activation), and then a feedforward network to compare performance.

In [22]:
# Load a housing regression dataset.
# NOTE: the assignment names Boston Housing, but this cell loads California
# Housing from scikit-learn as a stand-in (Boston is deprecated in newer sklearn).
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

housing = fetch_california_housing()
X = housing.data
y = housing.target

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
# NOTE: this rebinds y_train / y_test, which held the MNIST labels in earlier cells.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training split only, then apply it to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Report the dataset dimensions
print(f"Training samples: {X_train_scaled.shape[0]}")
print(f"Test samples: {X_test_scaled.shape[0]}")
print(f"Number of features: {X_train_scaled.shape[1]}")

In [None]:
# Linear regression written as a network: a single Dense unit with no
# activation, i.e. y = Wx + b over the scaled features.
linear_model = Sequential()
linear_model.add(
    Dense(1, input_shape=(X_train_scaled.shape[1],), activation=None)
)

# Mean squared error is the training objective; mean absolute error is tracked as a metric
linear_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss='mse',
    metrics=['mae'],
)

print("Linear Regression Model Summary:")
linear_model.summary()

# Fit for 100 epochs, reserving 20% of the training data for validation
history_linear = linear_model.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    verbose=1,
)

# Score on the held-out test split: evaluate() yields the loss (MSE) then the MAE metric
test_loss_linear, test_mae_linear = linear_model.evaluate(X_test_scaled, y_test)
print(f"\nLinear Regression - Test MSE: {test_loss_linear:.4f}, Test MAE: {test_mae_linear:.4f}")

In [None]:
# Feedforward network for the housing regression task (trained on the scaled
# features in X_train_scaled): three ReLU hidden layers of 64, 32 and 16 units.
feedforward_model = Sequential()
feedforward_model.add(
    Dense(64, input_shape=(X_train_scaled.shape[1],), activation='relu')
)
for hidden_units in (32, 16):
    feedforward_model.add(Dense(hidden_units, activation='relu'))
# Single linear output unit: regression targets need no activation
feedforward_model.add(Dense(1, activation=None))

# Mean squared error is the training objective; mean absolute error is tracked as a metric
feedforward_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='mse',
    metrics=['mae'],
)

print("Feedforward Neural Network Summary:")
feedforward_model.summary()

# Fit for 100 epochs, reserving 20% of the training data for validation
history_ff = feedforward_model.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    verbose=1,
)

# Score on the held-out test split: evaluate() yields the loss (MSE) then the MAE metric
test_loss_ff, test_mae_ff = feedforward_model.evaluate(X_test_scaled, y_test)
print(f"\nFeedforward Network - Test MSE: {test_loss_ff:.4f}, Test MAE: {test_mae_ff:.4f}")

In [None]:
# Side-by-side learning curves for the two regression models
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Each panel: (axis, y-axis label, title, curves), where every curve is
# (training history, key into history.history, legend label).
# The curve order is kept fixed so line colours and legend order do not change.
panels = [
    (axes[0], 'MSE Loss', 'Training Loss Comparison', [
        (history_linear, 'loss', 'Linear Regression - Train'),
        (history_linear, 'val_loss', 'Linear Regression - Val'),
        (history_ff, 'loss', 'Feedforward - Train'),
        (history_ff, 'val_loss', 'Feedforward - Val'),
    ]),
    (axes[1], 'MAE', 'Mean Absolute Error Comparison', [
        (history_linear, 'mae', 'Linear Regression - Train'),
        (history_linear, 'val_mae', 'Linear Regression - Val'),
        (history_ff, 'mae', 'Feedforward - Train'),
        (history_ff, 'val_mae', 'Feedforward - Val'),
    ]),
]

for ax, y_label, title, curves in panels:
    for run, key, legend_label in curves:
        ax.plot(run.history[key], label=legend_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Summary table of the test-set metrics computed in the training cells
divider = "=" * 50
print("\n" + divider)
print("Final Model Comparison on Test Set")
print(divider)
print(f"Linear Regression: MSE = {test_loss_linear:.4f}, MAE = {test_mae_linear:.4f}")
print(f"Feedforward NN:    MSE = {test_loss_ff:.4f}, MAE = {test_mae_ff:.4f}")
print(divider)