In [None]:
# Install required packages

!pip install tensorflow==2.18.0
!pip install keras==3.7.0
!pip install torch==2.5.1
!pip install torchvision==0.20.1

!pip install numpy==2.0.2
!pip install scipy==1.14.1
!pip install pandas==2.2.3

!pip install scikit-learn==1.5.2

!pip install matplotlib==3.9.2

!pip install joblib==1.4.2
!pip install python-dateutil==2.9.0.post0

!pip install sympy==1.13.1
!pip install opt-einsum==3.4.0

!pip install tensorboard==2.18.0
!pip install protobuf==5.29.0
!pip install threadpoolctl==3.5.0
!pip install packaging==24.2


# 1. Import Necessary Libraries

In [None]:
import keras
import matplotlib.pyplot as plt
import numpy as np
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam
from keras.regularizers import l2
from sklearn.model_selection import train_test_split

* numpy: For numerical operations and dataset creation.
* matplotlib.pyplot: For visualizing loss and accuracy trends.
* train_test_split: Splits the dataset into training and testing subsets.
* Sequential, Dense: *Tools to build a shallow neural network*.
* Adam: Optimizer for efficient training.
* l2: *Regularizer for weight decay, which penalizes large weights*.

# 2. Generate Synthetic Dataset

In [None]:
# Fix NumPy's global RNG so the synthetic dataset is identical on every run.
np.random.seed(42)
n_samples = 100  # total number of (x, y) pairs before the train/test split

# Features: one scalar per sample, drawn uniformly from [-1, 1).
X = np.random.uniform(low=-1, high=1, size=(n_samples, 1))

# Labels: sign of one full sine period over the input range, corrupted by
# Gaussian noise with standard deviation 0.7. The noise is drawn *after* X so
# the RNG stream is consumed in a fixed order.
clean_signal = np.sin(2 * np.pi * X).ravel()
label_noise = 0.7 * np.random.normal(size=n_samples)
y = (clean_signal + label_noise) > 0  # boolean array, True = positive class

* X: Input features uniformly sampled between *-1 and 1*.
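* y: Binary labels generated by thresholding a noisy sine function at zero: $y = \mathbb{1}[\sin(2\pi x) + \varepsilon > 0]$, where $\varepsilon$ is Gaussian noise with standard deviation $\sigma = 0.7$.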

# 3. Split Data into Training and Testing Sets

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

**Training Set**: *80% of the dataset used for model training*.

**Testing Set**: *20% reserved for evaluating the model's performance*.

# 4. Experiment Configurations

In [None]:
# Training lengths to evaluate: 250 and 500, then every 500 epochs from 1000 to 4500.
epoch_counts = [250, 500, *range(1000, 5000, 500)]

# L2 penalty strengths to compare; 0.0 is the unregularised baseline.
weight_decay_values = [0.0, 0.01, 0.1, 0.5, 1.0]

# Number of neurons in the single hidden layer (kept fixed for all runs).
hidden_layer_size = 50

* epoch_counts: Range of epochs for analyzing training duration effects.
* weight_decay_values: Specifies levels of weight decay (*L2 regularization strength*).
* hidden_layer_size: Defines the complexity of the shallow neural network.

# 5. Initialize Results Storage

In [None]:
# Maps each weight-decay value to a dict of per-checkpoint metric lists
# (filled in by the training loop).
results = dict()

Stores training and testing **losses** and **accuracies** for each weight decay value.


# 6. Iterate Over Weight Decay Values and Epoch Configurations

In [None]:
# Train ONE network per weight-decay value and record train/test metrics at
# every checkpoint listed in `epoch_counts` along that single training run.
#
# Training is resumed between checkpoints instead of restarting from scratch,
# so each curve is a genuine epoch-wise trajectory of one model and the total
# cost per weight-decay value is max(epoch_counts) epochs rather than
# sum(epoch_counts).

# fit() can only be resumed forwards, so visit checkpoints in ascending order
# (duplicates collapsed); results are re-ordered to match `epoch_counts` below.
checkpoints = sorted(set(epoch_counts))

for weight_decay in weight_decay_values:
    # Seed Python, NumPy and the Keras backend identically for every
    # configuration so all runs start from the same random state and the
    # experiment is reproducible. Note: this re-seeds NumPy's global RNG.
    keras.utils.set_random_seed(42)

    # Shallow network: 1 input -> hidden ReLU layer -> sigmoid output, with an
    # L2 penalty of strength `weight_decay` on the kernel of both Dense layers.
    model = Sequential([
        keras.Input(shape=(1,)),  # explicit input spec (replaces deprecated input_dim=)
        Dense(hidden_layer_size, activation='relu', kernel_regularizer=l2(weight_decay)),
        Dense(1, activation='sigmoid', kernel_regularizer=l2(weight_decay)),
    ])
    model.compile(optimizer=Adam(learning_rate=0.01), loss='binary_crossentropy', metrics=['accuracy'])

    # checkpoint epoch -> (train_loss, test_loss, train_acc, test_acc)
    metrics_at = {}
    epochs_done = 0

    for target_epochs in checkpoints:
        # With `initial_epoch`, `epochs` is the index of the FINAL epoch, so
        # this continues training from `epochs_done` up to `target_epochs`.
        model.fit(
            X_train, y_train,
            initial_epoch=epochs_done,
            epochs=target_epochs,
            batch_size=16,
            verbose=0,
        )
        epochs_done = target_epochs

        # Evaluate the model as it stands after `target_epochs` epochs.
        train_loss, train_acc = model.evaluate(X_train, y_train, verbose=0)
        test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
        metrics_at[target_epochs] = (train_loss, test_loss, train_acc, test_acc)

    # Store results for the current weight decay value, one entry per element
    # of `epoch_counts` and in the same order (this is what the plots index).
    results[weight_decay] = {
        'train_losses': [metrics_at[e][0] for e in epoch_counts],
        'test_losses': [metrics_at[e][1] for e in epoch_counts],
        'train_accuracies': [metrics_at[e][2] for e in epoch_counts],
        'test_accuracies': [metrics_at[e][3] for e in epoch_counts],
    }

1. Model Creation:

* A shallow neural network is created with **L2 regularization** applied to each dense layer.
* The weight decay strength is determined by the current weight_decay value.
2. Compilation:

* **Optimizer**: Adam with a *learning rate of 0.01*.
* **Loss**: Binary Crossentropy for binary classification.
* **Metrics**: *Accuracy is tracked during evaluation*.
3. Training and Evaluation:

* The model is **trained** for each epoch configuration.
* Training and testing **losses and accuracies** are evaluated and stored.
4. Result Storage:

* Results for each **weight decay value** are stored, including **losses and accuracies**.

# 7. Visualize Loss Results

In [None]:
# Plot train/test loss against the number of training epochs.
# Colour encodes the weight-decay value; line style encodes the split
# (solid + markers = test, dashed = train).
fig, ax = plt.subplots(figsize=(10, 6))
for weight_decay in weight_decay_values:
    run = results[weight_decay]
    # Let matplotlib pick the next colour in the cycle for the test curve ...
    (test_line,) = ax.plot(
        epoch_counts, run['test_losses'],
        marker='o',
        label=f"Test Loss (Weight Decay={weight_decay})",
    )
    # ... and reuse it for the train curve so each train/test pair is
    # visually linked instead of getting two unrelated colours.
    ax.plot(
        epoch_counts, run['train_losses'],
        linestyle='--',
        color=test_line.get_color(),
        label=f"Train Loss (Weight Decay={weight_decay})",
    )

ax.set_xlabel('Number of Epochs')
ax.set_ylabel('Loss (Binary Crossentropy)')
ax.set_title('Effect of Weight Decay Regularization on Loss')
ax.legend()
ax.grid(True)
plt.show()

**X-axis**: Number of epochs.

**Y-axis**: Binary crossentropy loss (*train and test*).

**Curves**:

* **Solid lines** for test loss.
* **Dashed lines** for training loss.


# 8. Visualize Accuracy Results

In [None]:
# Plot train/test accuracy against the number of training epochs.
# Colour encodes the weight-decay value; line style encodes the split
# (solid + markers = test, dashed = train).
fig, ax = plt.subplots(figsize=(10, 6))
for weight_decay in weight_decay_values:
    run = results[weight_decay]
    # Let matplotlib pick the next colour in the cycle for the test curve ...
    (test_line,) = ax.plot(
        epoch_counts, run['test_accuracies'],
        marker='o',
        label=f"Test Accuracy (Weight Decay={weight_decay})",
    )
    # ... and reuse it for the train curve so each train/test pair is
    # visually linked instead of getting two unrelated colours.
    ax.plot(
        epoch_counts, run['train_accuracies'],
        linestyle='--',
        color=test_line.get_color(),
        label=f"Train Accuracy (Weight Decay={weight_decay})",
    )

ax.set_xlabel('Number of Epochs')
ax.set_ylabel('Accuracy')
ax.set_title('Effect of Weight Decay Regularization on Accuracy')
ax.legend()
ax.grid(True)
plt.show()

**X-axis**: Number of epochs.

**Y-axis**: Accuracy (train and test).

**Curves**:

* **Solid lines** for *test accuracy*.
* **Dashed lines** for *training accuracy*.

# Key Observations
**Weight Decay Impact**:

* *Reduces overfitting* at moderate levels (e.g., $\alpha = 0.1$).
* **Excessive weight decay** (e.g., $\alpha = 1.0$) may cause **underfitting**, increasing test loss and reducing accuracy.

**Double Descent**:

* Loss and accuracy trends reveal **epoch-wise double descent behavior** for various weight decay configurations.