In [3]:
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact, FloatSlider, Dropdown, Output
from IPython.display import display, clear_output
%matplotlib inline

class MLPLossFunctionsVisualization:
    """Interactive notebook demo of MLP loss and activation functions.

    Every plot and explanatory bullet list is rendered into one
    ``ipywidgets.Output`` area (``self.output``). The dropdown created by
    :meth:`setup_interactive_widgets` switches that area between the
    loss-function view and the activation-function view.
    """

    def __init__(self):
        """Create the output area that every visualization draws into."""
        self.output = Output()

    def sigmoid(self, x):
        """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

        The value is computed from ``exp(-|x|)``, which is always in
        ``(0, 1]``, so large-magnitude inputs cannot overflow the way a
        direct ``exp(-x)`` does for very negative ``x``.

        Args:
            x: Scalar or array-like of real inputs.

        Returns:
            A NumPy scalar for scalar input, otherwise an array with the
            same shape as ``x``.
        """
        x = np.asarray(x)
        decay = np.exp(-np.abs(x))
        # Both branches are finite everywhere; np.where only selects the
        # algebraically equivalent form matching the sign of x.
        result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
        # Hand scalars back as scalars rather than 0-d arrays.
        return result[()] if result.ndim == 0 else result

    def relu(self, x):
        """Return ``max(0, x)`` element-wise (ReLU function)."""
        return np.maximum(0, x)

    def softmax(self, x, axis=-1):
        """Return the softmax of ``x`` along ``axis``.

        The maximum along ``axis`` is subtracted before exponentiating,
        which leaves the result unchanged mathematically but keeps
        ``exp`` from overflowing.

        Args:
            x: Scalar or array-like of logits.
            axis: Axis holding the class logits. The default (last axis)
                gives the same result as before for 1-D input and one
                distribution per row for 2-D input.

        Returns:
            Values of the same shape as ``x`` that sum to 1 along ``axis``.
        """
        logits = np.asarray(x)
        if logits.ndim == 0:
            # A lone logit has no axis to reduce over.
            axis = None
        peak = logits.max(axis=axis, keepdims=True)
        exp_x = np.asarray(np.exp(logits - peak))
        return exp_x / exp_x.sum(axis=axis, keepdims=True)

    def square_loss(self, y_pred, y_true):
        """Return the squared error ``(y_pred - y_true) ** 2`` element-wise."""
        return (y_pred - y_true) ** 2

    def cross_entropy_loss(self, y_pred, y_true):
        """Return the binary cross-entropy of ``y_pred`` against ``y_true``.

        Computes ``-y_true*log(y_pred) - (1 - y_true)*log(1 - y_pred)``.

        Args:
            y_pred: Predicted probability (scalar or array).
            y_true: Target value(s), broadcast against ``y_pred``.

        Returns:
            The element-wise loss.
        """
        # Clip away from exactly 0 and 1 so neither log() receives zero.
        eps = 1e-15
        y_pred = np.clip(y_pred, eps, 1 - eps)
        return -y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred)

    def _style_axes(self, ax, title, xlabel, ylabel):
        """Apply the shared title, axis-label and grid styling to ``ax``."""
        ax.set_title(title, fontsize=14)
        ax.set_xlabel(xlabel, fontsize=12)
        ax.set_ylabel(ylabel, fontsize=12)
        ax.grid(True, alpha=0.3)

    def _draw_reference_lines(self, ax, hlines=(0,), vlines=(0,)):
        """Draw faint black horizontal/vertical guide lines on ``ax``."""
        for y in hlines:
            ax.axhline(y=y, color='k', linestyle='-', alpha=0.3)
        for x in vlines:
            ax.axvline(x=x, color='k', linestyle='-', alpha=0.3)

    def _display_feature_lists(self, features_by_name, heading_suffix):
        """Show each entry as an ``<h3>`` heading followed by a bullet list.

        Args:
            features_by_name: Mapping of display name to a list of
                feature strings.
            heading_suffix: Text appended after the name in the heading.
        """
        for name, features in features_by_name.items():
            display(widgets.HTML(f"<h3>{name} {heading_suffix}</h3>"))
            feature_list = "".join(f"<li>{feature}</li>" for feature in features)
            display(widgets.HTML(f"<ul>{feature_list}</ul>"))

    def visualize_loss_functions(self):
        """Plot square loss and cross-entropy loss, then list their features.

        Both curves are drawn for a fixed true value of 1.0 and the result
        replaces whatever is currently shown in ``self.output``.
        """
        with self.output:
            clear_output(wait=True)
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
            y_true = 1.0

            # MSE loss function
            mse_inputs = np.linspace(-2, 3, 100)
            mse_loss = self.square_loss(mse_inputs, y_true)

            ax1.plot(mse_inputs, mse_loss, 'b-', linewidth=2)
            self._style_axes(ax1, 'Square Loss Function (MSE)',
                             'Predicted Value', 'Loss')
            ax1.axvline(x=y_true, color='g', linestyle='--', alpha=0.7)
            ax1.text(y_true+0.1, 0.5, f'True Value: {y_true}',
                    color='g', fontsize=10, verticalalignment='center')
            ax1.set_ylim(0, 10)

            # Cross-entropy loss function; probabilities stay strictly
            # inside (0, 1) so the curve remains finite.
            ce_inputs = np.linspace(0.01, 0.99, 100)
            ce_loss = self.cross_entropy_loss(ce_inputs, y_true)

            ax2.plot(ce_inputs, ce_loss, 'r-', linewidth=2)
            self._style_axes(ax2, 'Cross-Entropy Loss Function',
                             'Predicted Probability', 'Loss')
            ax2.axvline(x=y_true, color='g', linestyle='--', alpha=0.7)
            ax2.text(y_true-0.2, 0.5, f'True Value: {y_true}',
                    color='g', fontsize=10, verticalalignment='center')
            ax2.set_ylim(0, 5)

            plt.tight_layout()
            plt.show()

            # Loss function features
            loss_features = {
                'MSE (Square Loss)': [
                    'Primarily used for regression problems',
                    'Calculates the square of the difference between predicted and actual values',
                    'Differentiable, suitable for gradient descent',
                    'Sensitive to outliers',
                    'Formula: L = (y_pred - y_true)²'
                ],
                'Cross-Entropy Loss': [
                    'Primarily used for classification problems',
                    'Measures the difference between predicted probability distribution and actual distribution',
                    'Gradient calculation becomes simple when used with softmax',
                    'Applies larger penalties to incorrect predictions',
                    'Formula: L = -Σ(y_true * log(y_pred))'
                ]
            }

            # Display loss function features
            self._display_feature_lists(loss_features, 'Features:')

    def visualize_activation_functions(self):
        """Plot sigmoid, ReLU, softmax and linear activations with use cases.

        The 2x2 figure and the bullet lists replace whatever is currently
        shown in ``self.output``.
        """
        with self.output:
            clear_output(wait=True)
            fig, axs = plt.subplots(2, 2, figsize=(16, 10))

            # Input range
            x = np.linspace(-5, 5, 200)

            # Sigmoid
            ax = axs[0, 0]
            ax.plot(x, self.sigmoid(x), 'b-', linewidth=2)
            self._style_axes(ax, 'Sigmoid Function', 'Input', 'Output')
            ax.set_ylim(-0.1, 1.1)
            self._draw_reference_lines(ax, hlines=(0, 1))

            # ReLU
            ax = axs[0, 1]
            ax.plot(x, self.relu(x), 'r-', linewidth=2)
            self._style_axes(ax, 'ReLU Function', 'Input', 'Output')
            self._draw_reference_lines(ax)

            # Softmax (3 classes): each row holds the logits [t, 0, -t]
            # for one sample t, evaluated in a single vectorized call.
            x_softmax = np.linspace(-5, 5, 100)
            logits = np.stack(
                [x_softmax, np.zeros_like(x_softmax), -x_softmax], axis=1
            )
            softmax_results = self.softmax(logits)

            ax = axs[1, 0]
            ax.plot(x_softmax, softmax_results[:, 0], 'r-', label='Class 1', linewidth=2)
            ax.plot(x_softmax, softmax_results[:, 1], 'g-', label='Class 2', linewidth=2)
            ax.plot(x_softmax, softmax_results[:, 2], 'b-', label='Class 3', linewidth=2)
            self._style_axes(ax, 'Softmax Function (3 Classes)',
                             'Input', 'Probability')
            ax.legend()
            ax.set_ylim(-0.1, 1.1)

            # Linear function
            ax = axs[1, 1]
            ax.plot(x, x, 'g-', linewidth=2)
            self._style_axes(ax, 'Linear Function', 'Input', 'Output')
            self._draw_reference_lines(ax)

            plt.tight_layout()
            plt.show()

            # Activation function use cases
            activation_use_cases = {
                'Sigmoid': [
                    'Output range: 0~1',
                    'Used in output layer for binary classification',
                    'Advantage: Can be interpreted as probability, differentiable',
                    'Disadvantage: Vanishing gradient problem, slow learning'
                ],
                'ReLU': [
                    'Output range: 0~∞',
                    'Most widely used in hidden layers',
                    'Advantage: Fast learning, mitigates vanishing gradient',
                    'Disadvantage: Dying ReLU problem (gradient 0 for negative inputs)'
                ],
                'Softmax': [
                    'Output: Probability distribution across all classes (sum=1)',
                    'Used in output layer for multi-class classification',
                    'Advantage: Simple derivative calculation when used with cross-entropy',
                    'Characteristic: Expresses relative probability for each class'
                ],
                'Linear': [
                    'Output range: -∞~∞',
                    'Used in output layer for regression problems',
                    'Advantage: Unlimited output range',
                    'Characteristic: Simply outputs the input as is'
                ]
            }

            self._display_feature_lists(activation_use_cases,
                                        'Activation Function:')

    def setup_interactive_widgets(self):
        """Display the heading, dropdown and output area, then draw the
        initial (loss-function) view.

        Changing the dropdown value re-renders ``self.output`` with the
        selected visualization.
        """
        tab_dropdown = Dropdown(
            options=['Loss Functions', 'Activation Functions'],
            value='Loss Functions',
            description='Visualization:',
            style={'description_width': 'initial'}
        )

        def on_change(change):
            # Called with the trait-change dict; 'new' is the selected label.
            if change['new'] == 'Loss Functions':
                self.visualize_loss_functions()
            else:
                self.visualize_activation_functions()

        tab_dropdown.observe(on_change, names='value')

        display(widgets.HTML("<h2>MLP Loss and Activation Functions Visualization</h2>"))
        display(widgets.HTML("<p>Select the item to visualize from the dropdown menu.</p>"))
        display(tab_dropdown)
        display(self.output)

        # Show initial visualization
        self.visualize_loss_functions()

# Run visualization: build the demo object, then display its heading,
# dropdown and output area (the loss-function view is drawn first).
viz = MLPLossFunctionsVisualization()
viz.setup_interactive_widgets()

HTML(value='<h2>MLP Loss and Activation Functions Visualization</h2>')

HTML(value='<p>Select the item to visualize from the dropdown menu.</p>')

Dropdown(description='Visualization:', options=('Loss Functions', 'Activation Functions'), style=DescriptionSt…

Output()