<a href="https://colab.research.google.com/github/Brilland-baba/Brilland-BABA/blob/main/Linear_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Notebook by Rodeo Oswald Y. TOHA

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.datasets import make_regression
import warnings
warnings.filterwarnings('ignore')

In [None]:
class RegularizationDemo:
    def __init__(self, random_state=42):
        """Create the demo: draw the synthetic dataset and build the models.

        Args:
            random_state: Seed stored on the instance; it is used when the
                data is generated and handed to every estimator.
        """
        self.random_state = random_state

        dataset = self._generate_data()
        self.X, self.y, self.true_coef = dataset

        # Start with an empty registry; setup_models() replaces it.
        self.models = {}
        self.setup_models()

    def _generate_data(self, n_samples=100, n_features=2):
        """Generate correlated data for demonstration"""
        np.random.seed(self.random_state)

        # Create correlated features
        X = np.random.randn(n_samples, 2)
        X[:, 1] = 0.7 * X[:, 0] + 0.3 * X[:, 1]  # Make features correlated

        # True coefficients
        true_coef = np.array([2.0, -1.5])

        # Generate target with noise
        y = X @ true_coef + np.random.randn(n_samples) * 0.5

        return X, y, true_coef

    def setup_models(self):
        """Build one scaler + estimator pipeline for each penalty type.

        Replaces ``self.models`` with a dict keyed by display name.  Every
        estimator starts at ``alpha=0.1`` (and ``l1_ratio=0.5`` for
        ElasticNet) and receives the instance's ``random_state``.
        """
        seed = self.random_state
        estimators = {
            'Ridge (L2)': Ridge(alpha=0.1, random_state=seed),
            'Lasso (L1)': Lasso(alpha=0.1, random_state=seed),
            'ElasticNet': ElasticNet(alpha=0.1, l1_ratio=0.5, random_state=seed),
        }

        # Each estimator gets its own scaler so the pipelines share no state.
        self.models = {
            label: Pipeline([('scaler', StandardScaler()), ('model', estimator)])
            for label, estimator in estimators.items()
        }

    def fit_models(self, alpha=0.1, l1_ratio=0.5):
        """Refit every pipeline on the stored data and return the coefficients.

        The shared pipelines in ``self.models`` are updated in place, so after
        this call they hold the fit for the given ``alpha`` / ``l1_ratio``.

        Args:
            alpha: Regularization strength applied to all three estimators.
            l1_ratio: L1/L2 mixing parameter; only set on the ElasticNet.

        Returns:
            Dict mapping model name to a copy of the fitted estimator's
            ``coef_`` array.  Because each pipeline standardizes the features
            before fitting, these are coefficients of the scaled features.
        """
        # Update parameters
        self.models['Ridge (L2)'].set_params(model__alpha=alpha)
        self.models['Lasso (L1)'].set_params(model__alpha=alpha)
        self.models['ElasticNet'].set_params(model__alpha=alpha, model__l1_ratio=l1_ratio)

        coefficients = {}
        for name, pipeline in self.models.items():
            pipeline.fit(self.X, self.y)
            # The coefficients live on the final estimator, not on the
            # Pipeline itself.  Copy them so the returned arrays cannot be
            # affected by a later refit of the same estimator.
            coefficients[name] = pipeline.named_steps['model'].coef_.copy()

        return coefficients

    def plot_contour_comparison(self, alphas=(0.01, 0.1, 1.0, 10.0)):
        """Plot coefficient paths and constraint-region panels on a 2x2 grid.

        The top-left panel shows each model's coefficients over a fixed grid
        of 100 log-spaced alpha values between 1e-3 and 1e2.  Each remaining
        panel shows the constraint-region view for one value from ``alphas``.

        Args:
            alphas: Regularization strengths for the constraint panels.  Only
                the first three are drawn, because the grid has three free
                panels; further values are ignored.  Panels left without a
                value are hidden.
        """
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        axes = axes.ravel()

        # Generate a range of alpha values for paths
        alpha_range = np.logspace(-3, 2, 100)

        # Collect one coefficient vector per (model, alpha) pair.
        coef_paths = {name: [] for name in self.models}
        for alpha in alpha_range:
            for name, coef in self.fit_models(alpha=alpha).items():
                coef_paths[name].append(np.array(coef))

        # Stack into arrays of shape (n_alphas, n_coefficients).
        coef_paths = {name: np.array(path) for name, path in coef_paths.items()}

        # Plot 1: Coefficient paths vs alpha
        ax = axes[0]
        colors = {'Ridge (L2)': 'blue', 'Lasso (L1)': 'red', 'ElasticNet': 'green'}

        for name, path in coef_paths.items():
            for i in range(path.shape[1]):
                # Only the first coefficient of each model gets a legend
                # entry; the others are drawn fainter in the same colour.
                ax.semilogx(alpha_range, path[:, i],
                            color=colors[name],
                            alpha=0.7 if i == 0 else 0.4,
                            label=f'{name} - coef{i+1}' if i == 0 else "")

        ax.axhline(y=self.true_coef[0], color='black', linestyle='--', alpha=0.5, label='True coef1')
        ax.axhline(y=self.true_coef[1], color='gray', linestyle='--', alpha=0.5, label='True coef2')
        ax.set_xlabel('Alpha (Regularization strength)')
        ax.set_ylabel('Coefficient Value')
        ax.set_title('Coefficient Paths vs Regularization Strength')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Plot 2-4: Constraint regions for specific alpha values
        panel_axes = axes[1:]
        panel_alphas = list(alphas)[:len(panel_axes)]
        for panel_ax, panel_alpha in zip(panel_axes, panel_alphas):
            self._plot_constraint_region(panel_ax, panel_alpha, coef_paths, colors)

        # Hide panels that received no alpha instead of showing empty axes.
        for unused_ax in panel_axes[len(panel_alphas):]:
            unused_ax.set_visible(False)

        plt.tight_layout()
        plt.show()

    def _plot_constraint_region(self, ax, alpha, coef_paths, colors):
        """Draw the L2/L1 constraint boundaries and every model's solution.

        All models are refit at ``alpha`` and shown as points in coefficient
        space.  A penalised solution also solves the constrained problem
        whose budget equals the norm of that solution, so the L2 circle is
        drawn with radius ``||ridge coef||_2`` and the L1 diamond with
        half-width ``||lasso coef||_1``.  The Ridge and Lasso markers
        therefore sit on their respective boundaries.

        Args:
            ax: Matplotlib axes to draw on.
            alpha: Regularization strength used for the fits and the title.
            coef_paths: Not used by this method; kept so existing callers
                keep working.
            colors: Mapping from model name to marker colour.
        """
        # Get coefficients for this alpha
        coefs = self.fit_models(alpha=alpha)

        # Constraint budgets implied by the fitted solutions.
        l2_radius = np.linalg.norm(coefs['Ridge (L2)'])
        l1_radius = np.abs(coefs['Lasso (L1)']).sum()

        # L2 constraint (circle)
        theta = np.linspace(0, 2 * np.pi, 100)
        ax.plot(l2_radius * np.cos(theta), l2_radius * np.sin(theta),
                'b--', alpha=0.5, label='L2 constraint')

        # L1 constraint (diamond): corners on the axes, closed polygon.
        ax.plot(l1_radius * np.array([1, 0, -1, 0, 1]),
                l1_radius * np.array([0, 1, 0, -1, 0]),
                'r--', alpha=0.5, label='L1 constraint')

        # Plot coefficient points; markers are keyed by name so they do not
        # depend on dictionary order.
        markers = {'Ridge (L2)': 'o', 'Lasso (L1)': 's', 'ElasticNet': '^'}
        for name, coef in coefs.items():
            ax.plot(coef[0], coef[1], marker=markers.get(name, 'o'), color=colors[name],
                    markersize=10, label=name)

        # Near-OLS reference: a very weakly regularised Ridge fitted on a
        # throwaway pipeline, so the shared models stay fitted at ``alpha``.
        reference = Pipeline([
            ('scaler', StandardScaler()),
            ('model', Ridge(alpha=0.001, random_state=self.random_state)),
        ])
        reference.fit(self.X, self.y)
        ols_coef = reference.named_steps['model'].coef_
        ax.plot(ols_coef[0], ols_coef[1], 'k*', markersize=15, label='Near OLS')

        # Plot true coefficients
        ax.plot(self.true_coef[0], self.true_coef[1], 'gX', markersize=12,
                label='True coefficients')

        ax.set_xlabel('Coefficient 1')
        ax.set_ylabel('Coefficient 2')
        ax.set_title(f'Constraint Regions (alpha={alpha})')
        ax.legend()
        ax.grid(True, alpha=0.3)
        ax.axis('equal')

    def plot_shrinkage_effect(self, alpha_range=np.logspace(-3, 2, 50)):
        """Show how coefficient size changes as regularization grows.

        Left panel: L2 norm of each model's coefficient vector against alpha.
        Right panel: every individual coefficient against alpha.

        Args:
            alpha_range: Regularization strengths to fit, plotted on a
                logarithmic x-axis.
        """
        fig, (norm_ax, coef_ax) = plt.subplots(1, 2, figsize=(15, 6))

        # One coefficient vector per (model, alpha) pair, in alpha order.
        coef_paths = {name: [] for name in self.models}
        for strength in alpha_range:
            fitted = self.fit_models(alpha=strength)
            for name, coef in fitted.items():
                coef_paths[name].append(coef)

        colors = {'Ridge (L2)': 'blue', 'Lasso (L1)': 'red', 'ElasticNet': 'green'}

        # Left panel: overall magnitude of each model's solution.
        line_formats = {'Ridge (L2)': 'b-', 'Lasso (L1)': 'r-', 'ElasticNet': 'g-'}
        for name, fmt in line_formats.items():
            norms = [np.linalg.norm(vec) for vec in coef_paths[name]]
            norm_ax.semilogx(alpha_range, norms, fmt, label=name, linewidth=2)

        norm_ax.set_xlabel('Alpha (Regularization Strength)')
        norm_ax.set_ylabel('L2 Norm of Coefficients')
        norm_ax.set_title('Shrinkage Effect: Coefficient Magnitude vs Alpha')
        norm_ax.legend()
        norm_ax.grid(True, alpha=0.3)

        # Right panel: first coefficient solid, later ones dashed.
        for name, path in coef_paths.items():
            stacked = np.array(path)
            for i in range(stacked.shape[1]):
                style = '-' if i == 0 else '--'
                coef_ax.semilogx(alpha_range, stacked[:, i],
                                 color=colors[name],
                                 linestyle=style,
                                 label=f'{name} - coef{i+1}')

        coef_ax.axhline(y=0, color='black', linestyle=':', alpha=0.5)
        coef_ax.set_xlabel('Alpha (Regularization Strength)')
        coef_ax.set_ylabel('Coefficient Value')
        coef_ax.set_title('Individual Coefficient Shrinkage')
        coef_ax.legend()
        coef_ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

    def plot_sparsity_analysis(self):
        """Plot, per model, where each coefficient first becomes (near) zero.

        Four panels are drawn: Ridge, Lasso, and ElasticNet with
        ``l1_ratio`` 0.5 and 0.8.  Each panel fits a fresh pipeline for 200
        log-spaced alpha values between 1e-3 and 1e2 and marks the first
        alpha at which a coefficient's magnitude drops below 1e-6.
        """
        alpha_range = np.logspace(-3, 2, 200)
        zero_threshold = 1e-6  # Values below this considered zero

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        axes = axes.ravel()

        # (panel title, estimator class, extra constructor arguments)
        panel_specs = [
            ('Ridge (L2)', Ridge, {}),
            ('Lasso (L1)', Lasso, {}),
            ('ElasticNet (l1_ratio=0.5)', ElasticNet, {'l1_ratio': 0.5}),
            ('ElasticNet (l1_ratio=0.8)', ElasticNet, {'l1_ratio': 0.8}),
        ]

        for ax, (name, model_class, extra) in zip(axes, panel_specs):
            fitted = []
            for alpha in alpha_range:
                pipeline = Pipeline([
                    ('scaler', StandardScaler()),
                    ('model', model_class(alpha=alpha, **extra, random_state=self.random_state)),
                ])
                pipeline.fit(self.X, self.y)
                fitted.append(pipeline.named_steps['model'].coef_)

            fitted = np.array(fitted)
            coefs_1, coefs_2 = fitted[:, 0], fitted[:, 1]

            # Plot coefficients
            ax.semilogx(alpha_range, coefs_1, 'b-', label='Coefficient 1', linewidth=2)
            ax.semilogx(alpha_range, coefs_2, 'r-', label='Coefficient 2', linewidth=2)

            # (series, colour, annotation, y-limit index, vertical alignment)
            zero_marks = (
                (coefs_1, 'blue', ' coef1=0', 0, 'bottom'),
                (coefs_2, 'red', ' coef2=0', 1, 'top'),
            )
            for series, color, text, lim_idx, valign in zero_marks:
                zero_alphas = alpha_range[np.abs(series) < zero_threshold]
                if zero_alphas.size == 0:
                    continue
                # Mark the smallest alpha at which the coefficient vanishes.
                first_zero = zero_alphas[0]
                ax.axvline(x=first_zero, color=color, linestyle=':', alpha=0.7)
                ax.text(first_zero, ax.get_ylim()[lim_idx], text,
                        color=color, rotation=90, va=valign)

            ax.set_xlabel('Alpha')
            ax.set_ylabel('Coefficient Value')
            ax.set_title(f'{name}\nSparsity Analysis')
            ax.legend()
            ax.grid(True, alpha=0.3)
            ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)

        plt.tight_layout()
        plt.show()

    def interactive_demo(self):
        """Show slider-driven plots, or fall back to the static demo.

        If ``ipywidgets`` cannot be imported, a notice is printed and
        ``run_comprehensive_demo`` is called instead.  Otherwise two sliders
        (alpha and l1_ratio) drive a figure with a bar chart of the fitted
        coefficients and a constraint-region panel.
        """
        try:
            from ipywidgets import interact, FloatSlider
        except ImportError:
            print("IPywidgets not available. Running static demo instead.")
            self.run_comprehensive_demo()
            return

        alpha_slider = FloatSlider(min=0.001, max=10.0, value=0.1, step=0.1, description='Alpha:')
        ratio_slider = FloatSlider(min=0.0, max=1.0, value=0.5, step=0.1, description='L1 Ratio:')

        @interact(alpha=alpha_slider, l1_ratio=ratio_slider)
        def update_plots(alpha=0.1, l1_ratio=0.5):
            fig, (bar_ax, region_ax) = plt.subplots(1, 2, figsize=(15, 6))

            # Refit with the slider values.
            coefficients = self.fit_models(alpha=alpha, l1_ratio=l1_ratio)
            names = list(coefficients)

            # Grouped bars: one group per model, one bar per coefficient.
            x_pos = np.arange(len(names))
            width = 0.35
            for i in range(2):  # For two coefficients
                heights = [coefficients[name][i] for name in names]
                bar_ax.bar(x_pos + i*width, heights,
                           width, label=f'Coefficient {i+1}', alpha=0.7)

            # Dashed reference lines at the true coefficient values.
            bar_ax.axhline(y=self.true_coef[0], color='blue', linestyle='--', alpha=0.5)
            bar_ax.axhline(y=self.true_coef[1], color='orange', linestyle='--', alpha=0.5)
            bar_ax.set_xlabel('Model')
            bar_ax.set_ylabel('Coefficient Value')
            bar_ax.set_title(f'Coefficient Values (alpha={alpha}, l1_ratio={l1_ratio})')
            bar_ax.set_xticks(x_pos + width/2)
            bar_ax.set_xticklabels(names, rotation=45)
            bar_ax.legend()
            bar_ax.grid(True, alpha=0.3)

            # Second panel: constraint visualization
            self._plot_simple_constraint(region_ax, alpha, l1_ratio, coefficients)
            plt.tight_layout()
            plt.show()

    def _plot_simple_constraint(self, ax, alpha, l1_ratio, coefficients):
        """Draw constraint boundaries and solutions for the interactive demo.

        A penalised solution also solves the constrained problem whose budget
        equals the norm of that solution, so the L2 circle is drawn with
        radius ``||ridge coef||_2`` and the L1 diamond with half-width
        ``||lasso coef||_1``.  A boundary is skipped if the corresponding
        model is missing from ``coefficients``.

        Args:
            ax: Matplotlib axes to draw on.
            alpha: Regularization strength that produced ``coefficients``;
                not needed for drawing, kept for interface compatibility.
            l1_ratio: ElasticNet mixing parameter that produced
                ``coefficients``; not needed for drawing, kept for interface
                compatibility.
            coefficients: Dict mapping model name to its fitted coefficients.
        """
        theta = np.linspace(0, 2 * np.pi, 100)

        # L2 constraint: circle through the Ridge solution.
        ridge_coef = coefficients.get('Ridge (L2)')
        if ridge_coef is not None:
            l2_radius = np.linalg.norm(ridge_coef)
            ax.plot(l2_radius * np.cos(theta), l2_radius * np.sin(theta),
                    'b--', alpha=0.3, label='L2 constraint')

        # L1 constraint: diamond through the Lasso solution.
        lasso_coef = coefficients.get('Lasso (L1)')
        if lasso_coef is not None:
            l1_radius = np.abs(lasso_coef).sum()
            ax.plot(l1_radius * np.array([1, 0, -1, 0, 1]),
                    l1_radius * np.array([0, 1, 0, -1, 0]),
                    'r--', alpha=0.3, label='L1 constraint')

        # Plot coefficients
        colors = {'Ridge (L2)': 'blue', 'Lasso (L1)': 'red', 'ElasticNet': 'green'}
        markers = {'Ridge (L2)': 'o', 'Lasso (L1)': 's', 'ElasticNet': '^'}

        for name, coef in coefficients.items():
            ax.plot(coef[0], coef[1], marker=markers[name], color=colors[name],
                    markersize=10, label=name)

        # Near-OLS reference: a very weakly regularised Ridge fitted on a
        # throwaway pipeline, so the shared models keep the alpha and
        # l1_ratio the caller just fitted them with.
        reference = Pipeline([
            ('scaler', StandardScaler()),
            ('model', Ridge(alpha=0.001, random_state=self.random_state)),
        ])
        reference.fit(self.X, self.y)
        ols_coef = reference.named_steps['model'].coef_
        ax.plot(ols_coef[0], ols_coef[1], 'k*', markersize=12, label='Near OLS')

        ax.set_xlabel('Coefficient 1')
        ax.set_ylabel('Coefficient 2')
        ax.set_title('Constraint Regions & Solutions')
        ax.legend()
        ax.grid(True, alpha=0.3)
        ax.axis('equal')

    def run_comprehensive_demo(self):
        """Print the true coefficients, render each static plot, then recap."""
        print("True coefficients:", self.true_coef)

        # (progress message, plotting method) in display order.
        steps = (
            ("\n1. Plotting contour comparison...", self.plot_contour_comparison),
            ("\n2. Plotting shrinkage effect...", self.plot_shrinkage_effect),
            ("\n3. Plotting sparsity analysis...", self.plot_sparsity_analysis),
        )
        for message, render in steps:
            print(message)
            render()

        summary = (
            "\nDemo complete! Key insights:",
            "- Ridge: Smooth shrinkage, never exactly zero",
            "- Lasso: Creates sparsity (exact zeros)",
            "- ElasticNet: Balanced approach",
        )
        for line in summary:
            print(line)

In [None]:
# Create the demo object first: no earlier cell defines `demo`, so calling
# a method on it directly would raise NameError.
demo = RegularizationDemo()
demo.interactive_demo()

interactive(children=(FloatSlider(value=0.1, description='Alpha:', max=10.0, min=0.001), FloatSlider(value=0.5…

[Here is a demo created with Claude AI for better visualization](https://claude.ai/public/artifacts/d2c370a0-2ba5-4391-a7d2-54cb663774c1)

# Notebook by Rodeo Oswald Y. TOHA