diff --git a/README.md b/README.md
index d95b527..d3f710c 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,327 @@
-# unsupervised_deep_learning_toolbox
-🧠 Classical Unsupervised ML Algorithms - Custom implementations of traditional unsupervised learning algorithms with comparative analysis across visualization, compression, and synthetic data generation tasks. Educational repository focused on understanding algorithm selection and real-world applications.
+# UDL Toolbox - Unsupervised Deep Learning Toolbox
+
+🧠 **Comprehensive Autoencoder Implementations with Custom Loss Functions and Visualization Tools**
+
+A complete implementation of autoencoder architectures with custom loss functions, data projection utilities, and visualization tools, built from scratch using TensorFlow/Keras.
+
+## ✨ Features
+
+### 🏗️ Autoencoder Architectures
+- **Vanilla Autoencoder**: Basic encoder-decoder architecture
+- **Sparse Autoencoder**: Enforces sparsity in hidden representations
+- **Denoising Autoencoder**: Learns robust representations from corrupted inputs
+- **Variational Autoencoder (VAE)**: Probabilistic latent space modeling
+- **Convolutional Autoencoder**: For image data with spatial structure preservation
+
+### 🎯 Custom Loss Functions
+- **Reconstruction Losses**: MSE, Binary Crossentropy, Huber Loss
+- **Regularization Terms**: KL Divergence, Sparsity Regularization, L1/L2
+- **VAE-Specific**: Combined reconstruction + KL loss with β-VAE support
+
+### 📊 Data Projection & Analysis
+- **PCA Projection**: Linear dimensionality reduction from scratch
+- **t-SNE Projection**: Non-linear embedding for visualization
+- **Latent Space Interpolation**: Smooth transitions in learned representations
+
+### 📈 Visualization Tools
+- **Latent Space Visualization**: 2D/3D plots, distribution analysis
+- **Reconstruction Quality**: Error analysis, before/after comparisons
+- **Training Progress**: Loss curves, component analysis, convergence metrics
+
+### 🛠️ Utilities
+- **Data Preprocessing**: Scaling, normalization, augmentation
+- **Model I/O**: Save/load models, checkpoints, configurations
+- **Custom Training Loops**: Gradient computation, progress tracking
+
+## 🚀 Quick Start
+
+### Installation
+
+```bash
+# Clone the repository
+git clone https://github.com/Smveer/unsupervised_deep_learning_toolbox.git
+cd unsupervised_deep_learning_toolbox
+
+# Install dependencies
+pip install -r requirements.txt
+
+# Install the package
+pip install -e .
+```
+
+### Basic Usage
+
+```python
+import numpy as np
+from udl_toolbox.autoencoders import VanillaAutoencoder
+from udl_toolbox.utils import DataPreprocessor
+from udl_toolbox.visualization import LatentSpaceVisualizer
+
+# Prepare data
+data = np.random.random((1000, 50))  # 1000 samples, 50 features
+preprocessor = DataPreprocessor(scaling_method='standard')
+data_splits = preprocessor.prepare_data(data, validation_split=0.2)
+
+# Create and train autoencoder
+autoencoder = VanillaAutoencoder(
+    input_dim=50,
+    latent_dim=10,
+    encoder_layers=[30, 20],
+    decoder_layers=[20, 30]
+)
+
+history = autoencoder.fit(
+    data_splits['train'],
+    validation_data=data_splits['validation'],
+    epochs=100,
+    batch_size=32
+)
+
+# Visualize results
+visualizer = LatentSpaceVisualizer(autoencoder)
+visualizer.plot_2d_latent_space(data_splits['train'], method='pca')
+```
+
+## 📚 Examples
+
+### Sparse Autoencoder
+```python
+from udl_toolbox.autoencoders import SparseAutoencoder
+
+sparse_ae = SparseAutoencoder(
+    input_dim=784,  # MNIST-like data
+    latent_dim=128,
+    encoder_layers=[512, 256],
+    decoder_layers=[256, 512],
+    sparsity_target=0.05,  # Target 5% activation
+    sparsity_weight=1.0,
+    activation='sigmoid'
+)
+
+# Train and analyze sparsity
+history = sparse_ae.fit(train_data, epochs=100)
+sparse_ae.visualize_sparsity(train_data)
+```
+
+### Variational Autoencoder
+```python
+from udl_toolbox.autoencoders import VariationalAutoencoder
+
+vae = VariationalAutoencoder(
+    input_dim=784,
+    latent_dim=20,
+    encoder_layers=[400, 200],
+    decoder_layers=[200, 400],
+    beta=1.0,  # β-VAE parameter
+    reconstruction_loss='binary_crossentropy'
+)
+
+# Train VAE
+history = vae.fit(train_data, epochs=100)
+
+# Generate new samples
+generated_samples = vae.generate(num_samples=10)
+
+# Interpolate between points
+interpolated = vae.interpolate(sample1, sample2, num_steps=10)
+```
+
+### Convolutional Autoencoder
+```python
+from udl_toolbox.autoencoders import ConvolutionalAutoencoder
+
+conv_ae = ConvolutionalAutoencoder(
+    input_shape=(28, 28, 1),  # MNIST images
+    latent_dim=64,
+    encoder_filters=[32, 64, 128],
+    decoder_filters=[128, 64, 32],
+    kernel_size=3,
+    strides=2
+)
+
+# Train on image data
+history = conv_ae.fit(image_data, epochs=50)
+
+# Visualize feature maps
+feature_maps = conv_ae.get_feature_maps(test_images)
+```
+
+### Custom Loss Functions
+```python
+from udl_toolbox.losses import VAELoss, SparsityRegularization
+
+# Custom VAE loss with β annealing
+vae_loss = VAELoss(
+    reconstruction_loss='mse',
+    beta=0.1,  # Start with low β
+    reduction='mean'
+)
+
+# Sparsity regularization
+sparsity_reg = SparsityRegularization(
+    sparsity_target=0.02,
+    sparsity_weight=2.0
+)
+```
+
+## 🔧 Advanced Features
+
+### Data Projection Analysis
+```python
+from udl_toolbox.projections import PCAProjection, LatentSpaceInterpolation
+
+# PCA analysis
+pca = PCAProjection(n_components=10)
+pca_data = pca.fit_transform(high_dim_data)
+explained_var = pca.get_explained_variance_ratio()
+
+# Latent space interpolation
+interpolator = LatentSpaceInterpolation(autoencoder)
+path = interpolator.latent_arithmetic(
+    base_point=latent_vector,
+    direction_vector=semantic_direction,
+    scales=[-2, -1, 0, 1, 2]
+)
+```
+
+### Model Saving & Loading
+```python
+from udl_toolbox.utils import ModelSaver
+
+saver = ModelSaver()
+
+# Save complete model
+saver.save_autoencoder(
+    autoencoder,
+    save_path="./models/my_autoencoder",
+    save_format='complete',
+    include_optimizer=True
+)
+
+# Load model
+loaded_ae = saver.load_autoencoder(
+    "./models/my_autoencoder",
+    VanillaAutoencoder
+)
+```
+
+### Comprehensive Visualization
+```python
+from udl_toolbox.visualization import ReconstructionVisualizer, LossVisualizer
+
+# Reconstruction analysis
+recon_vis = ReconstructionVisualizer(autoencoder)
+recon_vis.plot_reconstruction_comparison(test_data, num_samples=5)
+recon_vis.plot_reconstruction_error_distribution(test_data)
+recon_vis.print_reconstruction_summary(test_data)
+
+# Training analysis
+loss_vis = LossVisualizer()
+loss_vis.plot_training_curves(history)
+loss_vis.plot_loss_components(history)
+loss_vis.plot_training_summary(history)
+```
+
+## 🧪 Running Examples and Tests
+
+### Comprehensive Example
+```bash
+python examples/comprehensive_example.py
+```
+
+This runs demonstrations of all autoencoder types on synthetic data.
+
+### Tests
+```bash
+python tests/test_basic_functionality.py
+```
+
+Run the test suite to verify all components work correctly.
+
+## 📁 Project Structure
+
+```
+udl_toolbox/
+├── autoencoders/              # Autoencoder implementations
+│   ├── base.py                # Base autoencoder class
+│   ├── vanilla.py             # Vanilla autoencoder
+│   ├── sparse.py              # Sparse autoencoder
+│   ├── denoising.py           # Denoising autoencoder
+│   ├── variational.py         # Variational autoencoder
+│   └── convolutional.py       # Convolutional autoencoder
+├── losses/                    # Custom loss functions
+│   ├── reconstruction.py      # Reconstruction losses
+│   ├── regularization.py      # Regularization terms
+│   └── vae_loss.py            # VAE-specific losses
+├── projections/               # Data projection utilities
+│   ├── pca.py                 # PCA implementation
+│   ├── tsne.py                # t-SNE utilities
+│   └── interpolation.py       # Latent space interpolation
+├── visualization/             # Visualization tools
+│   ├── latent_space.py        # Latent space visualization
+│   ├── reconstruction.py      # Reconstruction quality
+│   └── training.py            # Training progress
+└── utils/                     # Utility functions
+    ├── data_preprocessing.py  # Data preprocessing
+    └── model_io.py            # Model I/O operations
+```
+
+## 🎯 Key Design Principles
+
+1. **Modular Architecture**: Each component is self-contained and reusable
+2. **Custom Implementation**: All core algorithms implemented from scratch
+3. **No Code Duplication**: Inheritance and composition prevent redundancy
+4. **Comprehensive Testing**: Full test coverage for reliability
+5. **Educational Focus**: Clear, documented code for learning
+6. **Production Ready**: Efficient, scalable implementations
+
+## 🤝 Use Cases
+
+- **Research**: Experiment with different autoencoder architectures
+- **Education**: Learn autoencoder concepts with clear implementations
+- **Prototyping**: Quickly test ideas with comprehensive tooling
+- **Production**: Deploy robust autoencoder solutions
+- **Analysis**: Deep dive into model behavior with visualization tools
+
+## 📋 Requirements
+
+- Python >= 3.8
+- TensorFlow >= 2.13.0
+- NumPy >= 1.21.0
+- Matplotlib >= 3.5.0
+- Seaborn >= 0.11.0
+- Scikit-learn >= 1.0.0
+- Pandas >= 1.3.0
+- Plotly >= 5.0.0
+
+## 🆘 Troubleshooting
+
+### Common Issues
+
+1. **Memory Issues with Large Models**: Use smaller batch sizes or gradient accumulation
+2. **Convergence Problems**: Adjust the learning rate, add batch normalization, or try different initializations
+3. **Overfitting**: Increase regularization, add dropout, or reduce model complexity
+4. **Slow Training**: Enable GPU acceleration, optimize data loading, or reduce model size
+
+### Performance Tips
+
+- Use mixed precision training for faster computation (see the sketch below)
+- Preprocess data once and cache results
+- Use TensorFlow data pipelines for efficient loading
+- Monitor GPU utilization during training
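+
+The first and third tips can be combined up front. A minimal sketch (illustrative only; the toolbox's own `fit` methods already build `tf.data` pipelines internally):
+
+```python
+import numpy as np
+import tensorflow as tf
+
+# Mixed precision: compute in float16 while keeping variables in float32
+tf.keras.mixed_precision.set_global_policy('mixed_float16')
+
+# tf.data pipeline: shuffle, batch, and prefetch so the accelerator is not starved
+data = np.random.random((1000, 50)).astype(np.float32)
+dataset = (tf.data.Dataset.from_tensor_slices(data)
+           .shuffle(buffer_size=1000)
+           .batch(32)
+           .prefetch(tf.data.AUTOTUNE))
+```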
+
+## 📄 License
+
+This project is licensed under the MIT License - see the LICENSE file for details.
+
+## 🙏 Contributing
+
+Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
+
+## 📞 Support
+
+If you encounter any issues or have questions, please open an issue on GitHub.
+
+---
+
+**Happy Learning! 🚀**
diff --git a/examples/comprehensive_example.py b/examples/comprehensive_example.py
new file mode 100644
index 0000000..b2f2438
--- /dev/null
+++ b/examples/comprehensive_example.py
@@ -0,0 +1,412 @@
+"""
+Comprehensive example demonstrating the UDL Toolbox autoencoder implementations.
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.datasets import make_blobs
+import tensorflow as tf
+
+# Import our autoencoder implementations
+from udl_toolbox.autoencoders import (
+    VanillaAutoencoder,
+    SparseAutoencoder,
+    DenoisingAutoencoder,
+    VariationalAutoencoder,
+    ConvolutionalAutoencoder
+)
+
+# Import utilities
+from udl_toolbox.utils import DataPreprocessor, ModelSaver
+from udl_toolbox.visualization import (
+    LatentSpaceVisualizer,
+    ReconstructionVisualizer,
+    LossVisualizer
+)
+from udl_toolbox.projections import PCAProjection, LatentSpaceInterpolation
+
+
+def create_synthetic_data(n_samples=1000, n_features=50, noise_level=0.1):
+    """Create synthetic data for testing."""
+    # Create clustered data
+    X, y = make_blobs(n_samples=n_samples, centers=3, n_features=n_features,
+                      cluster_std=2.0, random_state=42)
+
+    # Add noise
+    X += np.random.normal(0, noise_level, X.shape)
+
+    return X, y
+
+
+def example_vanilla_autoencoder():
+    """Demonstrate vanilla autoencoder."""
+    print("=== Vanilla Autoencoder Example ===")
+
+    # Create data
+    X, y = create_synthetic_data(n_samples=500, n_features=20)
+
+    # Preprocess data
+    preprocessor = DataPreprocessor(scaling_method='standard')
+    data_splits = preprocessor.prepare_data(X, validation_split=0.2)
+
+    # Create autoencoder
+    autoencoder = VanillaAutoencoder(
+        input_dim=20,
+        latent_dim=5,
+        encoder_layers=[15, 10],
+        decoder_layers=[10, 15],
+        learning_rate=0.001
+    )
+
+    print("Model architecture:")
+    autoencoder.summary()
+
+    # Train
+    history = autoencoder.fit(
+        data_splits['train'],
+        validation_data=data_splits['validation'],
+        epochs=50,
+        batch_size=32,
+        verbose=10
+    )
+
+    # Visualize results
+    vis = LatentSpaceVisualizer(autoencoder)
+    vis.plot_2d_latent_space(data_splits['train'], labels=y[:len(data_splits['train'])],
+                             method='pca', title="Vanilla Autoencoder Latent Space")
+
+    # Reconstruction visualization
+    recon_vis = ReconstructionVisualizer(autoencoder)
+    recon_vis.print_reconstruction_summary(data_splits['validation'])
+
+    # Loss visualization
+    loss_vis = LossVisualizer()
+    loss_vis.plot_training_curves(history, title="Vanilla Autoencoder Training")
+
+    return autoencoder, preprocessor
+
+
+def example_sparse_autoencoder():
+    """Demonstrate sparse autoencoder."""
+    print("\n=== Sparse Autoencoder Example ===")
+
+    # Create data
+    X, y = create_synthetic_data(n_samples=500, n_features=20)
+
+    # Preprocess data
+    preprocessor = DataPreprocessor(scaling_method='minmax')  #
Better for sparse AE + data_splits = preprocessor.prepare_data(X, validation_split=0.2) + + # Create sparse autoencoder + autoencoder = SparseAutoencoder( + input_dim=20, + latent_dim=10, + encoder_layers=[15], + decoder_layers=[15], + sparsity_target=0.05, + sparsity_weight=1.0, + activation='sigmoid' # Better for sparsity + ) + + # Train + history = autoencoder.fit( + data_splits['train'], + validation_data=data_splits['validation'], + epochs=50, + batch_size=32, + verbose=10 + ) + + # Analyze sparsity + autoencoder.visualize_sparsity(data_splits['train']) + + # Visualize latent space + vis = LatentSpaceVisualizer(autoencoder) + vis.plot_2d_latent_space(data_splits['train'], labels=y[:len(data_splits['train'])], + method='pca', title="Sparse Autoencoder Latent Space") + + return autoencoder + + +def example_denoising_autoencoder(): + """Demonstrate denoising autoencoder.""" + print("\n=== Denoising Autoencoder Example ===") + + # Create data + X, y = create_synthetic_data(n_samples=500, n_features=20) + + # Preprocess data + preprocessor = DataPreprocessor(scaling_method='minmax') + data_splits = preprocessor.prepare_data(X, validation_split=0.2) + + # Create denoising autoencoder + autoencoder = DenoisingAutoencoder( + input_dim=20, + latent_dim=8, + encoder_layers=[15, 12], + decoder_layers=[12, 15], + noise_type='gaussian', + noise_level=0.2 + ) + + # Train + history = autoencoder.fit( + data_splits['train'], + validation_data=data_splits['validation'], + epochs=50, + batch_size=32, + verbose=10 + ) + + # Test denoising capability + clean_test = data_splits['validation'][:10] + noisy_test = autoencoder.add_noise(clean_test, training=True) + denoising_metrics = autoencoder.test_denoising(clean_test, noisy_test) + + print(f"Denoising performance:") + for metric, value in denoising_metrics.items(): + print(f" {metric}: {value:.4f}") + + return autoencoder + + +def example_variational_autoencoder(): + """Demonstrate variational autoencoder.""" + print("\n=== Variational Autoencoder Example ===") + + # Create data + X, y = create_synthetic_data(n_samples=500, n_features=20) + + # Preprocess data + preprocessor = DataPreprocessor(scaling_method='standard') + data_splits = preprocessor.prepare_data(X, validation_split=0.2) + + # Create VAE + autoencoder = VariationalAutoencoder( + input_dim=20, + latent_dim=5, + encoder_layers=[15, 10], + decoder_layers=[10, 15], + beta=1.0, + reconstruction_loss='mse' + ) + + # Train + history = autoencoder.fit( + data_splits['train'], + validation_data=data_splits['validation'], + epochs=50, + batch_size=32, + verbose=10 + ) + + # Generate new samples + print("Generating new samples...") + generated_samples = autoencoder.generate(num_samples=5) + print(f"Generated samples shape: {generated_samples.shape}") + + # Analyze latent statistics + latent_stats = autoencoder.get_latent_statistics(data_splits['train']) + print("Latent space statistics:") + for stat, value in latent_stats.items(): + if hasattr(value, 'shape'): + print(f" {stat}: shape {value.shape}, mean {np.mean(value):.4f}") + else: + print(f" {stat}: {value:.4f}") + + # Interpolation example + if len(data_splits['train']) >= 2: + interpolator = LatentSpaceInterpolation(autoencoder) + z_interp, x_interp = interpolator.interpolate_data_points( + data_splits['train'][0], data_splits['train'][1], num_steps=5 + ) + print(f"Interpolation result shape: {x_interp.shape}") + + return autoencoder + + +def example_image_autoencoder(): + """Demonstrate convolutional autoencoder with image data.""" + 
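+    # With strides=2 and two conv blocks, 28x28 inputs downsample cleanly
+    # (28 -> 14 -> 7) and the decoder upsamples back to 28x28; input sizes not
+    # divisible by strides ** num_conv_layers would not round-trip exactly.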
print("\n=== Convolutional Autoencoder Example ===") + + # Create simple synthetic image data + def create_synthetic_images(n_samples=200, img_size=(28, 28)): + """Create synthetic image data with geometric patterns.""" + images = np.zeros((n_samples, img_size[0], img_size[1], 1)) + + for i in range(n_samples): + # Create random geometric patterns + img = np.zeros(img_size) + + # Add random rectangles + for _ in range(np.random.randint(1, 4)): + x1, y1 = np.random.randint(0, img_size[1]//2, 2) + x2, y2 = x1 + np.random.randint(5, img_size[1]//4), y1 + np.random.randint(5, img_size[0]//4) + x2, y2 = min(x2, img_size[1]), min(y2, img_size[0]) + img[y1:y2, x1:x2] = np.random.random() + + images[i, :, :, 0] = img + + return images + + # Create synthetic image data + images = create_synthetic_images(n_samples=200) + + # Preprocess images + preprocessor = DataPreprocessor() + images_processed = preprocessor.preprocess_images(images, normalize=True) + data_splits = preprocessor.prepare_data(images_processed, validation_split=0.2) + + # Create convolutional autoencoder + autoencoder = ConvolutionalAutoencoder( + input_shape=(28, 28, 1), + latent_dim=10, + encoder_filters=[16, 32], + decoder_filters=[32, 16], + kernel_size=3, + strides=2 + ) + + print("Convolutional autoencoder architecture:") + autoencoder.summary() + + # Train + history = autoencoder.fit( + data_splits['train'], + validation_data=data_splits['validation'], + epochs=20, + batch_size=16, + verbose=5 + ) + + # Visualize reconstructions + recon_vis = ReconstructionVisualizer(autoencoder) + recon_vis.plot_reconstruction_comparison( + data_splits['validation'][:5], + num_samples=5, + title="Convolutional Autoencoder Reconstructions" + ) + + return autoencoder + + +def example_model_saving_loading(): + """Demonstrate model saving and loading.""" + print("\n=== Model Saving and Loading Example ===") + + # Create and train a simple autoencoder + X, _ = create_synthetic_data(n_samples=200, n_features=10) + preprocessor = DataPreprocessor() + data_splits = preprocessor.prepare_data(X, validation_split=0.2) + + autoencoder = VanillaAutoencoder( + input_dim=10, + latent_dim=3, + encoder_layers=[8, 5], + decoder_layers=[5, 8] + ) + + # Train briefly + autoencoder.fit(data_splits['train'], epochs=10, verbose=0) + + # Save model + saver = ModelSaver() + save_path = "/tmp/test_autoencoder" + saver.save_autoencoder(autoencoder, save_path, save_format='weights') + + # Create new instance and load weights + new_autoencoder = VanillaAutoencoder( + input_dim=10, + latent_dim=3, + encoder_layers=[8, 5], + decoder_layers=[5, 8] + ) + + # Load weights + new_autoencoder.load_weights(f"{save_path}_autoencoder_weights.h5") + + # Verify they produce same results + original_output = autoencoder.reconstruct(data_splits['validation'][:5]) + loaded_output = new_autoencoder.reconstruct(data_splits['validation'][:5]) + + mse = np.mean((original_output - loaded_output) ** 2) + print(f"MSE between original and loaded model outputs: {mse:.10f}") + + if mse < 1e-6: + print("โœ“ Model loading successful!") + else: + print("โœ— Model loading failed!") + + +def example_projection_analysis(): + """Demonstrate data projection utilities.""" + print("\n=== Data Projection Analysis Example ===") + + # Create high-dimensional data + X, y = create_synthetic_data(n_samples=300, n_features=50) + + # PCA projection + pca = PCAProjection(n_components=2) + X_pca = pca.fit_transform(X) + + print(f"PCA explained variance ratio: {pca.get_explained_variance_ratio()}") + 
print(f"Cumulative variance: {pca.get_cumulative_variance_ratio()}") + + # Plot PCA results + plt.figure(figsize=(12, 5)) + + plt.subplot(1, 2, 1) + scatter = plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='tab10', alpha=0.7) + plt.colorbar(scatter) + plt.title('PCA Projection') + plt.xlabel('PC 1') + plt.ylabel('PC 2') + + plt.subplot(1, 2, 2) + plt.plot(range(1, len(pca.get_explained_variance_ratio()) + 1), + pca.get_cumulative_variance_ratio(), 'o-') + plt.xlabel('Number of Components') + plt.ylabel('Cumulative Explained Variance') + plt.title('PCA Explained Variance') + plt.grid(True, alpha=0.3) + + plt.tight_layout() + plt.show() + + +def main(): + """Run all examples.""" + print("UDL Toolbox - Comprehensive Autoencoder Examples") + print("=" * 50) + + # Set random seeds for reproducibility + np.random.seed(42) + tf.random.set_seed(42) + + # Run examples + try: + # Basic autoencoders + vanilla_ae, preprocessor = example_vanilla_autoencoder() + sparse_ae = example_sparse_autoencoder() + denoising_ae = example_denoising_autoencoder() + vae = example_variational_autoencoder() + + # Convolutional autoencoder + conv_ae = example_image_autoencoder() + + # Utility examples + example_model_saving_loading() + example_projection_analysis() + + print("\n" + "=" * 50) + print("All examples completed successfully! โœ“") + print("The UDL Toolbox is ready for use.") + + except Exception as e: + print(f"\nError running examples: {e}") + import traceback + traceback.print_exc() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b3a8ca1 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,72 @@ +[build-system] +requires = ["setuptools>=45", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "unsupervised_deep_learning_toolbox" +version = "0.1.0" +authors = [ + {name = "UDL Toolbox Team", email = "team@udltoolbox.com"} +] +description = "Comprehensive autoencoder implementations with custom loss functions and visualization tools" +readme = "README.md" +requires-python = ">=3.8" +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dependencies = [ + "tensorflow>=2.13.0", + "numpy>=1.21.0", + "matplotlib>=3.5.0", + "seaborn>=0.11.0", + "scikit-learn>=1.0.0", + "pandas>=1.3.0", + "plotly>=5.0.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=6.0", + "pytest-cov>=2.0", + "black>=22.0", + "flake8>=4.0", + "isort>=5.0", +] +jupyter = [ + "jupyter>=1.0.0", + "ipywidgets>=7.0.0", +] + +[project.urls] +Homepage = "https://github.com/Smveer/unsupervised_deep_learning_toolbox" +Repository = "https://github.com/Smveer/unsupervised_deep_learning_toolbox" +Documentation = "https://github.com/Smveer/unsupervised_deep_learning_toolbox#readme" + +[tool.setuptools.packages.find] +include = ["udl_toolbox*"] + +[tool.black] +line-length = 88 +target-version = ['py38'] + +[tool.isort] +profile = "black" +multi_line_output = 3 +line_length = 88 + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] 
+python_classes = ["Test*"] +python_functions = ["test_*"] +addopts = "--cov=udl_toolbox --cov-report=html --cov-report=term-missing" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e08eaee --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +tensorflow>=2.13.0 +numpy>=1.21.0 +matplotlib>=3.5.0 +seaborn>=0.11.0 +scikit-learn>=1.0.0 +pandas>=1.3.0 +jupyter>=1.0.0 +plotly>=5.0.0 \ No newline at end of file diff --git a/tests/test_basic_functionality.py b/tests/test_basic_functionality.py new file mode 100644 index 0000000..15572fb --- /dev/null +++ b/tests/test_basic_functionality.py @@ -0,0 +1,345 @@ +""" +Basic tests for the UDL Toolbox autoencoder implementations. +""" + +import numpy as np +import tensorflow as tf +import unittest +import tempfile +import os + +# Import the modules to test +from udl_toolbox.autoencoders import ( + VanillaAutoencoder, + SparseAutoencoder, + DenoisingAutoencoder, + VariationalAutoencoder, + ConvolutionalAutoencoder +) +from udl_toolbox.losses import ( + MeanSquaredError, + BinaryCrossentropy, + KLDivergence, + SparsityRegularization, + VAELoss +) +from udl_toolbox.projections import PCAProjection, LatentSpaceInterpolation +from udl_toolbox.utils import DataPreprocessor, ModelSaver + + +class TestLossFunctions(unittest.TestCase): + """Test custom loss functions.""" + + def setUp(self): + self.y_true = tf.constant([[1.0, 0.0, 1.0], [0.0, 1.0, 0.0]]) + self.y_pred = tf.constant([[0.9, 0.1, 0.8], [0.2, 0.8, 0.1]]) + + def test_mse_loss(self): + """Test MSE loss function.""" + mse = MeanSquaredError() + loss = mse(self.y_true, self.y_pred) + + # Manually compute expected MSE + expected = tf.reduce_mean(tf.square(self.y_true - self.y_pred)) + + self.assertAlmostEqual(float(loss), float(expected), places=5) + + def test_binary_crossentropy_loss(self): + """Test binary crossentropy loss function.""" + bce = BinaryCrossentropy() + loss = bce(self.y_true, self.y_pred) + + # Should be a positive scalar + self.assertGreater(float(loss), 0) + self.assertEqual(loss.shape, ()) + + def test_kl_divergence(self): + """Test KL divergence loss.""" + kl = KLDivergence() + mu = tf.constant([[0.1, -0.2, 0.3], [0.0, 0.1, -0.1]]) + log_var = tf.constant([[-1.0, -0.5, -1.5], [-0.8, -1.2, -0.9]]) + + loss = kl.standard_normal_kl(mu, log_var) + + # KL divergence should be non-negative + self.assertGreaterEqual(float(loss), 0) + + def test_sparsity_regularization(self): + """Test sparsity regularization.""" + sparsity = SparsityRegularization(sparsity_target=0.05, sparsity_weight=1.0) + activations = tf.constant([[0.1, 0.0, 0.05, 0.8], [0.0, 0.02, 0.0, 0.1]]) + + loss = sparsity(activations) + + # Should be a positive scalar + self.assertGreater(float(loss), 0) + + +class TestAutoencoders(unittest.TestCase): + """Test autoencoder implementations.""" + + def setUp(self): + # Create simple test data + np.random.seed(42) + tf.random.set_seed(42) + self.data = np.random.random((50, 10)).astype(np.float32) + self.image_data = np.random.random((20, 16, 16, 1)).astype(np.float32) + + def test_vanilla_autoencoder(self): + """Test vanilla autoencoder.""" + ae = VanillaAutoencoder( + input_dim=10, + latent_dim=5, + encoder_layers=[8], + decoder_layers=[8] + ) + + # Test encoding + encoded = ae.encode(self.data) + self.assertEqual(encoded.shape, (50, 5)) + + # Test decoding + decoded = ae.decode(encoded) + self.assertEqual(decoded.shape, (50, 10)) + + # Test reconstruction + reconstructed = ae.reconstruct(self.data) + 
self.assertEqual(reconstructed.shape, self.data.shape) + + # Test training (brief) + history = ae.fit(self.data, epochs=2, verbose=0) + self.assertIn('loss', history) + self.assertEqual(len(history['loss']), 2) + + def test_sparse_autoencoder(self): + """Test sparse autoencoder.""" + ae = SparseAutoencoder( + input_dim=10, + latent_dim=8, + encoder_layers=[], + decoder_layers=[], + sparsity_target=0.05, + sparsity_weight=1.0 + ) + + # Test basic functionality + encoded = ae.encode(self.data) + self.assertEqual(encoded.shape, (50, 8)) + + # Test sparsity statistics + stats = ae.get_sparsity_statistics(self.data) + self.assertIn('sparsity_ratio', stats) + self.assertIn('active_neurons', stats) + + def test_denoising_autoencoder(self): + """Test denoising autoencoder.""" + ae = DenoisingAutoencoder( + input_dim=10, + latent_dim=5, + encoder_layers=[8], + decoder_layers=[8], + noise_type='gaussian', + noise_level=0.1 + ) + + # Test noise addition + noisy_data = ae.add_noise(self.data, training=True) + self.assertEqual(noisy_data.shape, self.data.shape) + self.assertNotEqual(np.sum(np.abs(noisy_data - self.data)), 0) + + # Test denoising metrics + metrics = ae.test_denoising(self.data[:5]) + self.assertIn('mse_noisy', metrics) + self.assertIn('mse_denoised', metrics) + + def test_variational_autoencoder(self): + """Test variational autoencoder.""" + vae = VariationalAutoencoder( + input_dim=10, + latent_dim=5, + encoder_layers=[8], + decoder_layers=[8], + beta=1.0 + ) + + # Test encoding with distribution + z_mean, z_log_var, z = vae.encode(self.data, return_distribution=True) + self.assertEqual(z_mean.shape, (50, 5)) + self.assertEqual(z_log_var.shape, (50, 5)) + self.assertEqual(z.shape, (50, 5)) + + # Test generation + generated = vae.generate(num_samples=5) + self.assertEqual(generated.shape, (5, 10)) + + # Test interpolation + if len(self.data) >= 2: + interpolated = vae.interpolate( + self.data[0:1], self.data[1:2], num_steps=3 + ) + self.assertEqual(interpolated.shape, (3, 10)) + + def test_convolutional_autoencoder(self): + """Test convolutional autoencoder.""" + cae = ConvolutionalAutoencoder( + input_shape=(16, 16, 1), + latent_dim=8, + encoder_filters=[8, 16], + decoder_filters=[16, 8], + kernel_size=3, + strides=2 + ) + + # Test with image data + encoded = cae.encode_images(self.image_data) + self.assertEqual(encoded.shape, (20, 8)) + + decoded = cae.decode_images(encoded) + self.assertEqual(decoded.shape, self.image_data.shape) + + # Test receptive field calculation + rf = cae.calculate_receptive_field() + self.assertIsInstance(rf, int) + self.assertGreater(rf, 0) + + +class TestProjections(unittest.TestCase): + """Test projection utilities.""" + + def setUp(self): + np.random.seed(42) + self.data = np.random.random((100, 20)) + + def test_pca_projection(self): + """Test PCA projection.""" + pca = PCAProjection(n_components=5) + + # Test fit and transform + transformed = pca.fit_transform(self.data) + self.assertEqual(transformed.shape, (100, 5)) + + # Test inverse transform + reconstructed = pca.inverse_transform(transformed) + self.assertEqual(reconstructed.shape, self.data.shape) + + # Test explained variance + var_ratio = pca.get_explained_variance_ratio() + self.assertEqual(len(var_ratio), 5) + self.assertTrue(all(var_ratio >= 0)) + self.assertTrue(all(var_ratio <= 1)) + + def test_latent_space_interpolation(self): + """Test latent space interpolation.""" + # Create a simple autoencoder for testing + ae = VanillaAutoencoder( + input_dim=20, + latent_dim=5, + 
+            encoder_layers=[10],
+            decoder_layers=[10]
+        )
+
+        interpolator = LatentSpaceInterpolation(ae)
+
+        # Test linear interpolation
+        z1 = np.random.random(5)
+        z2 = np.random.random(5)
+
+        z_interp = interpolator.linear_interpolation(z1, z2, num_steps=5)
+        self.assertEqual(z_interp.shape, (5, 5))
+
+        # Test data point interpolation
+        x1 = self.data[0]
+        x2 = self.data[1]
+
+        z_interp, x_interp = interpolator.interpolate_data_points(x1, x2, num_steps=3)
+        self.assertEqual(z_interp.shape, (3, 5))
+        self.assertEqual(x_interp.shape, (3, 20))
+
+
+class TestUtils(unittest.TestCase):
+    """Test utility classes."""
+
+    def setUp(self):
+        np.random.seed(42)
+        self.data = np.random.random((100, 10))
+
+    def test_data_preprocessor(self):
+        """Test data preprocessor."""
+        preprocessor = DataPreprocessor(scaling_method='standard')
+
+        # Test data preparation
+        splits = preprocessor.prepare_data(self.data, validation_split=0.2)
+
+        self.assertIn('train', splits)
+        self.assertIn('validation', splits)
+        self.assertEqual(splits['train'].shape[0], 80)
+        self.assertEqual(splits['validation'].shape[0], 20)
+
+        # Test transform
+        new_data = np.random.random((10, 10))
+        transformed = preprocessor.transform(new_data)
+        self.assertEqual(transformed.shape, new_data.shape)
+
+        # Test noise addition
+        noisy = preprocessor.add_noise(self.data, noise_type='gaussian', noise_level=0.1)
+        self.assertEqual(noisy.shape, self.data.shape)
+        self.assertGreater(np.mean(np.abs(noisy - self.data)), 0)
+
+    def test_model_saver(self):
+        """Test model saving and loading."""
+        # Create a simple autoencoder
+        ae = VanillaAutoencoder(
+            input_dim=10,
+            latent_dim=5,
+            encoder_layers=[8],
+            decoder_layers=[8]
+        )
+
+        # Train briefly
+        ae.fit(self.data, epochs=1, verbose=0)
+
+        # Test saving
+        saver = ModelSaver()
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            save_path = os.path.join(temp_dir, "test_model")
+
+            # Save model
+            saver.save_autoencoder(ae, save_path, save_format='weights')
+
+            # Check files exist
+            self.assertTrue(os.path.exists(f"{save_path}_config.json"))
+            self.assertTrue(os.path.exists(f"{save_path}_autoencoder_weights.h5"))
+
+
+def run_tests():
+    """Run all tests."""
+    # Create test suite
+    test_suite = unittest.TestSuite()
+
+    # Add test cases
+    test_classes = [
+        TestLossFunctions,
+        TestAutoencoders,
+        TestProjections,
+        TestUtils
+    ]
+
+    for test_class in test_classes:
+        tests = unittest.TestLoader().loadTestsFromTestCase(test_class)
+        test_suite.addTests(tests)
+
+    # Run tests
+    runner = unittest.TextTestRunner(verbosity=2)
+    result = runner.run(test_suite)
+
+    return result.wasSuccessful()
+
+
+if __name__ == "__main__":
+    success = run_tests()
+    if success:
+        print("\n✓ All tests passed!")
+    else:
+        print("\n✗ Some tests failed!")
+    exit(0 if success else 1)
\ No newline at end of file
diff --git a/udl_toolbox/__init__.py b/udl_toolbox/__init__.py
new file mode 100644
index 0000000..0366907
--- /dev/null
+++ b/udl_toolbox/__init__.py
@@ -0,0 +1,60 @@
+"""
+UDL Toolbox - Unsupervised Deep Learning Toolbox
+
+A comprehensive implementation of autoencoder architectures with custom loss functions,
+data projection utilities, and visualization tools.
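+
+Subpackages: autoencoders, losses, projections, visualization, and utils.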
+""" + +__version__ = "0.1.0" +__author__ = "UDL Toolbox Team" + +from .autoencoders.base import BaseAutoencoder +from .autoencoders.vanilla import VanillaAutoencoder +from .autoencoders.sparse import SparseAutoencoder +from .autoencoders.denoising import DenoisingAutoencoder +from .autoencoders.variational import VariationalAutoencoder +from .autoencoders.convolutional import ConvolutionalAutoencoder + +from .losses import ( + MeanSquaredError, + BinaryCrossentropy, + KLDivergence, + SparsityRegularization, + VAELoss, +) + +from .projections import ( + PCAProjection, + TSNEProjection, + LatentSpaceInterpolation, +) + +from .visualization import ( + LatentSpaceVisualizer, + ReconstructionVisualizer, + LossVisualizer, +) + +__all__ = [ + # Autoencoders + "BaseAutoencoder", + "VanillaAutoencoder", + "SparseAutoencoder", + "DenoisingAutoencoder", + "VariationalAutoencoder", + "ConvolutionalAutoencoder", + # Loss functions + "MeanSquaredError", + "BinaryCrossentropy", + "KLDivergence", + "SparsityRegularization", + "VAELoss", + # Projections + "PCAProjection", + "TSNEProjection", + "LatentSpaceInterpolation", + # Visualization + "LatentSpaceVisualizer", + "ReconstructionVisualizer", + "LossVisualizer", +] \ No newline at end of file diff --git a/udl_toolbox/autoencoders/__init__.py b/udl_toolbox/autoencoders/__init__.py new file mode 100644 index 0000000..4f868ad --- /dev/null +++ b/udl_toolbox/autoencoders/__init__.py @@ -0,0 +1,17 @@ +"""Autoencoder implementations.""" + +from .base import BaseAutoencoder +from .vanilla import VanillaAutoencoder +from .sparse import SparseAutoencoder +from .denoising import DenoisingAutoencoder +from .variational import VariationalAutoencoder +from .convolutional import ConvolutionalAutoencoder + +__all__ = [ + "BaseAutoencoder", + "VanillaAutoencoder", + "SparseAutoencoder", + "DenoisingAutoencoder", + "VariationalAutoencoder", + "ConvolutionalAutoencoder", +] \ No newline at end of file diff --git a/udl_toolbox/autoencoders/base.py b/udl_toolbox/autoencoders/base.py new file mode 100644 index 0000000..96dacbd --- /dev/null +++ b/udl_toolbox/autoencoders/base.py @@ -0,0 +1,315 @@ +""" +Base autoencoder class providing common functionality for all autoencoder variants. +""" + +import tensorflow as tf +import numpy as np +from abc import ABC, abstractmethod +from typing import Dict, List, Optional, Tuple, Callable, Any +import warnings + + +class BaseAutoencoder(ABC): + """ + Abstract base class for all autoencoder implementations. + + This class provides common functionality including: + - Model building framework + - Training loop with custom gradient computation + - Encoding and decoding methods + - Model saving and loading + - Visualization hooks + """ + + def __init__( + self, + input_dim: int, + latent_dim: int, + encoder_layers: List[int], + decoder_layers: List[int], + activation: str = 'relu', + output_activation: str = 'sigmoid', + learning_rate: float = 0.001, + name: str = "autoencoder" + ): + """ + Initialize the base autoencoder. 
+
+        Args:
+            input_dim: Dimension of input data
+            latent_dim: Dimension of latent space
+            encoder_layers: List of hidden layer sizes for encoder
+            decoder_layers: List of hidden layer sizes for decoder
+            activation: Activation function for hidden layers
+            output_activation: Activation function for output layer
+            learning_rate: Learning rate for optimizer
+            name: Name of the model
+        """
+        self.input_dim = input_dim
+        self.latent_dim = latent_dim
+        self.encoder_layers = encoder_layers
+        self.decoder_layers = decoder_layers
+        self.activation = activation
+        self.output_activation = output_activation
+        self.learning_rate = learning_rate
+        self.name = name
+
+        # Initialize components
+        self.encoder = None
+        self.decoder = None
+        self.autoencoder = None
+        self.optimizer = None
+        self.history = {
+            'loss': [],
+            'reconstruction_loss': [],
+            'regularization_loss': []
+        }
+
+        # Build the model
+        self._build_model()
+
+    @abstractmethod
+    def _build_encoder(self) -> tf.keras.Model:
+        """Build the encoder network."""
+        pass
+
+    @abstractmethod
+    def _build_decoder(self) -> tf.keras.Model:
+        """Build the decoder network."""
+        pass
+
+    @abstractmethod
+    def _compute_loss(self, x: tf.Tensor, x_reconstructed: tf.Tensor, **kwargs) -> Dict[str, tf.Tensor]:
+        """Compute the total loss and individual loss components."""
+        pass
+
+    def _build_model(self):
+        """Build the complete autoencoder model."""
+        self.encoder = self._build_encoder()
+        self.decoder = self._build_decoder()
+
+        # Build the full autoencoder by chaining the decoder onto the encoder's
+        # own input; this stays valid for both flat (Dense) and image-shaped
+        # (Conv) encoders, unlike a hard-coded flat Input of size input_dim.
+        decoded = self.decoder(self.encoder.output)
+        self.autoencoder = tf.keras.Model(self.encoder.input, decoded, name=self.name)
+
+        # Initialize optimizer
+        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
+
+    def encode(self, x: tf.Tensor) -> tf.Tensor:
+        """
+        Encode input data to latent space.
+
+        Args:
+            x: Input data tensor
+
+        Returns:
+            Encoded representation in latent space
+        """
+        return self.encoder(x)
+
+    def decode(self, z: tf.Tensor) -> tf.Tensor:
+        """
+        Decode latent representation to data space.
+
+        Args:
+            z: Latent space tensor
+
+        Returns:
+            Reconstructed data
+        """
+        return self.decoder(z)
+
+    def reconstruct(self, x: tf.Tensor) -> tf.Tensor:
+        """
+        Reconstruct input data through the encoder-decoder pipeline.
+
+        Args:
+            x: Input data tensor
+
+        Returns:
+            Reconstructed data
+        """
+        return self.autoencoder(x)
+
+    @tf.function
+    def _train_step(self, x: tf.Tensor) -> Dict[str, tf.Tensor]:
+        """
+        Perform a single training step.
+
+        Args:
+            x: Input data batch
+
+        Returns:
+            Dictionary of loss values
+        """
+        with tf.GradientTape() as tape:
+            # Forward pass
+            x_reconstructed = self.autoencoder(x, training=True)
+
+            # Compute losses
+            losses = self._compute_loss(x, x_reconstructed)
+            total_loss = losses['total_loss']
+
+        # Compute gradients
+        gradients = tape.gradient(total_loss, self.autoencoder.trainable_variables)
+
+        # Apply gradients
+        self.optimizer.apply_gradients(zip(gradients, self.autoencoder.trainable_variables))
+
+        return losses
+
+    def fit(
+        self,
+        x_train: np.ndarray,
+        batch_size: int = 32,
+        epochs: int = 100,
+        validation_data: Optional[np.ndarray] = None,
+        verbose: int = 1,
+        callbacks: Optional[List[Callable]] = None
+    ) -> Dict[str, List[float]]:
+        """
+        Train the autoencoder.
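+
+        Training uses a custom loop built on `tf.GradientTape` rather than
+        `keras.Model.fit`, and per-epoch average losses are recorded in
+        `self.history`.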
+
+        Args:
+            x_train: Training data
+            batch_size: Size of training batches
+            epochs: Number of training epochs
+            validation_data: Optional validation data
+            verbose: Print progress every `verbose` epochs; 0 disables printing
+            callbacks: Optional list of callback functions
+
+        Returns:
+            Training history dictionary
+        """
+        # Convert to tensor dataset
+        train_dataset = tf.data.Dataset.from_tensor_slices(x_train)
+        train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
+
+        if validation_data is not None:
+            val_dataset = tf.data.Dataset.from_tensor_slices(validation_data)
+            val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
+
+        # Training loop
+        for epoch in range(epochs):
+            epoch_losses = []
+
+            # Training phase
+            for batch in train_dataset:
+                losses = self._train_step(batch)
+                epoch_losses.append({k: float(v) for k, v in losses.items()})
+
+            # Compute average losses for epoch
+            avg_losses = {}
+            for key in epoch_losses[0].keys():
+                avg_losses[key] = np.mean([loss[key] for loss in epoch_losses])
+
+            # Store in history
+            self.history['loss'].append(avg_losses['total_loss'])
+            self.history['reconstruction_loss'].append(avg_losses.get('reconstruction_loss', 0.0))
+            self.history['regularization_loss'].append(avg_losses.get('regularization_loss', 0.0))
+
+            # Validation phase
+            if validation_data is not None:
+                val_losses = []
+                for val_batch in val_dataset:
+                    val_reconstructed = self.autoencoder(val_batch, training=False)
+                    val_loss_dict = self._compute_loss(val_batch, val_reconstructed)
+                    val_losses.append({k: float(v) for k, v in val_loss_dict.items()})
+
+                avg_val_loss = np.mean([loss['total_loss'] for loss in val_losses])
+
+                if 'val_loss' not in self.history:
+                    self.history['val_loss'] = []
+                self.history['val_loss'].append(avg_val_loss)
+
+            # Print progress
+            if verbose > 0 and (epoch + 1) % verbose == 0:
+                print(f"Epoch {epoch + 1}/{epochs}")
+                print(f"  Loss: {avg_losses['total_loss']:.6f}")
+                if 'reconstruction_loss' in avg_losses:
+                    print(f"  Reconstruction Loss: {avg_losses['reconstruction_loss']:.6f}")
+                if 'regularization_loss' in avg_losses:
+                    print(f"  Regularization Loss: {avg_losses['regularization_loss']:.6f}")
+                if validation_data is not None:
+                    print(f"  Val Loss: {avg_val_loss:.6f}")
+                print()
+
+            # Execute callbacks
+            if callbacks:
+                for callback in callbacks:
+                    callback(epoch, avg_losses, self)
+
+        return self.history
+
+    def save(self, filepath: str) -> None:
+        """
+        Save the autoencoder model.
+
+        Args:
+            filepath: Path to save the model
+        """
+        self.autoencoder.save(filepath)
+
+    def load(self, filepath: str) -> None:
+        """
+        Load a saved autoencoder model.
+
+        Args:
+            filepath: Path to the saved model
+        """
+        self.autoencoder = tf.keras.models.load_model(filepath)
+        # The encoder and decoder submodels are not re-extracted from the loaded
+        # model here; in practice, you may want to save/load the encoder and
+        # decoder separately for more flexibility.
+        warnings.warn(
+            "Loading complete models may not preserve encoder/decoder separation. "
+            "Consider using save_weights/load_weights for better compatibility."
+        )
+
+    def save_weights(self, filepath: str) -> None:
+        """
+        Save only the model weights.
+
+        Args:
+            filepath: Path to save the weights
+        """
+        self.autoencoder.save_weights(filepath)
+
+    def load_weights(self, filepath: str) -> None:
+        """
+        Load saved model weights.
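+
+        The model must have been built with the same configuration (layer
+        sizes, etc.) as the one whose weights were saved.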
+ + Args: + filepath: Path to the saved weights + """ + self.autoencoder.load_weights(filepath) + + def get_config(self) -> Dict[str, Any]: + """ + Get the configuration dictionary for this autoencoder. + + Returns: + Configuration dictionary + """ + return { + 'input_dim': self.input_dim, + 'latent_dim': self.latent_dim, + 'encoder_layers': self.encoder_layers, + 'decoder_layers': self.decoder_layers, + 'activation': self.activation, + 'output_activation': self.output_activation, + 'learning_rate': self.learning_rate, + 'name': self.name + } + + def summary(self) -> None: + """Print model summaries.""" + print("=== ENCODER ===") + self.encoder.summary() + print("\n=== DECODER ===") + self.decoder.summary() + print("\n=== AUTOENCODER ===") + self.autoencoder.summary() \ No newline at end of file diff --git a/udl_toolbox/autoencoders/convolutional.py b/udl_toolbox/autoencoders/convolutional.py new file mode 100644 index 0000000..85bf98a --- /dev/null +++ b/udl_toolbox/autoencoders/convolutional.py @@ -0,0 +1,334 @@ +""" +Convolutional autoencoder implementation for image data. +""" + +import tensorflow as tf +import numpy as np +from typing import Dict, List, Tuple, Union +from .base import BaseAutoencoder +from ..losses.reconstruction import MeanSquaredError, BinaryCrossentropy + + +class ConvolutionalAutoencoder(BaseAutoencoder): + """ + Convolutional autoencoder for image data. + + Uses convolutional layers for encoding and transposed convolutions + (deconvolutions) for decoding, preserving spatial structure. + """ + + def __init__( + self, + input_shape: Tuple[int, int, int], # (height, width, channels) + latent_dim: int, + encoder_filters: List[int] = None, + decoder_filters: List[int] = None, + kernel_size: Union[int, Tuple[int, int]] = 3, + strides: Union[int, Tuple[int, int]] = 2, + activation: str = 'relu', + output_activation: str = 'sigmoid', + learning_rate: float = 0.001, + loss_type: str = 'mse', + dropout_rate: float = 0.0, + use_batch_norm: bool = True, + name: str = "convolutional_autoencoder" + ): + """ + Initialize convolutional autoencoder. 
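+
+        Note: each conv block downsamples by `strides`, so spatial dimensions
+        that are divisible by strides ** len(encoder_filters) reconstruct to
+        the original shape; other input sizes may not round-trip exactly.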
+ + Args: + input_shape: Shape of input images (height, width, channels) + latent_dim: Dimension of latent space + encoder_filters: List of filter counts for encoder conv layers + decoder_filters: List of filter counts for decoder conv layers + kernel_size: Size of convolutional kernels + strides: Stride for convolutions + activation: Activation function for hidden layers + output_activation: Activation function for output layer + learning_rate: Learning rate for optimizer + loss_type: Type of reconstruction loss + dropout_rate: Dropout rate for regularization + use_batch_norm: Whether to use batch normalization + name: Name of the model + """ + self.input_shape = input_shape + self.kernel_size = kernel_size + self.strides = strides + self.dropout_rate = dropout_rate + self.use_batch_norm = use_batch_norm + self.loss_type = loss_type + + # Set default filter configurations + if encoder_filters is None: + encoder_filters = [32, 64, 128] + if decoder_filters is None: + decoder_filters = encoder_filters[::-1] + + self.encoder_filters = encoder_filters + self.decoder_filters = decoder_filters + + # Calculate input dimension for base class + input_dim = np.prod(input_shape) + + # Initialize loss function + if loss_type == 'mse': + self.loss_fn = MeanSquaredError() + elif loss_type == 'binary_crossentropy': + self.loss_fn = BinaryCrossentropy() + else: + raise ValueError(f"Unknown loss type: {loss_type}") + + # Calculate encoded shape after convolutions + self.encoded_shape = self._calculate_encoded_shape() + + super().__init__( + input_dim=input_dim, + latent_dim=latent_dim, + encoder_layers=[], # Not used for conv layers + decoder_layers=[], # Not used for conv layers + activation=activation, + output_activation=output_activation, + learning_rate=learning_rate, + name=name + ) + + def _calculate_encoded_shape(self) -> Tuple[int, int, int]: + """Calculate the shape after all encoder convolutions.""" + h, w, c = self.input_shape + + for _ in self.encoder_filters: + if isinstance(self.strides, int): + h = h // self.strides + w = w // self.strides + else: + h = h // self.strides[0] + w = w // self.strides[1] + + # Final number of channels is the last filter count + final_channels = self.encoder_filters[-1] + + return (h, w, final_channels) + + def _build_encoder(self) -> tf.keras.Model: + """Build the convolutional encoder network.""" + inputs = tf.keras.Input(shape=self.input_shape, name=f"{self.name}_encoder_input") + x = inputs + + # Convolutional layers + for i, filters in enumerate(self.encoder_filters): + x = tf.keras.layers.Conv2D( + filters, + kernel_size=self.kernel_size, + strides=self.strides, + padding='same', + activation=self.activation, + name=f"{self.name}_encoder_conv_{i}" + )(x) + + if self.use_batch_norm: + x = tf.keras.layers.BatchNormalization( + name=f"{self.name}_encoder_bn_{i}" + )(x) + + if self.dropout_rate > 0: + x = tf.keras.layers.Dropout( + self.dropout_rate, + name=f"{self.name}_encoder_dropout_{i}" + )(x) + + # Flatten for dense layer + x_flattened = tf.keras.layers.Flatten(name=f"{self.name}_flatten")(x) + + # Dense layer to latent space + latent = tf.keras.layers.Dense( + self.latent_dim, + activation='linear', + name=f"{self.name}_latent" + )(x_flattened) + + return tf.keras.Model(inputs, latent, name=f"{self.name}_encoder") + + def _build_decoder(self) -> tf.keras.Model: + """Build the convolutional decoder network.""" + inputs = tf.keras.Input(shape=(self.latent_dim,), name=f"{self.name}_decoder_input") + + # Dense layer to reshape to encoded shape + 
encoded_size = np.prod(self.encoded_shape) + x = tf.keras.layers.Dense( + encoded_size, + activation=self.activation, + name=f"{self.name}_decoder_dense" + )(inputs) + + # Reshape to encoded shape + x = tf.keras.layers.Reshape( + self.encoded_shape, + name=f"{self.name}_reshape" + )(x) + + # Transposed convolutional layers + for i, filters in enumerate(self.decoder_filters[:-1]): # All but last + x = tf.keras.layers.Conv2DTranspose( + filters, + kernel_size=self.kernel_size, + strides=self.strides, + padding='same', + activation=self.activation, + name=f"{self.name}_decoder_conv_transpose_{i}" + )(x) + + if self.use_batch_norm: + x = tf.keras.layers.BatchNormalization( + name=f"{self.name}_decoder_bn_{i}" + )(x) + + if self.dropout_rate > 0: + x = tf.keras.layers.Dropout( + self.dropout_rate, + name=f"{self.name}_decoder_dropout_{i}" + )(x) + + # Final output layer + outputs = tf.keras.layers.Conv2DTranspose( + self.input_shape[2], # Output channels = input channels + kernel_size=self.kernel_size, + strides=self.strides, + padding='same', + activation=self.output_activation, + name=f"{self.name}_output" + )(x) + + return tf.keras.Model(inputs, outputs, name=f"{self.name}_decoder") + + def _compute_loss(self, x: tf.Tensor, x_reconstructed: tf.Tensor, **kwargs) -> Dict[str, tf.Tensor]: + """ + Compute the reconstruction loss for convolutional autoencoder. + + Args: + x: Original input images + x_reconstructed: Reconstructed images + + Returns: + Dictionary containing loss components + """ + reconstruction_loss = self.loss_fn(x, x_reconstructed) + + return { + 'total_loss': reconstruction_loss, + 'reconstruction_loss': reconstruction_loss, + 'regularization_loss': tf.constant(0.0) + } + + def encode_images(self, images: tf.Tensor) -> tf.Tensor: + """ + Encode images to latent space. + + Args: + images: Input images tensor + + Returns: + Encoded latent representations + """ + return self.encoder(images) + + def decode_images(self, latent: tf.Tensor) -> tf.Tensor: + """ + Decode latent representations to images. + + Args: + latent: Latent space tensor + + Returns: + Reconstructed images + """ + return self.decoder(latent) + + def reconstruct_images(self, images: tf.Tensor) -> tf.Tensor: + """ + Reconstruct images through encoder-decoder pipeline. + + Args: + images: Input images tensor + + Returns: + Reconstructed images + """ + return self.autoencoder(images) + + def get_feature_maps(self, images: tf.Tensor, layer_name: str = None) -> tf.Tensor: + """ + Get intermediate feature maps from encoder. + + Args: + images: Input images + layer_name: Name of layer to extract features from (if None, returns all) + + Returns: + Feature maps from specified layer + """ + if layer_name is None: + # Return all intermediate outputs + intermediate_model = tf.keras.Model( + inputs=self.encoder.input, + outputs=[layer.output for layer in self.encoder.layers if 'conv' in layer.name] + ) + return intermediate_model(images) + else: + # Return specific layer output + layer = self.encoder.get_layer(layer_name) + intermediate_model = tf.keras.Model( + inputs=self.encoder.input, + outputs=layer.output + ) + return intermediate_model(images) + + def calculate_receptive_field(self) -> int: + """ + Calculate the receptive field size of the encoder. 
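+
+        Uses the recurrence RF_i = (RF_{i-1} - 1) * stride + kernel, applied
+        once per conv layer starting from RF_0 = 1.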
+ + Returns: + Receptive field size in pixels + """ + receptive_field = 1 + + for _ in self.encoder_filters: + if isinstance(self.kernel_size, int): + kernel = self.kernel_size + stride = self.strides if isinstance(self.strides, int) else self.strides[0] + else: + kernel = self.kernel_size[0] + stride = self.strides[0] if isinstance(self.strides, tuple) else self.strides + + receptive_field = (receptive_field - 1) * stride + kernel + + return receptive_field + + def get_config(self) -> Dict: + """Get configuration dictionary.""" + config = { + 'input_shape': self.input_shape, + 'latent_dim': self.latent_dim, + 'encoder_filters': self.encoder_filters, + 'decoder_filters': self.decoder_filters, + 'kernel_size': self.kernel_size, + 'strides': self.strides, + 'activation': self.activation, + 'output_activation': self.output_activation, + 'learning_rate': self.learning_rate, + 'loss_type': self.loss_type, + 'dropout_rate': self.dropout_rate, + 'use_batch_norm': self.use_batch_norm, + 'name': self.name + } + return config + + def summary(self) -> None: + """Print model summaries with shape information.""" + print("=== ENCODER ===") + self.encoder.summary() + print(f"\nEncoded shape: {self.encoded_shape}") + print(f"Receptive field: {self.calculate_receptive_field()} pixels") + print("\n=== DECODER ===") + self.decoder.summary() + print("\n=== AUTOENCODER ===") + self.autoencoder.summary() \ No newline at end of file diff --git a/udl_toolbox/autoencoders/denoising.py b/udl_toolbox/autoencoders/denoising.py new file mode 100644 index 0000000..e6f2486 --- /dev/null +++ b/udl_toolbox/autoencoders/denoising.py @@ -0,0 +1,281 @@ +""" +Denoising autoencoder implementation. +""" + +import tensorflow as tf +import numpy as np +from typing import Dict, List, Union +from .vanilla import VanillaAutoencoder + + +class DenoisingAutoencoder(VanillaAutoencoder): + """ + Denoising autoencoder that learns to reconstruct clean data from corrupted input. + + Corrupts input data with various types of noise and trains the model to + recover the original clean data, leading to more robust feature learning. + """ + + def __init__( + self, + input_dim: int, + latent_dim: int, + encoder_layers: List[int] = None, + decoder_layers: List[int] = None, + activation: str = 'relu', + output_activation: str = 'sigmoid', + learning_rate: float = 0.001, + loss_type: str = 'mse', + noise_type: str = 'gaussian', + noise_level: float = 0.1, + dropout_rate: float = 0.0, + use_batch_norm: bool = False, + name: str = "denoising_autoencoder" + ): + """ + Initialize denoising autoencoder. 
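+
+        Noise is applied to inputs only during training; `add_noise` returns
+        its input unchanged when called with `training=False`.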
+ + Args: + input_dim: Dimension of input data + latent_dim: Dimension of latent space + encoder_layers: List of hidden layer sizes for encoder + decoder_layers: List of hidden layer sizes for decoder + activation: Activation function for hidden layers + output_activation: Activation function for output layer + learning_rate: Learning rate for optimizer + loss_type: Type of reconstruction loss + noise_type: Type of noise to add ('gaussian', 'masking', 'salt_and_pepper') + noise_level: Level/intensity of noise to add + dropout_rate: Dropout rate for regularization + use_batch_norm: Whether to use batch normalization + name: Name of the model + """ + self.noise_type = noise_type + self.noise_level = noise_level + + super().__init__( + input_dim=input_dim, + latent_dim=latent_dim, + encoder_layers=encoder_layers, + decoder_layers=decoder_layers, + activation=activation, + output_activation=output_activation, + learning_rate=learning_rate, + loss_type=loss_type, + dropout_rate=dropout_rate, + use_batch_norm=use_batch_norm, + name=name + ) + + def add_noise(self, x: tf.Tensor, training: bool = True) -> tf.Tensor: + """ + Add noise to input data. + + Args: + x: Clean input data + training: Whether in training mode (noise only added during training) + + Returns: + Noisy input data + """ + if not training: + return x + + if self.noise_type == 'gaussian': + return self._add_gaussian_noise(x) + elif self.noise_type == 'masking': + return self._add_masking_noise(x) + elif self.noise_type == 'salt_and_pepper': + return self._add_salt_and_pepper_noise(x) + else: + raise ValueError(f"Unknown noise type: {self.noise_type}") + + def _add_gaussian_noise(self, x: tf.Tensor) -> tf.Tensor: + """Add Gaussian noise to input.""" + noise = tf.random.normal(tf.shape(x), mean=0.0, stddev=self.noise_level) + return x + noise + + def _add_masking_noise(self, x: tf.Tensor) -> tf.Tensor: + """Add masking noise (randomly set values to 0).""" + mask = tf.random.uniform(tf.shape(x)) > self.noise_level + return x * tf.cast(mask, x.dtype) + + def _add_salt_and_pepper_noise(self, x: tf.Tensor) -> tf.Tensor: + """Add salt and pepper noise.""" + # Generate random values + random_vals = tf.random.uniform(tf.shape(x)) + + # Salt noise (set to 1) + salt_mask = random_vals < self.noise_level / 2 + + # Pepper noise (set to 0) + pepper_mask = random_vals > (1 - self.noise_level / 2) + + # Apply noise + noisy_x = tf.where(salt_mask, tf.ones_like(x), x) + noisy_x = tf.where(pepper_mask, tf.zeros_like(noisy_x), noisy_x) + + return noisy_x + + @tf.function + def _train_step(self, x: tf.Tensor) -> Dict[str, tf.Tensor]: + """ + Perform a single training step with noise corruption. 
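+
+        The batch is corrupted before the forward pass, but the reconstruction
+        loss is computed against the clean input.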
+
+        Args:
+            x: Clean input data batch
+
+        Returns:
+            Dictionary of loss values
+        """
+        with tf.GradientTape() as tape:
+            # Corrupt the input
+            x_noisy = self.add_noise(x, training=True)
+
+            # Forward pass with noisy input
+            x_reconstructed = self.autoencoder(x_noisy, training=True)
+
+            # Compute loss against clean target
+            losses = self._compute_loss(x, x_reconstructed)
+            total_loss = losses['total_loss']
+
+        # Compute gradients
+        gradients = tape.gradient(total_loss, self.autoencoder.trainable_variables)
+
+        # Apply gradients
+        self.optimizer.apply_gradients(zip(gradients, self.autoencoder.trainable_variables))
+
+        return losses
+
+    def fit(
+        self,
+        x_train: np.ndarray,
+        batch_size: int = 32,
+        epochs: int = 100,
+        validation_data: Union[np.ndarray, None] = None,
+        verbose: int = 1,
+        callbacks: List = None
+    ) -> Dict[str, List[float]]:
+        """
+        Train the denoising autoencoder.
+
+        Args:
+            x_train: Clean training data
+            batch_size: Size of training batches
+            epochs: Number of training epochs
+            validation_data: Optional clean validation data
+            verbose: Print-progress frequency in epochs (0 disables output)
+            callbacks: Optional list of callback functions
+
+        Returns:
+            Training history dictionary
+        """
+        # Convert to tensor dataset
+        train_dataset = tf.data.Dataset.from_tensor_slices(x_train)
+        train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
+
+        if validation_data is not None:
+            val_dataset = tf.data.Dataset.from_tensor_slices(validation_data)
+            val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
+
+        # Training loop
+        for epoch in range(epochs):
+            epoch_losses = []
+
+            # Training phase
+            for batch in train_dataset:
+                losses = self._train_step(batch)
+                epoch_losses.append({k: float(v) for k, v in losses.items()})
+
+            # Compute average losses for epoch
+            avg_losses = {}
+            for key in epoch_losses[0].keys():
+                avg_losses[key] = np.mean([loss[key] for loss in epoch_losses])
+
+            # Store in history
+            self.history['loss'].append(avg_losses['total_loss'])
+            self.history['reconstruction_loss'].append(avg_losses.get('reconstruction_loss', 0.0))
+            self.history['regularization_loss'].append(avg_losses.get('regularization_loss', 0.0))
+
+            # Validation phase: score reconstruction of clean inputs
+            # (add_noise is a no-op when training=False, so no corruption here)
+            if validation_data is not None:
+                val_losses = []
+                for val_batch in val_dataset:
+                    val_input = self.add_noise(val_batch, training=False)
+                    val_reconstructed = self.autoencoder(val_input, training=False)
+                    val_loss_dict = self._compute_loss(val_batch, val_reconstructed)
+                    val_losses.append({k: float(v) for k, v in val_loss_dict.items()})
+
+                avg_val_loss = np.mean([loss['total_loss'] for loss in val_losses])
+
+                if 'val_loss' not in self.history:
+                    self.history['val_loss'] = []
+                self.history['val_loss'].append(avg_val_loss)
+
+            # Print progress
+            if verbose > 0 and (epoch + 1) % verbose == 0:
+                print(f"Epoch {epoch + 1}/{epochs}")
+                print(f"  Loss: {avg_losses['total_loss']:.6f}")
+                if validation_data is not None:
+                    print(f"  Val Loss: {avg_val_loss:.6f}")
+                print()
+
+            # Execute callbacks
+            if callbacks:
+                for callback in callbacks:
+                    callback(epoch, avg_losses, self)
+
+        return self.history
+
+    def denoise(self, x_noisy: tf.Tensor) -> tf.Tensor:
+        """
+        Denoise input data.
+ + Args: + x_noisy: Noisy input data + + Returns: + Denoised reconstruction + """ + return self.autoencoder(x_noisy, training=False) + + def test_denoising(self, x_clean: tf.Tensor, x_noisy: tf.Tensor = None) -> Dict[str, tf.Tensor]: + """ + Test denoising performance on clean/noisy data pairs. + + Args: + x_clean: Clean reference data + x_noisy: Noisy input data (if None, noise will be added to x_clean) + + Returns: + Dictionary with denoising metrics + """ + if x_noisy is None: + x_noisy = self.add_noise(x_clean, training=True) + + # Get reconstructions + x_denoised = self.denoise(x_noisy) + + # Compute metrics + mse_noisy = tf.reduce_mean(tf.square(x_clean - x_noisy)) + mse_denoised = tf.reduce_mean(tf.square(x_clean - x_denoised)) + + # Signal-to-noise ratio improvement + snr_improvement = 10 * tf.math.log(mse_noisy / mse_denoised) / tf.math.log(10.0) + + return { + 'mse_noisy': mse_noisy, + 'mse_denoised': mse_denoised, + 'snr_improvement_db': snr_improvement, + 'denoising_ratio': mse_noisy / mse_denoised + } + + def get_config(self) -> Dict: + """Get configuration dictionary.""" + config = super().get_config() + config.update({ + 'noise_type': self.noise_type, + 'noise_level': self.noise_level + }) + return config \ No newline at end of file diff --git a/udl_toolbox/autoencoders/sparse.py b/udl_toolbox/autoencoders/sparse.py new file mode 100644 index 0000000..f7fb4ed --- /dev/null +++ b/udl_toolbox/autoencoders/sparse.py @@ -0,0 +1,246 @@ +""" +Sparse autoencoder implementation with sparsity regularization. +""" + +import tensorflow as tf +from typing import Dict, List +from .vanilla import VanillaAutoencoder +from ..losses.regularization import SparsityRegularization + + +class SparseAutoencoder(VanillaAutoencoder): + """ + Sparse autoencoder that encourages sparse activations in the hidden layer. + + Uses KL divergence-based sparsity regularization to learn sparse representations, + which can lead to more meaningful and interpretable feature learning. + """ + + def __init__( + self, + input_dim: int, + latent_dim: int, + encoder_layers: List[int] = None, + decoder_layers: List[int] = None, + activation: str = 'sigmoid', # Sigmoid is typically used for sparsity + output_activation: str = 'sigmoid', + learning_rate: float = 0.001, + loss_type: str = 'mse', + sparsity_target: float = 0.05, + sparsity_weight: float = 1.0, + dropout_rate: float = 0.0, + use_batch_norm: bool = False, + name: str = "sparse_autoencoder" + ): + """ + Initialize sparse autoencoder. 
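+
+        The penalty added to the reconstruction loss is
+        beta * sum_j KL(rho || rho_hat_j), where rho is ``sparsity_target``,
+        beta is ``sparsity_weight``, and rho_hat_j is the mean activation of
+        hidden unit j over the batch (see SparsityRegularization).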
+ + Args: + input_dim: Dimension of input data + latent_dim: Dimension of latent space + encoder_layers: List of hidden layer sizes for encoder + decoder_layers: List of hidden layer sizes for decoder + activation: Activation function for hidden layers (sigmoid recommended) + output_activation: Activation function for output layer + learning_rate: Learning rate for optimizer + loss_type: Type of reconstruction loss + sparsity_target: Target average activation (rho) + sparsity_weight: Weight for sparsity regularization (beta) + dropout_rate: Dropout rate for regularization + use_batch_norm: Whether to use batch normalization + name: Name of the model + """ + self.sparsity_target = sparsity_target + self.sparsity_weight = sparsity_weight + + # Initialize sparsity regularization + self.sparsity_fn = SparsityRegularization( + sparsity_target=sparsity_target, + sparsity_weight=sparsity_weight + ) + + super().__init__( + input_dim=input_dim, + latent_dim=latent_dim, + encoder_layers=encoder_layers, + decoder_layers=decoder_layers, + activation=activation, + output_activation=output_activation, + learning_rate=learning_rate, + loss_type=loss_type, + dropout_rate=dropout_rate, + use_batch_norm=use_batch_norm, + name=name + ) + + # Store activations for sparsity computation + self.hidden_activations = None + + def _build_encoder(self) -> tf.keras.Model: + """Build the encoder network with sparsity monitoring.""" + inputs = tf.keras.Input(shape=(self.input_dim,), name=f"{self.name}_encoder_input") + x = inputs + + # Hidden layers + for i, units in enumerate(self.encoder_layers): + x = tf.keras.layers.Dense( + units, + activation=self.activation, + name=f"{self.name}_encoder_dense_{i}" + )(x) + + if self.use_batch_norm: + x = tf.keras.layers.BatchNormalization( + name=f"{self.name}_encoder_bn_{i}" + )(x) + + if self.dropout_rate > 0: + x = tf.keras.layers.Dropout( + self.dropout_rate, + name=f"{self.name}_encoder_dropout_{i}" + )(x) + + # Latent layer with activation (important for sparsity) + latent = tf.keras.layers.Dense( + self.latent_dim, + activation=self.activation, # Use activation (typically sigmoid) for sparsity + name=f"{self.name}_latent" + )(x) + + encoder = tf.keras.Model(inputs, latent, name=f"{self.name}_encoder") + + # Create a model that also outputs the last hidden layer for sparsity computation + if self.encoder_layers: + # If we have hidden layers, use the last one for sparsity + hidden_output = encoder.layers[-2].output # Second to last layer (before latent) + else: + # If no hidden layers, use the latent layer itself + hidden_output = latent + + self.encoder_with_hidden = tf.keras.Model( + inputs, + [latent, hidden_output], + name=f"{self.name}_encoder_with_hidden" + ) + + return encoder + + @tf.function + def _train_step(self, x: tf.Tensor) -> Dict[str, tf.Tensor]: + """ + Perform a single training step with sparsity regularization. 
+ + Args: + x: Input data batch + + Returns: + Dictionary of loss values + """ + with tf.GradientTape() as tape: + # Forward pass through encoder (get both latent and hidden activations) + latent, hidden_activations = self.encoder_with_hidden(x, training=True) + + # Forward pass through decoder + x_reconstructed = self.decoder(latent, training=True) + + # Compute reconstruction loss + reconstruction_loss = self.loss_fn(x, x_reconstructed) + + # Compute sparsity loss + sparsity_loss = self.sparsity_fn(hidden_activations) + + # Total loss + total_loss = reconstruction_loss + sparsity_loss + + # Compute gradients + gradients = tape.gradient(total_loss, self.autoencoder.trainable_variables) + + # Apply gradients + self.optimizer.apply_gradients(zip(gradients, self.autoencoder.trainable_variables)) + + return { + 'total_loss': total_loss, + 'reconstruction_loss': reconstruction_loss, + 'regularization_loss': sparsity_loss, + 'sparsity_loss': sparsity_loss + } + + def _compute_loss(self, x: tf.Tensor, x_reconstructed: tf.Tensor, **kwargs) -> Dict[str, tf.Tensor]: + """ + Compute the total loss including sparsity regularization. + + Args: + x: Original input data + x_reconstructed: Reconstructed data from autoencoder + + Returns: + Dictionary containing loss components + """ + # Reconstruction loss + reconstruction_loss = self.loss_fn(x, x_reconstructed) + + # Get hidden activations for sparsity computation + _, hidden_activations = self.encoder_with_hidden(x, training=False) + sparsity_loss = self.sparsity_fn(hidden_activations) + + # Total loss + total_loss = reconstruction_loss + sparsity_loss + + return { + 'total_loss': total_loss, + 'reconstruction_loss': reconstruction_loss, + 'regularization_loss': sparsity_loss, + 'sparsity_loss': sparsity_loss + } + + def get_sparsity_statistics(self, x: tf.Tensor) -> Dict[str, tf.Tensor]: + """ + Get sparsity statistics for the hidden layer. + + Args: + x: Input data + + Returns: + Dictionary with sparsity statistics + """ + _, hidden_activations = self.encoder_with_hidden(x, training=False) + + # Compute average activation per neuron + avg_activation = tf.reduce_mean(hidden_activations, axis=0) + + # Compute sparsity metrics + sparsity_ratio = tf.reduce_mean(tf.cast(hidden_activations < 0.1, tf.float32)) + active_neurons = tf.reduce_sum(tf.cast(avg_activation > 0.1, tf.float32)) + + return { + 'average_activation': avg_activation, + 'sparsity_ratio': sparsity_ratio, + 'active_neurons': active_neurons, + 'total_neurons': tf.constant(float(hidden_activations.shape[-1])) + } + + def visualize_sparsity(self, x: tf.Tensor) -> None: + """ + Print sparsity statistics. 
+ + Args: + x: Input data for analysis + """ + stats = self.get_sparsity_statistics(x) + + print("=== Sparsity Statistics ===") + print(f"Target sparsity: {self.sparsity_target:.3f}") + print(f"Actual sparsity ratio: {float(stats['sparsity_ratio']):.3f}") + print(f"Active neurons: {int(stats['active_neurons'])}/{int(stats['total_neurons'])}") + print(f"Average activation: {float(tf.reduce_mean(stats['average_activation'])):.6f}") + print(f"Min activation: {float(tf.reduce_min(stats['average_activation'])):.6f}") + print(f"Max activation: {float(tf.reduce_max(stats['average_activation'])):.6f}") + + def get_config(self) -> Dict: + """Get configuration dictionary.""" + config = super().get_config() + config.update({ + 'sparsity_target': self.sparsity_target, + 'sparsity_weight': self.sparsity_weight + }) + return config \ No newline at end of file diff --git a/udl_toolbox/autoencoders/vanilla.py b/udl_toolbox/autoencoders/vanilla.py new file mode 100644 index 0000000..1ca3c38 --- /dev/null +++ b/udl_toolbox/autoencoders/vanilla.py @@ -0,0 +1,205 @@ +""" +Vanilla (basic) autoencoder implementation. +""" + +import tensorflow as tf +from typing import Dict, List +from .base import BaseAutoencoder +from ..losses.reconstruction import MeanSquaredError, BinaryCrossentropy + + +class VanillaAutoencoder(BaseAutoencoder): + """ + Basic autoencoder with fully connected encoder and decoder networks. + + This is the simplest form of autoencoder that learns to compress + and reconstruct input data without any additional constraints. + """ + + def __init__( + self, + input_dim: int, + latent_dim: int, + encoder_layers: List[int] = None, + decoder_layers: List[int] = None, + activation: str = 'relu', + output_activation: str = 'sigmoid', + learning_rate: float = 0.001, + loss_type: str = 'mse', + dropout_rate: float = 0.0, + use_batch_norm: bool = False, + name: str = "vanilla_autoencoder" + ): + """ + Initialize vanilla autoencoder. 
+ + Args: + input_dim: Dimension of input data + latent_dim: Dimension of latent space + encoder_layers: List of hidden layer sizes for encoder (default: [input_dim//2]) + decoder_layers: List of hidden layer sizes for decoder (default: symmetric to encoder) + activation: Activation function for hidden layers + output_activation: Activation function for output layer + learning_rate: Learning rate for optimizer + loss_type: Type of reconstruction loss ('mse' or 'binary_crossentropy') + dropout_rate: Dropout rate for regularization + use_batch_norm: Whether to use batch normalization + name: Name of the model + """ + # Set default layer configurations + if encoder_layers is None: + encoder_layers = [input_dim // 2] + if decoder_layers is None: + # Mirror encoder layers + decoder_layers = encoder_layers[::-1] + + self.loss_type = loss_type + self.dropout_rate = dropout_rate + self.use_batch_norm = use_batch_norm + + # Initialize loss function + if loss_type == 'mse': + self.loss_fn = MeanSquaredError() + elif loss_type == 'binary_crossentropy': + self.loss_fn = BinaryCrossentropy() + else: + raise ValueError(f"Unknown loss type: {loss_type}") + + super().__init__( + input_dim=input_dim, + latent_dim=latent_dim, + encoder_layers=encoder_layers, + decoder_layers=decoder_layers, + activation=activation, + output_activation=output_activation, + learning_rate=learning_rate, + name=name + ) + + def _build_encoder(self) -> tf.keras.Model: + """Build the encoder network.""" + inputs = tf.keras.Input(shape=(self.input_dim,), name=f"{self.name}_encoder_input") + x = inputs + + # Hidden layers + for i, units in enumerate(self.encoder_layers): + x = tf.keras.layers.Dense( + units, + activation=self.activation, + name=f"{self.name}_encoder_dense_{i}" + )(x) + + if self.use_batch_norm: + x = tf.keras.layers.BatchNormalization( + name=f"{self.name}_encoder_bn_{i}" + )(x) + + if self.dropout_rate > 0: + x = tf.keras.layers.Dropout( + self.dropout_rate, + name=f"{self.name}_encoder_dropout_{i}" + )(x) + + # Latent layer + latent = tf.keras.layers.Dense( + self.latent_dim, + activation='linear', # Linear activation for latent space + name=f"{self.name}_latent" + )(x) + + return tf.keras.Model(inputs, latent, name=f"{self.name}_encoder") + + def _build_decoder(self) -> tf.keras.Model: + """Build the decoder network.""" + inputs = tf.keras.Input(shape=(self.latent_dim,), name=f"{self.name}_decoder_input") + x = inputs + + # Hidden layers + for i, units in enumerate(self.decoder_layers): + x = tf.keras.layers.Dense( + units, + activation=self.activation, + name=f"{self.name}_decoder_dense_{i}" + )(x) + + if self.use_batch_norm: + x = tf.keras.layers.BatchNormalization( + name=f"{self.name}_decoder_bn_{i}" + )(x) + + if self.dropout_rate > 0: + x = tf.keras.layers.Dropout( + self.dropout_rate, + name=f"{self.name}_decoder_dropout_{i}" + )(x) + + # Output layer + outputs = tf.keras.layers.Dense( + self.input_dim, + activation=self.output_activation, + name=f"{self.name}_output" + )(x) + + return tf.keras.Model(inputs, outputs, name=f"{self.name}_decoder") + + def _compute_loss(self, x: tf.Tensor, x_reconstructed: tf.Tensor, **kwargs) -> Dict[str, tf.Tensor]: + """ + Compute the reconstruction loss for vanilla autoencoder. 
+ + Args: + x: Original input data + x_reconstructed: Reconstructed data from autoencoder + + Returns: + Dictionary containing loss components + """ + reconstruction_loss = self.loss_fn(x, x_reconstructed) + + return { + 'total_loss': reconstruction_loss, + 'reconstruction_loss': reconstruction_loss, + 'regularization_loss': tf.constant(0.0) + } + + def get_config(self) -> Dict: + """Get configuration dictionary.""" + config = super().get_config() + config.update({ + 'loss_type': self.loss_type, + 'dropout_rate': self.dropout_rate, + 'use_batch_norm': self.use_batch_norm + }) + return config + + def compress(self, x: tf.Tensor, compression_ratio: float = None) -> tf.Tensor: + """ + Compress input data to latent representation. + + Args: + x: Input data + compression_ratio: If provided, calculates actual compression achieved + + Returns: + Compressed latent representation + """ + latent = self.encode(x) + + if compression_ratio is not None: + actual_ratio = self.input_dim / self.latent_dim + print(f"Compression ratio: {actual_ratio:.2f}:1") + if compression_ratio != actual_ratio: + print(f"Note: Requested ratio {compression_ratio:.2f}:1, actual ratio {actual_ratio:.2f}:1") + + return latent + + def decompress(self, latent: tf.Tensor) -> tf.Tensor: + """ + Decompress latent representation to original space. + + Args: + latent: Latent representation + + Returns: + Reconstructed data + """ + return self.decode(latent) \ No newline at end of file diff --git a/udl_toolbox/autoencoders/variational.py b/udl_toolbox/autoencoders/variational.py new file mode 100644 index 0000000..08a819f --- /dev/null +++ b/udl_toolbox/autoencoders/variational.py @@ -0,0 +1,347 @@ +""" +Variational autoencoder implementation. +""" + +import tensorflow as tf +import numpy as np +from typing import Dict, List, Tuple +from .base import BaseAutoencoder +from ..losses.vae_loss import VAELoss + + +class VariationalAutoencoder(BaseAutoencoder): + """ + Variational Autoencoder (VAE) implementation. + + VAE learns a probabilistic latent representation by encoding inputs as + distributions (mean and variance) rather than fixed points, and uses + the reparameterization trick for training. + """ + + def __init__( + self, + input_dim: int, + latent_dim: int, + encoder_layers: List[int] = None, + decoder_layers: List[int] = None, + activation: str = 'relu', + output_activation: str = 'sigmoid', + learning_rate: float = 0.001, + beta: float = 1.0, + reconstruction_loss: str = 'mse', + dropout_rate: float = 0.0, + use_batch_norm: bool = False, + name: str = "variational_autoencoder" + ): + """ + Initialize variational autoencoder. 
+ + Args: + input_dim: Dimension of input data + latent_dim: Dimension of latent space + encoder_layers: List of hidden layer sizes for encoder + decoder_layers: List of hidden layer sizes for decoder + activation: Activation function for hidden layers + output_activation: Activation function for output layer + learning_rate: Learning rate for optimizer + beta: Beta parameter for KL divergence weighting + reconstruction_loss: Type of reconstruction loss ('mse' or 'binary_crossentropy') + dropout_rate: Dropout rate for regularization + use_batch_norm: Whether to use batch normalization + name: Name of the model + """ + # Set default layer configurations + if encoder_layers is None: + encoder_layers = [input_dim // 2] + if decoder_layers is None: + decoder_layers = encoder_layers[::-1] + + self.beta = beta + self.reconstruction_loss_type = reconstruction_loss + self.dropout_rate = dropout_rate + self.use_batch_norm = use_batch_norm + + # Initialize VAE loss + self.vae_loss_fn = VAELoss( + reconstruction_loss=reconstruction_loss, + beta=beta + ) + + super().__init__( + input_dim=input_dim, + latent_dim=latent_dim, + encoder_layers=encoder_layers, + decoder_layers=decoder_layers, + activation=activation, + output_activation=output_activation, + learning_rate=learning_rate, + name=name + ) + + def _build_encoder(self) -> tf.keras.Model: + """Build the encoder network that outputs mean and log variance.""" + inputs = tf.keras.Input(shape=(self.input_dim,), name=f"{self.name}_encoder_input") + x = inputs + + # Hidden layers + for i, units in enumerate(self.encoder_layers): + x = tf.keras.layers.Dense( + units, + activation=self.activation, + name=f"{self.name}_encoder_dense_{i}" + )(x) + + if self.use_batch_norm: + x = tf.keras.layers.BatchNormalization( + name=f"{self.name}_encoder_bn_{i}" + )(x) + + if self.dropout_rate > 0: + x = tf.keras.layers.Dropout( + self.dropout_rate, + name=f"{self.name}_encoder_dropout_{i}" + )(x) + + # Mean and log variance layers + z_mean = tf.keras.layers.Dense( + self.latent_dim, + activation='linear', + name=f"{self.name}_z_mean" + )(x) + + z_log_var = tf.keras.layers.Dense( + self.latent_dim, + activation='linear', + name=f"{self.name}_z_log_var" + )(x) + + # Sampling layer + z = self._sampling_layer([z_mean, z_log_var]) + + return tf.keras.Model(inputs, [z_mean, z_log_var, z], name=f"{self.name}_encoder") + + def _sampling_layer(self, args: List[tf.Tensor]) -> tf.Tensor: + """ + Reparameterization trick: sample from latent distribution. 
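+
+        Computes z = z_mean + exp(0.5 * z_log_var) * epsilon with
+        epsilon ~ N(0, I), so gradients can flow through the mean and
+        log-variance while z remains stochastic.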
+ + Args: + args: [z_mean, z_log_var] + + Returns: + Sampled latent vector + """ + z_mean, z_log_var = args + + # Use Keras Lambda layer for sampling + def sample(inputs): + z_mean, z_log_var = inputs + batch_size = tf.shape(z_mean)[0] + epsilon = tf.random.normal(shape=(batch_size, self.latent_dim)) + return z_mean + tf.exp(0.5 * z_log_var) * epsilon + + return tf.keras.layers.Lambda(sample, name=f"{self.name}_sampling")([z_mean, z_log_var]) + + def _build_decoder(self) -> tf.keras.Model: + """Build the decoder network.""" + inputs = tf.keras.Input(shape=(self.latent_dim,), name=f"{self.name}_decoder_input") + x = inputs + + # Hidden layers + for i, units in enumerate(self.decoder_layers): + x = tf.keras.layers.Dense( + units, + activation=self.activation, + name=f"{self.name}_decoder_dense_{i}" + )(x) + + if self.use_batch_norm: + x = tf.keras.layers.BatchNormalization( + name=f"{self.name}_decoder_bn_{i}" + )(x) + + if self.dropout_rate > 0: + x = tf.keras.layers.Dropout( + self.dropout_rate, + name=f"{self.name}_decoder_dropout_{i}" + )(x) + + # Output layer + outputs = tf.keras.layers.Dense( + self.input_dim, + activation=self.output_activation, + name=f"{self.name}_output" + )(x) + + return tf.keras.Model(inputs, outputs, name=f"{self.name}_decoder") + + def _build_model(self): + """Build the complete VAE model.""" + self.encoder = self._build_encoder() + self.decoder = self._build_decoder() + + # Build the full VAE + inputs = tf.keras.Input(shape=(self.input_dim,), name=f"{self.name}_input") + z_mean, z_log_var, z = self.encoder(inputs) + decoded = self.decoder(z) + + self.autoencoder = tf.keras.Model(inputs, decoded, name=self.name) + + # Create models for individual components + self.encoder_mean_var = tf.keras.Model(inputs, [z_mean, z_log_var], name=f"{self.name}_encoder_mean_var") + + # Initialize optimizer + self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate) + + def encode(self, x: tf.Tensor, return_distribution: bool = False) -> tf.Tensor: + """ + Encode input data to latent space. + + Args: + x: Input data tensor + return_distribution: If True, return (mean, log_var, sample), else just sample + + Returns: + Encoded representation(s) + """ + if return_distribution: + return self.encoder(x) + else: + z_mean, z_log_var, z = self.encoder(x) + return z + + def encode_mean(self, x: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]: + """ + Encode input to mean and log variance (no sampling). + + Args: + x: Input data tensor + + Returns: + Tuple of (mean, log_var) + """ + return self.encoder_mean_var(x) + + @tf.function + def _train_step(self, x: tf.Tensor) -> Dict[str, tf.Tensor]: + """ + Perform a single training step for VAE. + + Args: + x: Input data batch + + Returns: + Dictionary of loss values + """ + with tf.GradientTape() as tape: + # Forward pass + z_mean, z_log_var, z = self.encoder(x, training=True) + x_reconstructed = self.decoder(z, training=True) + + # Compute VAE loss + losses = self.vae_loss_fn(x, x_reconstructed, z_mean, z_log_var) + total_loss = losses['total_loss'] + + # Compute gradients + gradients = tape.gradient(total_loss, self.autoencoder.trainable_variables) + + # Apply gradients + self.optimizer.apply_gradients(zip(gradients, self.autoencoder.trainable_variables)) + + return losses + + def _compute_loss(self, x: tf.Tensor, x_reconstructed: tf.Tensor, **kwargs) -> Dict[str, tf.Tensor]: + """ + Compute VAE loss components. 
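+
+        total_loss = reconstruction_loss + beta * KL(N(mu, sigma^2) || N(0, I)),
+        where the KL term has the closed form
+        0.5 * sum(mu^2 + sigma^2 - log(sigma^2) - 1) over latent dimensions.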
+ + Args: + x: Original input data + x_reconstructed: Reconstructed data + + Returns: + Dictionary containing loss components + """ + # Get latent parameters + z_mean, z_log_var = self.encoder_mean_var(x) + + # Compute VAE loss + return self.vae_loss_fn(x, x_reconstructed, z_mean, z_log_var) + + def generate(self, num_samples: int = 1, latent_samples: tf.Tensor = None) -> tf.Tensor: + """ + Generate new samples from the learned distribution. + + Args: + num_samples: Number of samples to generate + latent_samples: Optional latent samples (if None, sample from prior) + + Returns: + Generated samples + """ + if latent_samples is None: + # Sample from standard normal prior + latent_samples = tf.random.normal((num_samples, self.latent_dim)) + + return self.decoder(latent_samples, training=False) + + def interpolate(self, x1: tf.Tensor, x2: tf.Tensor, num_steps: int = 10) -> tf.Tensor: + """ + Interpolate between two points in latent space. + + Args: + x1: First input point + x2: Second input point + num_steps: Number of interpolation steps + + Returns: + Interpolated reconstructions + """ + # Encode to latent space (use mean, not sample) + z1_mean, _ = self.encoder_mean_var(x1) + z2_mean, _ = self.encoder_mean_var(x2) + + # Create interpolation ratios + ratios = tf.linspace(0.0, 1.0, num_steps) + ratios = tf.reshape(ratios, [-1, 1]) + + # Interpolate in latent space + z_interpolated = [] + for ratio in ratios: + z_interp = (1 - ratio) * z1_mean + ratio * z2_mean + z_interpolated.append(z_interp) + + z_interpolated = tf.concat(z_interpolated, axis=0) + + # Decode interpolated latent vectors + return self.decoder(z_interpolated, training=False) + + def get_latent_statistics(self, x: tf.Tensor) -> Dict[str, tf.Tensor]: + """ + Get statistics of the latent distribution. + + Args: + x: Input data + + Returns: + Dictionary with latent statistics + """ + z_mean, z_log_var = self.encoder_mean_var(x) + z_std = tf.exp(0.5 * z_log_var) + + return { + 'mean': tf.reduce_mean(z_mean, axis=0), + 'std': tf.reduce_mean(z_std, axis=0), + 'mean_of_means': tf.reduce_mean(z_mean), + 'mean_of_stds': tf.reduce_mean(z_std), + 'latent_dim_variance': tf.math.reduce_variance(z_mean, axis=0) + } + + def get_config(self) -> Dict: + """Get configuration dictionary.""" + config = super().get_config() + config.update({ + 'beta': self.beta, + 'reconstruction_loss': self.reconstruction_loss_type, + 'dropout_rate': self.dropout_rate, + 'use_batch_norm': self.use_batch_norm + }) + return config \ No newline at end of file diff --git a/udl_toolbox/losses/__init__.py b/udl_toolbox/losses/__init__.py new file mode 100644 index 0000000..5ccdd95 --- /dev/null +++ b/udl_toolbox/losses/__init__.py @@ -0,0 +1,13 @@ +"""Custom loss functions.""" + +from .reconstruction import MeanSquaredError, BinaryCrossentropy +from .regularization import KLDivergence, SparsityRegularization +from .vae_loss import VAELoss + +__all__ = [ + "MeanSquaredError", + "BinaryCrossentropy", + "KLDivergence", + "SparsityRegularization", + "VAELoss", +] \ No newline at end of file diff --git a/udl_toolbox/losses/reconstruction.py b/udl_toolbox/losses/reconstruction.py new file mode 100644 index 0000000..f578513 --- /dev/null +++ b/udl_toolbox/losses/reconstruction.py @@ -0,0 +1,227 @@ +""" +Custom reconstruction loss functions implemented from scratch. +""" + +import tensorflow as tf +from typing import Optional + + +class MeanSquaredError: + """ + Mean Squared Error loss function implemented from scratch. 
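+
+    Computes mean((y_true - y_pred)^2) under the default 'mean' reduction.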
+ + This is equivalent to L2 loss and is commonly used for continuous data. + """ + + def __init__(self, reduction: str = 'mean'): + """ + Initialize MSE loss. + + Args: + reduction: Type of reduction to apply ('mean', 'sum', 'none') + """ + self.reduction = reduction + + def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor: + """ + Compute MSE loss. + + Args: + y_true: Ground truth values + y_pred: Predicted values + + Returns: + MSE loss tensor + """ + # Compute squared differences + squared_diff = tf.square(y_true - y_pred) + + # Apply reduction + if self.reduction == 'mean': + return tf.reduce_mean(squared_diff) + elif self.reduction == 'sum': + return tf.reduce_sum(squared_diff) + elif self.reduction == 'none': + return squared_diff + else: + raise ValueError(f"Unknown reduction: {self.reduction}") + + def get_config(self): + """Get configuration dictionary.""" + return {'reduction': self.reduction} + + +class BinaryCrossentropy: + """ + Binary crossentropy loss function implemented from scratch. + + Commonly used for binary data or when outputs are sigmoid-activated. + """ + + def __init__(self, from_logits: bool = False, epsilon: float = 1e-7, reduction: str = 'mean'): + """ + Initialize binary crossentropy loss. + + Args: + from_logits: Whether predictions are logits or probabilities + epsilon: Small constant to avoid log(0) + reduction: Type of reduction to apply ('mean', 'sum', 'none') + """ + self.from_logits = from_logits + self.epsilon = epsilon + self.reduction = reduction + + def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor: + """ + Compute binary crossentropy loss. + + Args: + y_true: Ground truth values (0 or 1) + y_pred: Predicted values + + Returns: + Binary crossentropy loss tensor + """ + if self.from_logits: + # Apply sigmoid if inputs are logits + y_pred = tf.nn.sigmoid(y_pred) + + # Clip predictions to avoid log(0) + y_pred = tf.clip_by_value(y_pred, self.epsilon, 1.0 - self.epsilon) + + # Compute binary crossentropy: -[y*log(p) + (1-y)*log(1-p)] + bce = -(y_true * tf.math.log(y_pred) + (1.0 - y_true) * tf.math.log(1.0 - y_pred)) + + # Apply reduction + if self.reduction == 'mean': + return tf.reduce_mean(bce) + elif self.reduction == 'sum': + return tf.reduce_sum(bce) + elif self.reduction == 'none': + return bce + else: + raise ValueError(f"Unknown reduction: {self.reduction}") + + def get_config(self): + """Get configuration dictionary.""" + return { + 'from_logits': self.from_logits, + 'epsilon': self.epsilon, + 'reduction': self.reduction + } + + +class CategoricalCrossentropy: + """ + Categorical crossentropy loss function implemented from scratch. + + Used for multi-class classification problems. + """ + + def __init__(self, from_logits: bool = False, epsilon: float = 1e-7, reduction: str = 'mean'): + """ + Initialize categorical crossentropy loss. + + Args: + from_logits: Whether predictions are logits or probabilities + epsilon: Small constant to avoid log(0) + reduction: Type of reduction to apply ('mean', 'sum', 'none') + """ + self.from_logits = from_logits + self.epsilon = epsilon + self.reduction = reduction + + def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor: + """ + Compute categorical crossentropy loss. 
+ + Args: + y_true: Ground truth one-hot encoded labels + y_pred: Predicted probabilities or logits + + Returns: + Categorical crossentropy loss tensor + """ + if self.from_logits: + # Apply softmax if inputs are logits + y_pred = tf.nn.softmax(y_pred) + + # Clip predictions to avoid log(0) + y_pred = tf.clip_by_value(y_pred, self.epsilon, 1.0 - self.epsilon) + + # Compute categorical crossentropy: -sum(y * log(p)) + cce = -tf.reduce_sum(y_true * tf.math.log(y_pred), axis=-1) + + # Apply reduction + if self.reduction == 'mean': + return tf.reduce_mean(cce) + elif self.reduction == 'sum': + return tf.reduce_sum(cce) + elif self.reduction == 'none': + return cce + else: + raise ValueError(f"Unknown reduction: {self.reduction}") + + def get_config(self): + """Get configuration dictionary.""" + return { + 'from_logits': self.from_logits, + 'epsilon': self.epsilon, + 'reduction': self.reduction + } + + +class Huber: + """ + Huber loss function implemented from scratch. + + Combines MSE and MAE - less sensitive to outliers than MSE. + """ + + def __init__(self, delta: float = 1.0, reduction: str = 'mean'): + """ + Initialize Huber loss. + + Args: + delta: Threshold for switching between quadratic and linear loss + reduction: Type of reduction to apply ('mean', 'sum', 'none') + """ + self.delta = delta + self.reduction = reduction + + def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor: + """ + Compute Huber loss. + + Args: + y_true: Ground truth values + y_pred: Predicted values + + Returns: + Huber loss tensor + """ + error = y_true - y_pred + abs_error = tf.abs(error) + + # Quadratic loss for small errors, linear for large errors + quadratic = 0.5 * tf.square(error) + linear = self.delta * abs_error - 0.5 * tf.square(self.delta) + + huber_loss = tf.where(abs_error <= self.delta, quadratic, linear) + + # Apply reduction + if self.reduction == 'mean': + return tf.reduce_mean(huber_loss) + elif self.reduction == 'sum': + return tf.reduce_sum(huber_loss) + elif self.reduction == 'none': + return huber_loss + else: + raise ValueError(f"Unknown reduction: {self.reduction}") + + def get_config(self): + """Get configuration dictionary.""" + return { + 'delta': self.delta, + 'reduction': self.reduction + } \ No newline at end of file diff --git a/udl_toolbox/losses/regularization.py b/udl_toolbox/losses/regularization.py new file mode 100644 index 0000000..61515cc --- /dev/null +++ b/udl_toolbox/losses/regularization.py @@ -0,0 +1,248 @@ +""" +Custom regularization loss functions for autoencoders. +""" + +import tensorflow as tf +from typing import Optional + + +class KLDivergence: + """ + Kullback-Leibler divergence loss implemented from scratch. + + Commonly used in Variational Autoencoders to enforce latent space distribution. + """ + + def __init__(self, reduction: str = 'mean'): + """ + Initialize KL divergence loss. + + Args: + reduction: Type of reduction to apply ('mean', 'sum', 'none') + """ + self.reduction = reduction + + def __call__( + self, + mu: tf.Tensor, + log_var: tf.Tensor, + prior_mu: Optional[tf.Tensor] = None, + prior_log_var: Optional[tf.Tensor] = None + ) -> tf.Tensor: + """ + Compute KL divergence between two multivariate Gaussian distributions. 
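+
+        For diagonal Gaussians the closed form per latent dimension is
+        0.5 * (log(var_p / var_q) + (var_q + (mu_q - mu_p)^2) / var_p - 1),
+        summed over dimensions.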
+ + For VAE, this is typically KL(q(z|x) || p(z)) where: + - q(z|x) is the encoder distribution with parameters (mu, log_var) + - p(z) is the prior distribution (usually standard normal) + + Args: + mu: Mean of the approximate posterior + log_var: Log variance of the approximate posterior + prior_mu: Mean of the prior (defaults to 0) + prior_log_var: Log variance of the prior (defaults to 0) + + Returns: + KL divergence tensor + """ + if prior_mu is None: + prior_mu = tf.zeros_like(mu) + if prior_log_var is None: + prior_log_var = tf.zeros_like(log_var) + + # KL divergence formula for multivariate Gaussians: + # KL = 0.5 * sum(log(var_prior/var_posterior) + (var_posterior + (mu_posterior - mu_prior)^2) / var_prior - 1) + + # Convert log variances to variances + var = tf.exp(log_var) + prior_var = tf.exp(prior_log_var) + + # Compute KL divergence terms + log_var_ratio = prior_log_var - log_var + var_ratio = var / prior_var + mu_diff_squared = tf.square(mu - prior_mu) / prior_var + + # KL divergence per dimension + kl_per_dim = 0.5 * (log_var_ratio + var_ratio + mu_diff_squared - 1.0) + + # Sum over latent dimensions + kl = tf.reduce_sum(kl_per_dim, axis=-1) + + # Apply reduction + if self.reduction == 'mean': + return tf.reduce_mean(kl) + elif self.reduction == 'sum': + return tf.reduce_sum(kl) + elif self.reduction == 'none': + return kl + else: + raise ValueError(f"Unknown reduction: {self.reduction}") + + def standard_normal_kl(self, mu: tf.Tensor, log_var: tf.Tensor) -> tf.Tensor: + """ + Compute KL divergence with standard normal prior (most common case for VAE). + + Args: + mu: Mean of the approximate posterior + log_var: Log variance of the approximate posterior + + Returns: + KL divergence with N(0,I) prior + """ + # Simplified formula for standard normal prior + kl_per_dim = 0.5 * (tf.square(mu) + tf.exp(log_var) - log_var - 1.0) + kl = tf.reduce_sum(kl_per_dim, axis=-1) + + # Apply reduction + if self.reduction == 'mean': + return tf.reduce_mean(kl) + elif self.reduction == 'sum': + return tf.reduce_sum(kl) + elif self.reduction == 'none': + return kl + else: + raise ValueError(f"Unknown reduction: {self.reduction}") + + def get_config(self): + """Get configuration dictionary.""" + return {'reduction': self.reduction} + + +class SparsityRegularization: + """ + Sparsity regularization loss for sparse autoencoders. + + Encourages sparse activations in the hidden layer using KL divergence + between actual and target activation distributions. + """ + + def __init__(self, sparsity_target: float = 0.05, sparsity_weight: float = 1.0, reduction: str = 'mean'): + """ + Initialize sparsity regularization. + + Args: + sparsity_target: Target average activation (rho) + sparsity_weight: Weight for sparsity term (beta) + reduction: Type of reduction to apply ('mean', 'sum', 'none') + """ + self.sparsity_target = sparsity_target + self.sparsity_weight = sparsity_weight + self.reduction = reduction + self.epsilon = 1e-8 # Small constant to avoid log(0) + + def __call__(self, activations: tf.Tensor) -> tf.Tensor: + """ + Compute sparsity regularization loss. 
+ + Args: + activations: Hidden layer activations (batch_size, hidden_dim) + + Returns: + Sparsity regularization loss + """ + # Compute average activation for each neuron across the batch + rho_hat = tf.reduce_mean(activations, axis=0) + + # Clip to avoid numerical issues + rho_hat = tf.clip_by_value(rho_hat, self.epsilon, 1.0 - self.epsilon) + rho = tf.clip_by_value(self.sparsity_target, self.epsilon, 1.0 - self.epsilon) + + # KL divergence between Bernoulli distributions + # KL(rho || rho_hat) = rho * log(rho / rho_hat) + (1-rho) * log((1-rho) / (1-rho_hat)) + kl_div = (rho * tf.math.log(rho / rho_hat) + + (1.0 - rho) * tf.math.log((1.0 - rho) / (1.0 - rho_hat))) + + # Sum across all neurons + sparsity_loss = tf.reduce_sum(kl_div) + + # Apply sparsity weight + sparsity_loss = self.sparsity_weight * sparsity_loss + + # Note: reduction doesn't apply here as we want one scalar per batch + return sparsity_loss + + def get_config(self): + """Get configuration dictionary.""" + return { + 'sparsity_target': self.sparsity_target, + 'sparsity_weight': self.sparsity_weight, + 'reduction': self.reduction + } + + +class L1Regularization: + """ + L1 (Lasso) regularization loss. + + Encourages sparsity by penalizing the absolute values of parameters. + """ + + def __init__(self, l1_weight: float = 0.01, reduction: str = 'mean'): + """ + Initialize L1 regularization. + + Args: + l1_weight: Weight for L1 penalty + reduction: Type of reduction to apply ('mean', 'sum', 'none') + """ + self.l1_weight = l1_weight + self.reduction = reduction + + def __call__(self, weights: tf.Tensor) -> tf.Tensor: + """ + Compute L1 regularization loss. + + Args: + weights: Model weights tensor + + Returns: + L1 regularization loss + """ + l1_loss = tf.reduce_sum(tf.abs(weights)) + return self.l1_weight * l1_loss + + def get_config(self): + """Get configuration dictionary.""" + return { + 'l1_weight': self.l1_weight, + 'reduction': self.reduction + } + + +class L2Regularization: + """ + L2 (Ridge) regularization loss. + + Prevents overfitting by penalizing large parameter values. + """ + + def __init__(self, l2_weight: float = 0.01, reduction: str = 'mean'): + """ + Initialize L2 regularization. + + Args: + l2_weight: Weight for L2 penalty + reduction: Type of reduction to apply ('mean', 'sum', 'none') + """ + self.l2_weight = l2_weight + self.reduction = reduction + + def __call__(self, weights: tf.Tensor) -> tf.Tensor: + """ + Compute L2 regularization loss. + + Args: + weights: Model weights tensor + + Returns: + L2 regularization loss + """ + l2_loss = tf.reduce_sum(tf.square(weights)) + return self.l2_weight * l2_loss + + def get_config(self): + """Get configuration dictionary.""" + return { + 'l2_weight': self.l2_weight, + 'reduction': self.reduction + } \ No newline at end of file diff --git a/udl_toolbox/losses/vae_loss.py b/udl_toolbox/losses/vae_loss.py new file mode 100644 index 0000000..abadd7a --- /dev/null +++ b/udl_toolbox/losses/vae_loss.py @@ -0,0 +1,163 @@ +""" +VAE-specific loss function combining reconstruction and KL divergence. +""" + +import tensorflow as tf +from .reconstruction import MeanSquaredError, BinaryCrossentropy +from .regularization import KLDivergence + + +class VAELoss: + """ + Variational Autoencoder loss function. + + Combines reconstruction loss with KL divergence regularization. 
+ Loss = Reconstruction_Loss + ฮฒ * KL_Divergence + """ + + def __init__( + self, + reconstruction_loss: str = 'mse', + beta: float = 1.0, + reduction: str = 'mean', + **kwargs + ): + """ + Initialize VAE loss. + + Args: + reconstruction_loss: Type of reconstruction loss ('mse', 'binary_crossentropy') + beta: Weight for KL divergence term (ฮฒ-VAE parameter) + reduction: Type of reduction to apply + **kwargs: Additional arguments for reconstruction loss + """ + self.beta = beta + self.reduction = reduction + + # Initialize reconstruction loss + if reconstruction_loss == 'mse': + self.reconstruction_fn = MeanSquaredError(reduction=reduction) + elif reconstruction_loss == 'binary_crossentropy': + self.reconstruction_fn = BinaryCrossentropy(reduction=reduction, **kwargs) + else: + raise ValueError(f"Unknown reconstruction loss: {reconstruction_loss}") + + # Initialize KL divergence + self.kl_fn = KLDivergence(reduction=reduction) + + self.reconstruction_loss_type = reconstruction_loss + + def __call__( + self, + x_true: tf.Tensor, + x_reconstructed: tf.Tensor, + mu: tf.Tensor, + log_var: tf.Tensor + ) -> dict: + """ + Compute VAE loss. + + Args: + x_true: Original input data + x_reconstructed: Reconstructed data from decoder + mu: Mean of latent distribution from encoder + log_var: Log variance of latent distribution from encoder + + Returns: + Dictionary containing individual loss components and total loss + """ + # Reconstruction loss + reconstruction_loss = self.reconstruction_fn(x_true, x_reconstructed) + + # KL divergence loss (with standard normal prior) + kl_loss = self.kl_fn.standard_normal_kl(mu, log_var) + + # Total VAE loss + total_loss = reconstruction_loss + self.beta * kl_loss + + return { + 'total_loss': total_loss, + 'reconstruction_loss': reconstruction_loss, + 'kl_loss': kl_loss, + 'regularization_loss': self.beta * kl_loss + } + + def get_config(self): + """Get configuration dictionary.""" + config = { + 'reconstruction_loss': self.reconstruction_loss_type, + 'beta': self.beta, + 'reduction': self.reduction + } + return config + + +class BetaVAELoss(VAELoss): + """ + ฮฒ-VAE loss with adjustable ฮฒ parameter for disentanglement. + + Higher ฮฒ values encourage disentangled representations at the cost + of reconstruction quality. + """ + + def __init__(self, beta: float = 4.0, **kwargs): + """ + Initialize ฮฒ-VAE loss. + + Args: + beta: ฮฒ parameter controlling disentanglement vs reconstruction trade-off + **kwargs: Other arguments passed to VAELoss + """ + super().__init__(beta=beta, **kwargs) + + +class AnnealedVAELoss(VAELoss): + """ + VAE loss with annealed ฮฒ parameter. + + Gradually increases ฮฒ during training to balance reconstruction + and regularization. + """ + + def __init__( + self, + beta_start: float = 0.0, + beta_end: float = 1.0, + anneal_steps: int = 1000, + **kwargs + ): + """ + Initialize annealed VAE loss. + + Args: + beta_start: Initial ฮฒ value + beta_end: Final ฮฒ value + anneal_steps: Number of steps to anneal ฮฒ + **kwargs: Other arguments passed to VAELoss + """ + super().__init__(beta=beta_start, **kwargs) + self.beta_start = beta_start + self.beta_end = beta_end + self.anneal_steps = anneal_steps + self.current_step = 0 + + def update_beta(self, step: int): + """ + Update ฮฒ parameter based on current training step. 
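+
+        Linear schedule: beta(step) = beta_start +
+        (beta_end - beta_start) * step / anneal_steps for step < anneal_steps,
+        and beta_end afterwards.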
+
+        Args:
+            step: Current training step
+        """
+        self.current_step = step
+        if step >= self.anneal_steps:
+            self.beta = self.beta_end
+        else:
+            # Linear annealing
+            self.beta = self.beta_start + (self.beta_end - self.beta_start) * (step / self.anneal_steps)
+
+    def __call__(self, x_true, x_reconstructed, mu, log_var):
+        """Compute the VAE loss with the current (annealed) β value."""
+        # VAELoss.__call__ reads self.beta, so the most recently annealed
+        # value is applied automatically
+        return super().__call__(x_true, x_reconstructed, mu, log_var)
\ No newline at end of file
diff --git a/udl_toolbox/projections/__init__.py b/udl_toolbox/projections/__init__.py
new file mode 100644
index 0000000..3b4906f
--- /dev/null
+++ b/udl_toolbox/projections/__init__.py
@@ -0,0 +1,11 @@
+"""Data projection utilities."""
+
+from .pca import PCAProjection
+from .tsne import TSNEProjection
+from .interpolation import LatentSpaceInterpolation
+
+__all__ = [
+    "PCAProjection",
+    "TSNEProjection",
+    "LatentSpaceInterpolation",
+]
\ No newline at end of file
diff --git a/udl_toolbox/projections/interpolation.py b/udl_toolbox/projections/interpolation.py
new file mode 100644
index 0000000..9e164bc
--- /dev/null
+++ b/udl_toolbox/projections/interpolation.py
@@ -0,0 +1,362 @@
+"""
+Latent space interpolation utilities for autoencoders.
+"""
+
+import tensorflow as tf
+import numpy as np
+from typing import List, Tuple, Optional, Callable
+import matplotlib.pyplot as plt
+
+
+class LatentSpaceInterpolation:
+    """
+    Utilities for interpolating in autoencoder latent spaces.
+
+    Provides various interpolation methods and visualization tools
+    for exploring the structure of learned latent representations.
+    """
+
+    def __init__(self, autoencoder):
+        """
+        Initialize latent space interpolation utilities.
+
+        Args:
+            autoencoder: Fitted autoencoder model with encode/decode methods
+        """
+        self.autoencoder = autoencoder
+
+    def linear_interpolation(
+        self,
+        z1: tf.Tensor,
+        z2: tf.Tensor,
+        num_steps: int = 10,
+        include_endpoints: bool = True
+    ) -> tf.Tensor:
+        """
+        Perform linear interpolation between two latent points.
+
+        Args:
+            z1: First latent point
+            z2: Second latent point
+            num_steps: Number of interpolation steps
+            include_endpoints: Whether to include start and end points
+
+        Returns:
+            Interpolated latent points
+        """
+        if include_endpoints:
+            alphas = np.linspace(0, 1, num_steps)
+        else:
+            alphas = np.linspace(0, 1, num_steps + 2)[1:-1]
+
+        interpolated = []
+        for alpha in alphas:
+            z_interp = (1 - alpha) * z1 + alpha * z2
+            interpolated.append(z_interp)
+
+        return tf.stack(interpolated)
+
+    def spherical_interpolation(
+        self,
+        z1: tf.Tensor,
+        z2: tf.Tensor,
+        num_steps: int = 10,
+        include_endpoints: bool = True
+    ) -> tf.Tensor:
+        """
+        Perform spherical linear interpolation (SLERP) between two latent points.
+
+        Useful when the latent space has spherical structure (e.g., normalized embeddings).
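+
+        SLERP follows the great-circle path:
+        slerp(z1, z2; a) = (sin((1 - a) * omega) * z1 + sin(a * omega) * z2) / sin(omega),
+        where omega is the angle between the normalized endpoints; nearly
+        parallel vectors fall back to linear interpolation.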
+ + Args: + z1: First latent point + z2: Second latent point + num_steps: Number of interpolation steps + include_endpoints: Whether to include start and end points + + Returns: + Spherically interpolated latent points + """ + # Normalize vectors + z1_norm = z1 / tf.norm(z1) + z2_norm = z2 / tf.norm(z2) + + # Compute angle between vectors + dot_product = tf.reduce_sum(z1_norm * z2_norm) + omega = tf.acos(tf.clip_by_value(dot_product, -1.0, 1.0)) + + if include_endpoints: + alphas = np.linspace(0, 1, num_steps) + else: + alphas = np.linspace(0, 1, num_steps + 2)[1:-1] + + interpolated = [] + for alpha in alphas: + if tf.abs(omega) < 1e-6: # Vectors are parallel + z_interp = (1 - alpha) * z1 + alpha * z2 + else: + sin_omega = tf.sin(omega) + z_interp = (tf.sin((1 - alpha) * omega) * z1 + tf.sin(alpha * omega) * z2) / sin_omega + interpolated.append(z_interp) + + return tf.stack(interpolated) + + def interpolate_data_points( + self, + x1: tf.Tensor, + x2: tf.Tensor, + num_steps: int = 10, + method: str = 'linear', + include_endpoints: bool = True + ) -> Tuple[tf.Tensor, tf.Tensor]: + """ + Interpolate between two data points via latent space. + + Args: + x1: First data point + x2: Second data point + num_steps: Number of interpolation steps + method: Interpolation method ('linear' or 'spherical') + include_endpoints: Whether to include start and end points + + Returns: + Tuple of (interpolated_latent, interpolated_reconstructions) + """ + # Encode data points to latent space + z1 = self.autoencoder.encode(tf.expand_dims(x1, 0))[0] + z2 = self.autoencoder.encode(tf.expand_dims(x2, 0))[0] + + # Interpolate in latent space + if method == 'linear': + z_interpolated = self.linear_interpolation(z1, z2, num_steps, include_endpoints) + elif method == 'spherical': + z_interpolated = self.spherical_interpolation(z1, z2, num_steps, include_endpoints) + else: + raise ValueError(f"Unknown interpolation method: {method}") + + # Decode interpolated latent points + x_interpolated = self.autoencoder.decode(z_interpolated) + + return z_interpolated, x_interpolated + + def latent_arithmetic( + self, + base_point: tf.Tensor, + direction_vector: tf.Tensor, + scales: List[float] = [-2, -1, 0, 1, 2] + ) -> Tuple[tf.Tensor, tf.Tensor]: + """ + Perform arithmetic operations in latent space. + + Useful for exploring semantic directions (e.g., "smile" direction in face images). + + Args: + base_point: Base latent point + direction_vector: Direction vector to add/subtract + scales: List of scaling factors for the direction + + Returns: + Tuple of (modified_latent_points, reconstructions) + """ + modified_points = [] + for scale in scales: + modified_point = base_point + scale * direction_vector + modified_points.append(modified_point) + + z_modified = tf.stack(modified_points) + x_reconstructed = self.autoencoder.decode(z_modified) + + return z_modified, x_reconstructed + + def find_semantic_direction( + self, + positive_examples: tf.Tensor, + negative_examples: tf.Tensor, + method: str = 'mean_difference' + ) -> tf.Tensor: + """ + Find semantic direction in latent space from examples. 
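+
+        With method='mean_difference', the direction is
+        normalize(mean(encode(positive)) - mean(encode(negative))); the 'svm'
+        option instead fits a linear SVM and uses the normal of its separating
+        hyperplane (normalized in both cases).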
+ + Args: + positive_examples: Data points with desired attribute + negative_examples: Data points without desired attribute + method: Method to compute direction ('mean_difference', 'svm') + + Returns: + Semantic direction vector + """ + # Encode examples to latent space + z_positive = self.autoencoder.encode(positive_examples) + z_negative = self.autoencoder.encode(negative_examples) + + if method == 'mean_difference': + # Simple mean difference + direction = tf.reduce_mean(z_positive, axis=0) - tf.reduce_mean(z_negative, axis=0) + elif method == 'svm': + # Use SVM to find separating hyperplane + from sklearn.svm import SVC + + # Prepare data for SVM + X = tf.concat([z_positive, z_negative], axis=0).numpy() + y = np.concatenate([np.ones(len(z_positive)), np.zeros(len(z_negative))]) + + # Fit SVM + svm = SVC(kernel='linear') + svm.fit(X, y) + + # Direction is the normal to the hyperplane + direction = tf.constant(svm.coef_[0], dtype=tf.float32) + else: + raise ValueError(f"Unknown method: {method}") + + # Normalize direction + direction = direction / tf.norm(direction) + + return direction + + def random_walk( + self, + start_point: tf.Tensor, + num_steps: int = 10, + step_size: float = 0.1, + random_seed: Optional[int] = None + ) -> Tuple[tf.Tensor, tf.Tensor]: + """ + Perform random walk in latent space. + + Args: + start_point: Starting latent point + num_steps: Number of steps in the walk + step_size: Size of each random step + random_seed: Random seed for reproducibility + + Returns: + Tuple of (walk_points, reconstructions) + """ + if random_seed is not None: + tf.random.set_seed(random_seed) + + current_point = start_point + walk_points = [current_point] + + for _ in range(num_steps): + # Generate random step + step = tf.random.normal(shape=tf.shape(current_point), stddev=step_size) + current_point = current_point + step + walk_points.append(current_point) + + walk_points = tf.stack(walk_points) + reconstructions = self.autoencoder.decode(walk_points) + + return walk_points, reconstructions + + def latent_space_neighbors( + self, + query_point: tf.Tensor, + latent_dataset: tf.Tensor, + k: int = 5, + metric: str = 'euclidean' + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + """ + Find k nearest neighbors in latent space. + + Args: + query_point: Query latent point + latent_dataset: Dataset of latent points to search + k: Number of neighbors to find + metric: Distance metric ('euclidean', 'cosine') + + Returns: + Tuple of (neighbor_indices, neighbor_points, distances) + """ + if metric == 'euclidean': + # Compute Euclidean distances + distances = tf.norm(latent_dataset - query_point, axis=1) + elif metric == 'cosine': + # Compute cosine distances + query_norm = tf.norm(query_point) + dataset_norms = tf.norm(latent_dataset, axis=1) + dot_products = tf.reduce_sum(latent_dataset * query_point, axis=1) + cosine_similarities = dot_products / (query_norm * dataset_norms) + distances = 1 - cosine_similarities + else: + raise ValueError(f"Unknown metric: {metric}") + + # Find k nearest neighbors + _, neighbor_indices = tf.nn.top_k(-distances, k=k) + neighbor_points = tf.gather(latent_dataset, neighbor_indices) + neighbor_distances = tf.gather(distances, neighbor_indices) + + return neighbor_indices, neighbor_points, neighbor_distances + + def interpolation_quality_metric( + self, + z_interpolated: tf.Tensor, + smoothness_weight: float = 1.0 + ) -> float: + """ + Compute quality metric for interpolation smoothness. 
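+
+        Smoothness is approximated by discrete curvature:
+        mean_i ||z[i+1] - 2 * z[i] + z[i-1]||, scaled by ``smoothness_weight``.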
+ + Args: + z_interpolated: Interpolated latent points + smoothness_weight: Weight for smoothness term + + Returns: + Quality metric (lower is better) + """ + # Compute second derivatives (curvature) + if len(z_interpolated) < 3: + return 0.0 + + # Second differences approximation + second_diffs = z_interpolated[2:] - 2 * z_interpolated[1:-1] + z_interpolated[:-2] + curvature = tf.reduce_mean(tf.norm(second_diffs, axis=1)) + + return float(smoothness_weight * curvature) + + def visualize_interpolation( + self, + x_interpolated: tf.Tensor, + title: str = "Latent Space Interpolation", + figsize: Tuple[int, int] = (15, 3) + ) -> None: + """ + Visualize interpolation results. + + Args: + x_interpolated: Interpolated reconstructions + title: Plot title + figsize: Figure size + """ + n_samples = len(x_interpolated) + + plt.figure(figsize=figsize) + for i in range(n_samples): + plt.subplot(1, n_samples, i + 1) + + # Handle different data shapes + if len(x_interpolated[i].shape) == 1: + # 1D data - plot as line + plt.plot(x_interpolated[i]) + elif len(x_interpolated[i].shape) == 2: + # 2D data - show as heatmap + plt.imshow(x_interpolated[i], cmap='viridis') + elif len(x_interpolated[i].shape) == 3: + # 3D data (images) - show as image + if x_interpolated[i].shape[-1] == 1: + plt.imshow(x_interpolated[i][:, :, 0], cmap='gray') + else: + plt.imshow(x_interpolated[i]) + + plt.axis('off') + plt.title(f'Step {i}') + + plt.suptitle(title) + plt.tight_layout() + plt.show() + + def get_config(self) -> dict: + """Get configuration dictionary.""" + return { + 'autoencoder_type': type(self.autoencoder).__name__ + } \ No newline at end of file diff --git a/udl_toolbox/projections/pca.py b/udl_toolbox/projections/pca.py new file mode 100644 index 0000000..441d30a --- /dev/null +++ b/udl_toolbox/projections/pca.py @@ -0,0 +1,225 @@ +""" +PCA projection implementation from scratch. +""" + +import tensorflow as tf +import numpy as np +from typing import Optional, Tuple +from sklearn.preprocessing import StandardScaler + + +class PCAProjection: + """ + Principal Component Analysis (PCA) implementation from scratch. + + Reduces dimensionality by projecting data onto principal components + that capture the most variance in the data. + """ + + def __init__(self, n_components: int, standardize: bool = True): + """ + Initialize PCA projection. + + Args: + n_components: Number of principal components to keep + standardize: Whether to standardize data before PCA + """ + self.n_components = n_components + self.standardize = standardize + + # Fitted parameters + self.components_ = None + self.explained_variance_ = None + self.explained_variance_ratio_ = None + self.mean_ = None + self.scaler_ = None + self.is_fitted = False + + def fit(self, X: np.ndarray) -> 'PCAProjection': + """ + Fit PCA to the data. 
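+
+        Fitting (optionally) standardizes and centers X, forms the covariance
+        matrix C = X_c^T X_c / (n - 1), eigendecomposes it, sorts eigenpairs
+        by descending eigenvalue, and keeps the top ``n_components``.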
+ + Args: + X: Input data of shape (n_samples, n_features) + + Returns: + Self for method chaining + """ + X = np.array(X) + + # Standardize if requested + if self.standardize: + self.scaler_ = StandardScaler() + X_processed = self.scaler_.fit_transform(X) + else: + X_processed = X.copy() + + # Center the data + self.mean_ = np.mean(X_processed, axis=0) + X_centered = X_processed - self.mean_ + + # Compute covariance matrix + n_samples = X_centered.shape[0] + cov_matrix = np.dot(X_centered.T, X_centered) / (n_samples - 1) + + # Compute eigenvalues and eigenvectors + eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix) + + # Sort by eigenvalues in descending order + idx = np.argsort(eigenvalues)[::-1] + eigenvalues = eigenvalues[idx] + eigenvectors = eigenvectors[:, idx] + + # Keep only the requested number of components + self.components_ = eigenvectors[:, :self.n_components].T + self.explained_variance_ = eigenvalues[:self.n_components] + self.explained_variance_ratio_ = self.explained_variance_ / np.sum(eigenvalues) + + self.is_fitted = True + return self + + def transform(self, X: np.ndarray) -> np.ndarray: + """ + Transform data using fitted PCA. + + Args: + X: Input data of shape (n_samples, n_features) + + Returns: + Transformed data of shape (n_samples, n_components) + """ + if not self.is_fitted: + raise ValueError("PCA must be fitted before transform") + + X = np.array(X) + + # Apply same preprocessing as during fit + if self.standardize: + X_processed = self.scaler_.transform(X) + else: + X_processed = X.copy() + + # Center the data + X_centered = X_processed - self.mean_ + + # Project onto principal components + return np.dot(X_centered, self.components_.T) + + def fit_transform(self, X: np.ndarray) -> np.ndarray: + """ + Fit PCA and transform data in one step. + + Args: + X: Input data of shape (n_samples, n_features) + + Returns: + Transformed data of shape (n_samples, n_components) + """ + return self.fit(X).transform(X) + + def inverse_transform(self, X_transformed: np.ndarray) -> np.ndarray: + """ + Transform data back to original space. + + Args: + X_transformed: Transformed data of shape (n_samples, n_components) + + Returns: + Reconstructed data of shape (n_samples, n_features) + """ + if not self.is_fitted: + raise ValueError("PCA must be fitted before inverse transform") + + X_transformed = np.array(X_transformed) + + # Project back to original space + X_reconstructed = np.dot(X_transformed, self.components_) + self.mean_ + + # Reverse standardization if applied + if self.standardize: + X_reconstructed = self.scaler_.inverse_transform(X_reconstructed) + + return X_reconstructed + + def get_explained_variance_ratio(self) -> np.ndarray: + """ + Get the explained variance ratio for each component. + + Returns: + Array of explained variance ratios + """ + if not self.is_fitted: + raise ValueError("PCA must be fitted first") + return self.explained_variance_ratio_ + + def get_cumulative_variance_ratio(self) -> np.ndarray: + """ + Get cumulative explained variance ratio. + + Returns: + Array of cumulative explained variance ratios + """ + if not self.is_fitted: + raise ValueError("PCA must be fitted first") + return np.cumsum(self.explained_variance_ratio_) + + def find_n_components_for_variance(self, target_variance: float = 0.95) -> int: + """ + Find number of components needed to explain target variance. 
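+
+        Example (illustrative sketch; assumes a fitted instance ``pca``):
+
+            >>> k = pca.find_n_components_for_variance(target_variance=0.95)
+            >>> # k is the smallest component count whose cumulative ratio >= 0.95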
+
+        Args:
+            target_variance: Target cumulative variance ratio
+
+        Returns:
+            Number of components needed
+        """
+        if not self.is_fitted:
+            raise ValueError("PCA must be fitted first")
+
+        cumvar = self.get_cumulative_variance_ratio()
+        reached = cumvar >= target_variance
+        if not np.any(reached):
+            # The target is unreachable with the fitted components; fall back to
+            # all of them (np.argmax would misleadingly return index 0 here)
+            return len(cumvar)
+        # np.argmax returns the first index where the target is reached
+        return int(np.argmax(reached)) + 1
+
+    def reconstruction_error(self, X: np.ndarray) -> float:
+        """
+        Compute reconstruction error for given data.
+
+        Args:
+            X: Input data to compute error for
+
+        Returns:
+            Mean squared reconstruction error
+        """
+        X_transformed = self.transform(X)
+        X_reconstructed = self.inverse_transform(X_transformed)
+        return np.mean((X - X_reconstructed) ** 2)
+
+    def get_principal_components(self) -> np.ndarray:
+        """
+        Get the principal components (eigenvectors).
+
+        Returns:
+            Principal components matrix of shape (n_components, n_features)
+        """
+        if not self.is_fitted:
+            raise ValueError("PCA must be fitted first")
+        return self.components_
+
+    def project_new_data(self, X_new: np.ndarray) -> np.ndarray:
+        """
+        Project new data onto the fitted PCA space.
+
+        Args:
+            X_new: New data to project
+
+        Returns:
+            Projected data
+        """
+        return self.transform(X_new)
+
+    def get_config(self) -> dict:
+        """Get configuration dictionary."""
+        return {
+            'n_components': self.n_components,
+            'standardize': self.standardize
+        }
\ No newline at end of file
diff --git a/udl_toolbox/projections/tsne.py b/udl_toolbox/projections/tsne.py
new file mode 100644
index 0000000..4c74555
--- /dev/null
+++ b/udl_toolbox/projections/tsne.py
@@ -0,0 +1,399 @@
+"""
+t-SNE-like projection implementation for dimensionality reduction and visualization.
+"""
+
+import tensorflow as tf
+import numpy as np
+from typing import Optional, Callable
+from sklearn.manifold import TSNE as SklearnTSNE
+from sklearn.preprocessing import StandardScaler
+
+
+class TSNEProjection:
+    """
+    t-SNE-like projection for non-linear dimensionality reduction.
+
+    This is a simplified implementation that uses sklearn's t-SNE internally
+    but provides a consistent interface with other projection methods.
+    A full from-scratch implementation would require the complete t-SNE
+    algorithm with perplexity calibration and gradient descent optimization.
+    """
+
+    def __init__(
+        self,
+        n_components: int = 2,
+        perplexity: float = 30.0,
+        learning_rate: float = 200.0,
+        n_iter: int = 1000,
+        random_state: Optional[int] = None,
+        standardize: bool = True
+    ):
+        """
+        Initialize t-SNE projection.
+
+        Args:
+            n_components: Dimension of the embedded space
+            perplexity: Related to the number of nearest neighbors
+            learning_rate: Learning rate for t-SNE optimization
+            n_iter: Maximum number of iterations
+            random_state: Random seed for reproducibility
+            standardize: Whether to standardize data before t-SNE
+        """
+        self.n_components = n_components
+        self.perplexity = perplexity
+        self.learning_rate = learning_rate
+        self.n_iter = n_iter
+        self.random_state = random_state
+        self.standardize = standardize
+
+        # Fitted parameters
+        self.embedding_ = None
+        self.scaler_ = None
+        self.is_fitted = False
+
+        # Internal t-SNE model
+        self.tsne_model = None
+
+    def fit(self, X: np.ndarray) -> 'TSNEProjection':
+        """
+        Fit t-SNE to the data.
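+
+        Example (illustrative sketch; ``X`` is hypothetical high-dimensional data):
+
+            >>> tsne = TSNEProjection(n_components=2, random_state=42)
+            >>> embedding = tsne.fit(X).get_embedding()  # shape (n_samples, 2)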
+ + Args: + X: Input data of shape (n_samples, n_features) + + Returns: + Self for method chaining + """ + X = np.array(X) + + # Standardize if requested + if self.standardize: + self.scaler_ = StandardScaler() + X_processed = self.scaler_.fit_transform(X) + else: + X_processed = X.copy() + + # Initialize t-SNE model + self.tsne_model = SklearnTSNE( + n_components=self.n_components, + perplexity=self.perplexity, + learning_rate=self.learning_rate, + n_iter=self.n_iter, + random_state=self.random_state + ) + + # Fit and transform + self.embedding_ = self.tsne_model.fit_transform(X_processed) + self.is_fitted = True + + return self + + def fit_transform(self, X: np.ndarray) -> np.ndarray: + """ + Fit t-SNE and return embedding. + + Args: + X: Input data of shape (n_samples, n_features) + + Returns: + Embedded data of shape (n_samples, n_components) + """ + self.fit(X) + return self.embedding_ + + def transform(self, X: np.ndarray) -> np.ndarray: + """ + Transform new data using fitted t-SNE. + + Note: t-SNE doesn't naturally support out-of-sample extension. + This method raises an error and suggests alternatives. + + Args: + X: New input data + + Raises: + NotImplementedError: t-SNE doesn't support out-of-sample extension + """ + raise NotImplementedError( + "t-SNE doesn't support out-of-sample extension. " + "Use fit_transform on all data at once, or consider using " + "parametric t-SNE or other methods for new data projection." + ) + + def approximate_transform(self, X_new: np.ndarray, X_original: np.ndarray) -> np.ndarray: + """ + Approximate transformation for new data using k-nearest neighbors. + + This is a workaround for t-SNE's lack of out-of-sample extension. + + Args: + X_new: New data to transform + X_original: Original training data + + Returns: + Approximate embedding for new data + """ + if not self.is_fitted: + raise ValueError("t-SNE must be fitted first") + + from sklearn.neighbors import NearestNeighbors + + # Standardize new data using fitted scaler + if self.standardize: + X_new_processed = self.scaler_.transform(X_new) + X_original_processed = self.scaler_.transform(X_original) + else: + X_new_processed = X_new.copy() + X_original_processed = X_original.copy() + + # Find nearest neighbors in original data + nn = NearestNeighbors(n_neighbors=min(5, len(X_original_processed))) + nn.fit(X_original_processed) + + distances, indices = nn.kneighbors(X_new_processed) + + # Weighted average of embeddings of nearest neighbors + weights = 1 / (distances + 1e-8) # Inverse distance weighting + weights = weights / np.sum(weights, axis=1, keepdims=True) + + embeddings_new = [] + for i in range(len(X_new)): + neighbor_embeddings = self.embedding_[indices[i]] + weighted_embedding = np.average(neighbor_embeddings, weights=weights[i], axis=0) + embeddings_new.append(weighted_embedding) + + return np.array(embeddings_new) + + def get_embedding(self) -> np.ndarray: + """ + Get the fitted embedding. + + Returns: + Embedded data + """ + if not self.is_fitted: + raise ValueError("t-SNE must be fitted first") + return self.embedding_ + + def get_kl_divergence(self) -> float: + """ + Get the final KL divergence of the t-SNE embedding. 
+ + Returns: + KL divergence value + """ + if not self.is_fitted: + raise ValueError("t-SNE must be fitted first") + return self.tsne_model.kl_divergence_ + + def get_config(self) -> dict: + """Get configuration dictionary.""" + return { + 'n_components': self.n_components, + 'perplexity': self.perplexity, + 'learning_rate': self.learning_rate, + 'n_iter': self.n_iter, + 'random_state': self.random_state, + 'standardize': self.standardize + } + + +class ParametricTSNE: + """ + Parametric t-SNE implementation using neural networks. + + This provides a way to learn a mapping function that can be applied + to new data, addressing the out-of-sample problem of standard t-SNE. + """ + + def __init__( + self, + n_components: int = 2, + hidden_layers: list = [500, 500, 2000], + perplexity: float = 30.0, + learning_rate: float = 0.01, + epochs: int = 1000, + batch_size: int = 500, + standardize: bool = True + ): + """ + Initialize parametric t-SNE. + + Args: + n_components: Dimension of the embedded space + hidden_layers: List of hidden layer sizes + perplexity: Related to the number of nearest neighbors + learning_rate: Learning rate for optimization + epochs: Number of training epochs + batch_size: Batch size for training + standardize: Whether to standardize input data + """ + self.n_components = n_components + self.hidden_layers = hidden_layers + self.perplexity = perplexity + self.learning_rate = learning_rate + self.epochs = epochs + self.batch_size = batch_size + self.standardize = standardize + + # Model components + self.model = None + self.scaler_ = None + self.is_fitted = False + + def _build_model(self, input_dim: int) -> tf.keras.Model: + """Build the parametric t-SNE neural network.""" + inputs = tf.keras.Input(shape=(input_dim,)) + x = inputs + + # Hidden layers + for i, units in enumerate(self.hidden_layers): + x = tf.keras.layers.Dense(units, activation='relu', name=f'hidden_{i}')(x) + x = tf.keras.layers.BatchNormalization()(x) + x = tf.keras.layers.Dropout(0.1)(x) + + # Output layer + outputs = tf.keras.layers.Dense(self.n_components, activation='linear', name='embedding')(x) + + return tf.keras.Model(inputs, outputs) + + def _compute_pairwise_distances(self, X: tf.Tensor) -> tf.Tensor: + """Compute pairwise squared Euclidean distances.""" + # X shape: (batch_size, n_features) + sum_X = tf.reduce_sum(tf.square(X), axis=1, keepdims=True) + distances = sum_X + tf.transpose(sum_X) - 2 * tf.matmul(X, tf.transpose(X)) + return tf.maximum(distances, 0.0) # Ensure non-negative + + def _compute_p_similarities(self, distances: tf.Tensor) -> tf.Tensor: + """Compute P similarities for high-dimensional space.""" + # Convert distances to similarities using Gaussian kernel + # This is a simplified version - full t-SNE uses perplexity calibration + sigma = 1.0 # Simplified: should be calibrated based on perplexity + similarities = tf.exp(-distances / (2 * sigma ** 2)) + + # Symmetrize and normalize + similarities = (similarities + tf.transpose(similarities)) / 2 + similarities = similarities / tf.reduce_sum(similarities) + + # Add small epsilon to avoid numerical issues + return tf.maximum(similarities, 1e-12) + + def _compute_q_similarities(self, Y: tf.Tensor) -> tf.Tensor: + """Compute Q similarities for low-dimensional space using t-distribution.""" + distances = self._compute_pairwise_distances(Y) + similarities = 1.0 / (1.0 + distances) + + # Set diagonal to 0 and normalize + similarities = similarities * (1 - tf.eye(tf.shape(similarities)[0])) + similarities = similarities / 
tf.reduce_sum(similarities)
+
+        return tf.maximum(similarities, 1e-12)
+
+    @tf.function
+    def _tsne_loss(self, X: tf.Tensor, Y: tf.Tensor) -> tf.Tensor:
+        """Compute t-SNE KL divergence loss."""
+        # Compute similarities
+        P = self._compute_p_similarities(self._compute_pairwise_distances(X))
+        Q = self._compute_q_similarities(Y)
+
+        # KL divergence: sum(P * log(P / Q))
+        kl_div = tf.reduce_sum(P * tf.math.log(P / Q))
+        return kl_div
+
+    def fit(self, X: np.ndarray) -> 'ParametricTSNE':
+        """
+        Fit parametric t-SNE to the data.
+
+        Args:
+            X: Input data of shape (n_samples, n_features)
+
+        Returns:
+            Self for method chaining
+        """
+        X = np.array(X)
+
+        # Standardize if requested
+        if self.standardize:
+            self.scaler_ = StandardScaler()
+            X_processed = self.scaler_.fit_transform(X)
+        else:
+            X_processed = X.copy()
+
+        # Build model
+        self.model = self._build_model(X_processed.shape[1])
+
+        # Create optimizer (no compile() needed, since a custom training loop is used)
+        optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
+
+        # Convert to tensor dataset
+        dataset = tf.data.Dataset.from_tensor_slices(X_processed)
+        dataset = dataset.batch(self.batch_size).prefetch(tf.data.AUTOTUNE)
+
+        # Training loop
+        for epoch in range(self.epochs):
+            epoch_loss = 0.0
+            num_batches = 0
+
+            for batch in dataset:
+                with tf.GradientTape() as tape:
+                    Y = self.model(batch, training=True)
+                    loss = self._tsne_loss(batch, Y)
+
+                gradients = tape.gradient(loss, self.model.trainable_variables)
+                optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
+
+                # Accumulate as a Python float so the epoch summary can be formatted
+                epoch_loss += float(loss)
+                num_batches += 1
+
+            if epoch % 100 == 0:
+                print(f"Epoch {epoch}, Loss: {epoch_loss / num_batches:.6f}")
+
+        self.is_fitted = True
+        return self
+
+    def transform(self, X: np.ndarray) -> np.ndarray:
+        """
+        Transform data using fitted parametric t-SNE.
+
+        Args:
+            X: Input data to transform
+
+        Returns:
+            Embedded data
+        """
+        if not self.is_fitted:
+            raise ValueError("Parametric t-SNE must be fitted first")
+
+        # Standardize using fitted scaler
+        if self.standardize:
+            X_processed = self.scaler_.transform(X)
+        else:
+            X_processed = X.copy()
+
+        return self.model(X_processed, training=False).numpy()
+
+    def fit_transform(self, X: np.ndarray) -> np.ndarray:
+        """
+        Fit parametric t-SNE and transform data.
+
+        Args:
+            X: Input data
+
+        Returns:
+            Embedded data
+        """
+        self.fit(X)
+        return self.transform(X)
+
+    def get_config(self) -> dict:
+        """Get configuration dictionary."""
+        return {
+            'n_components': self.n_components,
+            'hidden_layers': self.hidden_layers,
+            'perplexity': self.perplexity,
+            'learning_rate': self.learning_rate,
+            'epochs': self.epochs,
+            'batch_size': self.batch_size,
+            'standardize': self.standardize
+        }
\ No newline at end of file
diff --git a/udl_toolbox/utils/__init__.py b/udl_toolbox/utils/__init__.py
new file mode 100644
index 0000000..62dde32
--- /dev/null
+++ b/udl_toolbox/utils/__init__.py
@@ -0,0 +1,9 @@
+"""Utility functions."""
+
+from .data_preprocessing import DataPreprocessor
+from .model_io import ModelSaver
+
+__all__ = [
+    "DataPreprocessor",
+    "ModelSaver",
+]
\ No newline at end of file
diff --git a/udl_toolbox/utils/data_preprocessing.py b/udl_toolbox/utils/data_preprocessing.py
new file mode 100644
index 0000000..9d5eeb5
--- /dev/null
+++ b/udl_toolbox/utils/data_preprocessing.py
@@ -0,0 +1,393 @@
+"""
+Data preprocessing utilities for autoencoders.
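+
+Example (illustrative sketch; ``X`` is any hypothetical 2D data array):
+
+    >>> pre = DataPreprocessor(scaling_method='standard')
+    >>> splits = pre.prepare_data(X, validation_split=0.2)
+    >>> X_train, X_val = splits['train'], splits['validation']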
+""" + +import numpy as np +import tensorflow as tf +from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler +from sklearn.model_selection import train_test_split +from typing import Tuple, Optional, Dict, Any, Union +import warnings + + +class DataPreprocessor: + """ + Comprehensive data preprocessing utilities for autoencoder training. + + Provides methods for scaling, normalization, train/validation splitting, + and data format conversion for various autoencoder types. + """ + + def __init__(self, scaling_method: str = 'standard'): + """ + Initialize data preprocessor. + + Args: + scaling_method: Method for scaling ('standard', 'minmax', 'robust', 'none') + """ + self.scaling_method = scaling_method + self.scaler = None + self.is_fitted = False + + # Initialize scaler + if scaling_method == 'standard': + self.scaler = StandardScaler() + elif scaling_method == 'minmax': + self.scaler = MinMaxScaler() + elif scaling_method == 'robust': + self.scaler = RobustScaler() + elif scaling_method == 'none': + self.scaler = None + else: + raise ValueError(f"Unknown scaling method: {scaling_method}") + + def prepare_data( + self, + data: np.ndarray, + validation_split: float = 0.2, + test_split: Optional[float] = None, + random_state: int = 42, + shuffle: bool = True + ) -> Dict[str, np.ndarray]: + """ + Prepare data for autoencoder training. + + Args: + data: Input data array + validation_split: Fraction of data to use for validation + test_split: Optional fraction for test set + random_state: Random seed for reproducibility + shuffle: Whether to shuffle data before splitting + + Returns: + Dictionary with train/val/test splits + """ + data = np.array(data) + + # Handle different data shapes + if len(data.shape) > 2: + original_shape = data.shape + data_flat = data.reshape(data.shape[0], -1) + else: + original_shape = None + data_flat = data + + # Scale the data + if self.scaler is not None: + data_scaled = self.scaler.fit_transform(data_flat) + self.is_fitted = True + else: + data_scaled = data_flat + + # Reshape back if needed + if original_shape is not None: + data_scaled = data_scaled.reshape(original_shape) + + # Split data + if test_split is not None: + # Three-way split + X_temp, X_test = train_test_split( + data_scaled, test_size=test_split, + random_state=random_state, shuffle=shuffle + ) + X_train, X_val = train_test_split( + X_temp, test_size=validation_split/(1-test_split), + random_state=random_state, shuffle=shuffle + ) + return { + 'train': X_train, + 'validation': X_val, + 'test': X_test + } + else: + # Two-way split + X_train, X_val = train_test_split( + data_scaled, test_size=validation_split, + random_state=random_state, shuffle=shuffle + ) + return { + 'train': X_train, + 'validation': X_val + } + + def preprocess_images( + self, + images: np.ndarray, + target_size: Optional[Tuple[int, int]] = None, + normalize: bool = True, + augment: bool = False + ) -> np.ndarray: + """ + Preprocess image data for convolutional autoencoders. 
+ + Args: + images: Image array of shape (n_samples, height, width, channels) + target_size: Optional target size for resizing + normalize: Whether to normalize pixel values to [0, 1] + augment: Whether to apply data augmentation + + Returns: + Preprocessed images + """ + images = np.array(images) + + # Ensure 4D shape + if len(images.shape) == 3: + images = np.expand_dims(images, axis=-1) + + # Resize if requested + if target_size is not None: + try: + from tensorflow.image import resize + images = resize(images, target_size).numpy() + except ImportError: + warnings.warn("TensorFlow image resize not available, skipping resize") + + # Normalize pixel values + if normalize: + if images.dtype == np.uint8: + images = images.astype(np.float32) / 255.0 + elif images.max() > 1.0: + images = images / images.max() + + # Data augmentation (simple transformations) + if augment: + images = self._apply_image_augmentation(images) + + return images + + def _apply_image_augmentation(self, images: np.ndarray) -> np.ndarray: + """Apply simple image augmentation.""" + # This is a placeholder for more sophisticated augmentation + # In practice, you'd use tf.image or imgaug + return images + + def preprocess_time_series( + self, + time_series: np.ndarray, + window_size: int, + step_size: int = 1, + normalize_windows: bool = True + ) -> np.ndarray: + """ + Preprocess time series data for sequence autoencoders. + + Args: + time_series: Time series data (n_series, n_timesteps, n_features) + window_size: Size of sliding windows + step_size: Step size for sliding windows + normalize_windows: Whether to normalize each window + + Returns: + Windowed time series data + """ + if len(time_series.shape) == 1: + time_series = time_series.reshape(1, -1, 1) + elif len(time_series.shape) == 2: + time_series = np.expand_dims(time_series, axis=-1) + + windows = [] + + for series in time_series: + series_windows = [] + for i in range(0, len(series) - window_size + 1, step_size): + window = series[i:i + window_size] + + if normalize_windows: + # Z-score normalization per window + window_mean = np.mean(window, axis=0) + window_std = np.std(window, axis=0) + window_std = np.where(window_std == 0, 1, window_std) # Avoid division by zero + window = (window - window_mean) / window_std + + series_windows.append(window) + + windows.extend(series_windows) + + return np.array(windows) + + def add_noise( + self, + data: np.ndarray, + noise_type: str = 'gaussian', + noise_level: float = 0.1, + random_state: int = 42 + ) -> np.ndarray: + """ + Add noise to data (useful for denoising autoencoders). 
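+
+        Example (illustrative sketch; ``clean`` is hypothetical data scaled to [0, 1]):
+
+            >>> noisy = preprocessor.add_noise(clean, noise_type='gaussian', noise_level=0.1)
+            >>> # Train a denoising autoencoder on (noisy, clean) pairs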
+ + Args: + data: Input data + noise_type: Type of noise ('gaussian', 'uniform', 'salt_pepper') + noise_level: Level of noise to add + random_state: Random seed + + Returns: + Noisy data + """ + np.random.seed(random_state) + noisy_data = data.copy() + + if noise_type == 'gaussian': + noise = np.random.normal(0, noise_level, data.shape) + noisy_data = data + noise + elif noise_type == 'uniform': + noise = np.random.uniform(-noise_level, noise_level, data.shape) + noisy_data = data + noise + elif noise_type == 'salt_pepper': + # Salt and pepper noise + prob = noise_level + random_matrix = np.random.random(data.shape) + noisy_data[random_matrix < prob/2] = 0 # Pepper noise + noisy_data[random_matrix > 1 - prob/2] = 1 # Salt noise + else: + raise ValueError(f"Unknown noise type: {noise_type}") + + # Clip values to valid range + if data.min() >= 0 and data.max() <= 1: + noisy_data = np.clip(noisy_data, 0, 1) + + return noisy_data + + def create_corrupted_pairs( + self, + data: np.ndarray, + corruption_ratio: float = 0.3, + corruption_type: str = 'masking' + ) -> Tuple[np.ndarray, np.ndarray]: + """ + Create corrupted input-target pairs for denoising autoencoders. + + Args: + data: Clean input data + corruption_ratio: Fraction of data to corrupt + corruption_type: Type of corruption ('masking', 'noise', 'dropout') + + Returns: + Tuple of (corrupted_inputs, clean_targets) + """ + corrupted_data = data.copy() + + if corruption_type == 'masking': + # Random masking + mask = np.random.random(data.shape) < corruption_ratio + corrupted_data[mask] = 0 + elif corruption_type == 'noise': + # Add Gaussian noise + corrupted_data = self.add_noise(data, 'gaussian', corruption_ratio) + elif corruption_type == 'dropout': + # Random dropout + mask = np.random.random(data.shape) < corruption_ratio + corrupted_data[mask] = 0 + else: + raise ValueError(f"Unknown corruption type: {corruption_type}") + + return corrupted_data, data + + def transform(self, data: np.ndarray) -> np.ndarray: + """ + Transform new data using fitted preprocessor. + + Args: + data: New data to transform + + Returns: + Transformed data + """ + if not self.is_fitted and self.scaler is not None: + raise ValueError("Preprocessor must be fitted before transform") + + data = np.array(data) + + # Handle different data shapes + if len(data.shape) > 2: + original_shape = data.shape + data_flat = data.reshape(data.shape[0], -1) + else: + original_shape = None + data_flat = data + + # Scale the data + if self.scaler is not None: + data_scaled = self.scaler.transform(data_flat) + else: + data_scaled = data_flat + + # Reshape back if needed + if original_shape is not None: + data_scaled = data_scaled.reshape(original_shape) + + return data_scaled + + def inverse_transform(self, data: np.ndarray) -> np.ndarray: + """ + Inverse transform data back to original scale. 
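+
+        Example (illustrative round trip; assumes the preprocessor was fitted
+        on ``X`` via ``prepare_data``):
+
+            >>> X_back = preprocessor.inverse_transform(preprocessor.transform(X))
+            >>> # X_back matches X up to floating-point error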
+ + Args: + data: Transformed data + + Returns: + Data in original scale + """ + if not self.is_fitted and self.scaler is not None: + raise ValueError("Preprocessor must be fitted before inverse transform") + + if self.scaler is None: + return data + + data = np.array(data) + + # Handle different data shapes + if len(data.shape) > 2: + original_shape = data.shape + data_flat = data.reshape(data.shape[0], -1) + else: + original_shape = None + data_flat = data + + # Inverse transform + data_original = self.scaler.inverse_transform(data_flat) + + # Reshape back if needed + if original_shape is not None: + data_original = data_original.reshape(original_shape) + + return data_original + + def get_preprocessing_stats(self, data: np.ndarray) -> Dict[str, Any]: + """ + Get statistics about the data for preprocessing analysis. + + Args: + data: Input data + + Returns: + Dictionary of data statistics + """ + data = np.array(data) + + stats = { + 'shape': data.shape, + 'dtype': data.dtype, + 'min': float(np.min(data)), + 'max': float(np.max(data)), + 'mean': float(np.mean(data)), + 'std': float(np.std(data)), + 'n_samples': data.shape[0], + 'memory_usage_mb': data.nbytes / (1024 * 1024) + } + + # Check for common data issues + stats['has_nan'] = bool(np.any(np.isnan(data))) + stats['has_inf'] = bool(np.any(np.isinf(data))) + stats['is_normalized'] = bool(stats['min'] >= 0 and stats['max'] <= 1) + stats['is_standardized'] = bool(abs(stats['mean']) < 0.1 and abs(stats['std'] - 1.0) < 0.1) + + return stats + + def get_config(self) -> Dict[str, Any]: + """Get configuration dictionary.""" + return { + 'scaling_method': self.scaling_method, + 'is_fitted': self.is_fitted + } \ No newline at end of file diff --git a/udl_toolbox/utils/model_io.py b/udl_toolbox/utils/model_io.py new file mode 100644 index 0000000..1895852 --- /dev/null +++ b/udl_toolbox/utils/model_io.py @@ -0,0 +1,429 @@ +""" +Model saving and loading utilities. +""" + +import os +import json +import pickle +import tensorflow as tf +import numpy as np +from typing import Dict, Any, Optional +import warnings + + +class ModelSaver: + """ + Comprehensive model saving and loading utilities for autoencoders. + + Provides methods for saving complete models, weights only, configurations, + and custom serialization for different autoencoder types. + """ + + def __init__(self): + """Initialize model saver.""" + pass + + def save_autoencoder( + self, + autoencoder, + save_path: str, + save_format: str = 'complete', + include_optimizer: bool = True, + save_config: bool = True + ) -> None: + """ + Save autoencoder model with various options. 
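+
+        Example (illustrative sketch; ``ae`` is a hypothetical trained autoencoder):
+
+            >>> saver = ModelSaver()
+            >>> saver.save_autoencoder(ae, './models/run1', save_format='complete')
+            >>> # Writes run1_autoencoder.h5, run1_encoder.h5, run1_decoder.h5
+            >>> # plus run1_config.json (and run1_history.json if available)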
+ + Args: + autoencoder: Autoencoder instance to save + save_path: Base path for saving (without extension) + save_format: Format for saving ('complete', 'weights', 'savedmodel') + include_optimizer: Whether to save optimizer state + save_config: Whether to save model configuration + """ + # Create directory if it doesn't exist + os.makedirs(os.path.dirname(save_path) if os.path.dirname(save_path) else '.', exist_ok=True) + + if save_format == 'complete': + # Save the complete Keras model + autoencoder.autoencoder.save(f"{save_path}_autoencoder.h5") + autoencoder.encoder.save(f"{save_path}_encoder.h5") + autoencoder.decoder.save(f"{save_path}_decoder.h5") + + elif save_format == 'weights': + # Save only weights + autoencoder.autoencoder.save_weights(f"{save_path}_autoencoder_weights.h5") + autoencoder.encoder.save_weights(f"{save_path}_encoder_weights.h5") + autoencoder.decoder.save_weights(f"{save_path}_decoder_weights.h5") + + elif save_format == 'savedmodel': + # Save in TensorFlow SavedModel format + tf.saved_model.save(autoencoder.autoencoder, f"{save_path}_autoencoder") + tf.saved_model.save(autoencoder.encoder, f"{save_path}_encoder") + tf.saved_model.save(autoencoder.decoder, f"{save_path}_decoder") + + else: + raise ValueError(f"Unknown save format: {save_format}") + + # Save configuration + if save_config: + config = autoencoder.get_config() + config['model_type'] = type(autoencoder).__name__ + + with open(f"{save_path}_config.json", 'w') as f: + json.dump(config, f, indent=2) + + # Save training history if available + if hasattr(autoencoder, 'history') and autoencoder.history: + with open(f"{save_path}_history.json", 'w') as f: + # Convert numpy arrays to lists for JSON serialization + history_serializable = {} + for key, value in autoencoder.history.items(): + if isinstance(value, np.ndarray): + history_serializable[key] = value.tolist() + elif isinstance(value, list): + history_serializable[key] = value + else: + history_serializable[key] = str(value) + + json.dump(history_serializable, f, indent=2) + + print(f"Autoencoder saved to {save_path} with format '{save_format}'") + + def load_autoencoder( + self, + save_path: str, + autoencoder_class, + load_format: str = 'complete' + ): + """ + Load autoencoder model. + + Args: + save_path: Base path where model was saved + autoencoder_class: Class of the autoencoder to instantiate + load_format: Format to load from ('complete', 'weights', 'savedmodel') + + Returns: + Loaded autoencoder instance + """ + # Load configuration + config_path = f"{save_path}_config.json" + if os.path.exists(config_path): + with open(config_path, 'r') as f: + config = json.load(f) + + # Remove non-constructor parameters + model_type = config.pop('model_type', None) + + # Create autoencoder instance + autoencoder = autoencoder_class(**config) + + else: + raise FileNotFoundError(f"Configuration file not found: {config_path}") + + # Load model weights/architecture + if load_format == 'complete': + try: + autoencoder.autoencoder = tf.keras.models.load_model(f"{save_path}_autoencoder.h5") + autoencoder.encoder = tf.keras.models.load_model(f"{save_path}_encoder.h5") + autoencoder.decoder = tf.keras.models.load_model(f"{save_path}_decoder.h5") + except Exception as e: + warnings.warn(f"Failed to load complete models: {e}. 
Trying weights-only loading.") + load_format = 'weights' + + if load_format == 'weights': + # Load weights only + autoencoder.autoencoder.load_weights(f"{save_path}_autoencoder_weights.h5") + autoencoder.encoder.load_weights(f"{save_path}_encoder_weights.h5") + autoencoder.decoder.load_weights(f"{save_path}_decoder_weights.h5") + + elif load_format == 'savedmodel': + # Load from SavedModel format + autoencoder.autoencoder = tf.saved_model.load(f"{save_path}_autoencoder") + autoencoder.encoder = tf.saved_model.load(f"{save_path}_encoder") + autoencoder.decoder = tf.saved_model.load(f"{save_path}_decoder") + + # Load training history if available + history_path = f"{save_path}_history.json" + if os.path.exists(history_path): + with open(history_path, 'r') as f: + history = json.load(f) + autoencoder.history = history + + print(f"Autoencoder loaded from {save_path}") + return autoencoder + + def save_model_checkpoint( + self, + autoencoder, + checkpoint_path: str, + epoch: int, + loss: float, + additional_info: Optional[Dict[str, Any]] = None + ) -> None: + """ + Save model checkpoint during training. + + Args: + autoencoder: Autoencoder instance + checkpoint_path: Path for checkpoint + epoch: Current epoch number + loss: Current loss value + additional_info: Additional information to save + """ + checkpoint_dir = os.path.dirname(checkpoint_path) + os.makedirs(checkpoint_dir, exist_ok=True) + + # Save weights + autoencoder.autoencoder.save_weights(f"{checkpoint_path}_epoch_{epoch}_weights.h5") + + # Save checkpoint metadata + checkpoint_info = { + 'epoch': epoch, + 'loss': float(loss), + 'model_config': autoencoder.get_config(), + 'timestamp': str(tf.timestamp()), + } + + if additional_info: + checkpoint_info.update(additional_info) + + with open(f"{checkpoint_path}_epoch_{epoch}_info.json", 'w') as f: + json.dump(checkpoint_info, f, indent=2) + + print(f"Checkpoint saved at epoch {epoch} with loss {loss:.6f}") + + def load_model_checkpoint( + self, + checkpoint_path: str, + epoch: int, + autoencoder_class, + autoencoder_instance = None + ): + """ + Load model from checkpoint. + + Args: + checkpoint_path: Base checkpoint path + epoch: Epoch number to load + autoencoder_class: Autoencoder class + autoencoder_instance: Existing instance to load weights into + + Returns: + Autoencoder instance with loaded weights + """ + # Load checkpoint info + info_path = f"{checkpoint_path}_epoch_{epoch}_info.json" + if not os.path.exists(info_path): + raise FileNotFoundError(f"Checkpoint info not found: {info_path}") + + with open(info_path, 'r') as f: + checkpoint_info = json.load(f) + + # Create or use existing autoencoder instance + if autoencoder_instance is None: + config = checkpoint_info['model_config'] + autoencoder = autoencoder_class(**config) + else: + autoencoder = autoencoder_instance + + # Load weights + weights_path = f"{checkpoint_path}_epoch_{epoch}_weights.h5" + autoencoder.autoencoder.load_weights(weights_path) + + print(f"Checkpoint loaded from epoch {epoch}") + return autoencoder, checkpoint_info + + def save_preprocessor( + self, + preprocessor, + save_path: str + ) -> None: + """ + Save data preprocessor. + + Args: + preprocessor: DataPreprocessor instance + save_path: Path to save preprocessor + """ + with open(save_path, 'wb') as f: + pickle.dump(preprocessor, f) + print(f"Preprocessor saved to {save_path}") + + def load_preprocessor(self, save_path: str): + """ + Load data preprocessor. 
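+
+        Example (illustrative sketch; pairs with ``save_preprocessor``):
+
+            >>> saver.save_preprocessor(preprocessor, './models/preprocessor.pkl')
+            >>> restored = saver.load_preprocessor('./models/preprocessor.pkl')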
+ + Args: + save_path: Path to load preprocessor from + + Returns: + Loaded preprocessor instance + """ + with open(save_path, 'rb') as f: + preprocessor = pickle.load(f) + print(f"Preprocessor loaded from {save_path}") + return preprocessor + + def save_training_results( + self, + save_path: str, + autoencoder, + train_data: np.ndarray, + validation_data: Optional[np.ndarray] = None, + additional_metrics: Optional[Dict[str, Any]] = None + ) -> None: + """ + Save comprehensive training results. + + Args: + save_path: Base path for saving results + autoencoder: Trained autoencoder + train_data: Training data + validation_data: Optional validation data + additional_metrics: Additional metrics to save + """ + # Save the model + self.save_autoencoder(autoencoder, save_path, save_format='complete') + + # Compute and save reconstruction metrics + train_reconstructions = autoencoder.reconstruct(train_data).numpy() + train_mse = np.mean((train_data - train_reconstructions) ** 2) + + results = { + 'train_mse': float(train_mse), + 'train_samples': len(train_data), + 'model_type': type(autoencoder).__name__, + 'latent_dim': autoencoder.latent_dim, + } + + if validation_data is not None: + val_reconstructions = autoencoder.reconstruct(validation_data).numpy() + val_mse = np.mean((validation_data - val_reconstructions) ** 2) + results['val_mse'] = float(val_mse) + results['val_samples'] = len(validation_data) + + if additional_metrics: + results.update(additional_metrics) + + # Save results + with open(f"{save_path}_results.json", 'w') as f: + json.dump(results, f, indent=2) + + print(f"Training results saved to {save_path}_results.json") + + def create_model_archive( + self, + autoencoder, + archive_path: str, + include_data_samples: bool = False, + data_samples: Optional[np.ndarray] = None + ) -> None: + """ + Create a complete archive of the model and related files. + + Args: + autoencoder: Autoencoder instance + archive_path: Path for the archive + include_data_samples: Whether to include sample data + data_samples: Sample data to include + """ + import zipfile + import tempfile + import shutil + + # Create temporary directory + with tempfile.TemporaryDirectory() as temp_dir: + base_path = os.path.join(temp_dir, 'model') + + # Save all model components + self.save_autoencoder(autoencoder, base_path, save_format='complete') + + # Save sample data if requested + if include_data_samples and data_samples is not None: + np.save(os.path.join(temp_dir, 'data_samples.npy'), data_samples) + + # Generate and save sample reconstructions + reconstructions = autoencoder.reconstruct(data_samples).numpy() + np.save(os.path.join(temp_dir, 'sample_reconstructions.npy'), reconstructions) + + # Create archive + with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + for root, dirs, files in os.walk(temp_dir): + for file in files: + file_path = os.path.join(root, file) + arcname = os.path.relpath(file_path, temp_dir) + zipf.write(file_path, arcname) + + print(f"Model archive created: {archive_path}") + + def extract_model_archive( + self, + archive_path: str, + extract_path: str, + autoencoder_class + ): + """ + Extract and load model from archive. 
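+
+        Example (illustrative sketch; ``VanillaAutoencoder`` stands in for
+        whichever class created the archive):
+
+            >>> ae = saver.extract_model_archive('./model.zip', './restored', VanillaAutoencoder)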
+ + Args: + archive_path: Path to the archive + extract_path: Path to extract to + autoencoder_class: Autoencoder class to instantiate + + Returns: + Loaded autoencoder instance + """ + import zipfile + + # Extract archive + with zipfile.ZipFile(archive_path, 'r') as zipf: + zipf.extractall(extract_path) + + # Load model + model_path = os.path.join(extract_path, 'model') + autoencoder = self.load_autoencoder(model_path, autoencoder_class) + + print(f"Model extracted and loaded from {archive_path}") + return autoencoder + + def list_saved_models(self, directory: str) -> list: + """ + List all saved models in a directory. + + Args: + directory: Directory to search + + Returns: + List of model information dictionaries + """ + models = [] + + for file in os.listdir(directory): + if file.endswith('_config.json'): + config_path = os.path.join(directory, file) + base_name = file.replace('_config.json', '') + + try: + with open(config_path, 'r') as f: + config = json.load(f) + + model_info = { + 'name': base_name, + 'type': config.get('model_type', 'Unknown'), + 'latent_dim': config.get('latent_dim', 'Unknown'), + 'config_path': config_path + } + + # Check for history file + history_path = os.path.join(directory, f"{base_name}_history.json") + if os.path.exists(history_path): + model_info['has_history'] = True + + models.append(model_info) + + except Exception as e: + print(f"Error reading config for {base_name}: {e}") + + return models \ No newline at end of file diff --git a/udl_toolbox/visualization/__init__.py b/udl_toolbox/visualization/__init__.py new file mode 100644 index 0000000..278b7c3 --- /dev/null +++ b/udl_toolbox/visualization/__init__.py @@ -0,0 +1,11 @@ +"""Visualization utilities.""" + +from .latent_space import LatentSpaceVisualizer +from .reconstruction import ReconstructionVisualizer +from .training import LossVisualizer + +__all__ = [ + "LatentSpaceVisualizer", + "ReconstructionVisualizer", + "LossVisualizer", +] \ No newline at end of file diff --git a/udl_toolbox/visualization/latent_space.py b/udl_toolbox/visualization/latent_space.py new file mode 100644 index 0000000..d5bbc37 --- /dev/null +++ b/udl_toolbox/visualization/latent_space.py @@ -0,0 +1,448 @@ +""" +Latent space visualization utilities. +""" + +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +import plotly.graph_objects as go +import plotly.express as px +from plotly.subplots import make_subplots +import tensorflow as tf +from typing import Optional, List, Tuple, Dict, Any +from ..projections import PCAProjection, TSNEProjection + + +class LatentSpaceVisualizer: + """ + Comprehensive visualization tools for autoencoder latent spaces. + + Provides methods for visualizing latent representations, distributions, + and relationships between data points in the encoded space. + """ + + def __init__(self, autoencoder, figsize: Tuple[int, int] = (10, 8)): + """ + Initialize latent space visualizer. + + Args: + autoencoder: Fitted autoencoder model + figsize: Default figure size for matplotlib plots + """ + self.autoencoder = autoencoder + self.figsize = figsize + + def plot_2d_latent_space( + self, + data: np.ndarray, + labels: Optional[np.ndarray] = None, + method: str = 'direct', + title: str = "2D Latent Space", + save_path: Optional[str] = None, + interactive: bool = False + ) -> None: + """ + Plot 2D visualization of latent space. 
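+
+        Example (illustrative sketch; ``data`` and ``labels`` are hypothetical):
+
+            >>> viz = LatentSpaceVisualizer(autoencoder)
+            >>> viz.plot_2d_latent_space(data, labels=labels, method='pca')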
+ + Args: + data: Input data to encode and visualize + labels: Optional labels for coloring points + method: Visualization method ('direct', 'pca', 'tsne') + title: Plot title + save_path: Optional path to save the plot + interactive: Whether to create interactive plotly plot + """ + # Encode data to latent space + latent_data = self.autoencoder.encode(data).numpy() + + # Reduce to 2D if necessary + if latent_data.shape[1] == 2: + latent_2d = latent_data + elif method == 'direct' and latent_data.shape[1] > 2: + # Use first 2 dimensions + latent_2d = latent_data[:, :2] + elif method == 'pca': + pca = PCAProjection(n_components=2) + latent_2d = pca.fit_transform(latent_data) + elif method == 'tsne': + tsne = TSNEProjection(n_components=2) + latent_2d = tsne.fit_transform(latent_data) + else: + raise ValueError(f"Unknown method: {method}") + + if interactive: + self._plot_2d_interactive(latent_2d, labels, title) + else: + self._plot_2d_static(latent_2d, labels, title, save_path) + + def _plot_2d_static( + self, + latent_2d: np.ndarray, + labels: Optional[np.ndarray], + title: str, + save_path: Optional[str] + ) -> None: + """Create static matplotlib 2D plot.""" + plt.figure(figsize=self.figsize) + + if labels is not None: + scatter = plt.scatter(latent_2d[:, 0], latent_2d[:, 1], c=labels, cmap='tab10', alpha=0.7) + plt.colorbar(scatter) + else: + plt.scatter(latent_2d[:, 0], latent_2d[:, 1], alpha=0.7) + + plt.xlabel('Latent Dimension 1') + plt.ylabel('Latent Dimension 2') + plt.title(title) + plt.grid(True, alpha=0.3) + + if save_path: + plt.savefig(save_path, dpi=300, bbox_inches='tight') + plt.show() + + def _plot_2d_interactive( + self, + latent_2d: np.ndarray, + labels: Optional[np.ndarray], + title: str + ) -> None: + """Create interactive plotly 2D plot.""" + if labels is not None: + fig = px.scatter( + x=latent_2d[:, 0], + y=latent_2d[:, 1], + color=labels, + title=title, + labels={'x': 'Latent Dimension 1', 'y': 'Latent Dimension 2'} + ) + else: + fig = px.scatter( + x=latent_2d[:, 0], + y=latent_2d[:, 1], + title=title, + labels={'x': 'Latent Dimension 1', 'y': 'Latent Dimension 2'} + ) + + fig.show() + + def plot_3d_latent_space( + self, + data: np.ndarray, + labels: Optional[np.ndarray] = None, + method: str = 'direct', + title: str = "3D Latent Space", + interactive: bool = True + ) -> None: + """ + Plot 3D visualization of latent space. 
+ + Args: + data: Input data to encode and visualize + labels: Optional labels for coloring points + method: Visualization method ('direct', 'pca') + title: Plot title + interactive: Whether to create interactive plot + """ + # Encode data to latent space + latent_data = self.autoencoder.encode(data).numpy() + + # Reduce to 3D if necessary + if latent_data.shape[1] == 3: + latent_3d = latent_data + elif method == 'direct' and latent_data.shape[1] > 3: + # Use first 3 dimensions + latent_3d = latent_data[:, :3] + elif method == 'pca': + pca = PCAProjection(n_components=3) + latent_3d = pca.fit_transform(latent_data) + else: + raise ValueError(f"Unknown method: {method}") + + if interactive: + self._plot_3d_interactive(latent_3d, labels, title) + else: + self._plot_3d_static(latent_3d, labels, title) + + def _plot_3d_static( + self, + latent_3d: np.ndarray, + labels: Optional[np.ndarray], + title: str + ) -> None: + """Create static matplotlib 3D plot.""" + fig = plt.figure(figsize=self.figsize) + ax = fig.add_subplot(111, projection='3d') + + if labels is not None: + scatter = ax.scatter(latent_3d[:, 0], latent_3d[:, 1], latent_3d[:, 2], + c=labels, cmap='tab10', alpha=0.7) + plt.colorbar(scatter) + else: + ax.scatter(latent_3d[:, 0], latent_3d[:, 1], latent_3d[:, 2], alpha=0.7) + + ax.set_xlabel('Latent Dimension 1') + ax.set_ylabel('Latent Dimension 2') + ax.set_zlabel('Latent Dimension 3') + ax.set_title(title) + + plt.show() + + def _plot_3d_interactive( + self, + latent_3d: np.ndarray, + labels: Optional[np.ndarray], + title: str + ) -> None: + """Create interactive plotly 3D plot.""" + if labels is not None: + fig = px.scatter_3d( + x=latent_3d[:, 0], + y=latent_3d[:, 1], + z=latent_3d[:, 2], + color=labels, + title=title, + labels={'x': 'Latent Dimension 1', 'y': 'Latent Dimension 2', 'z': 'Latent Dimension 3'} + ) + else: + fig = px.scatter_3d( + x=latent_3d[:, 0], + y=latent_3d[:, 1], + z=latent_3d[:, 2], + title=title, + labels={'x': 'Latent Dimension 1', 'y': 'Latent Dimension 2', 'z': 'Latent Dimension 3'} + ) + + fig.show() + + def plot_latent_distributions( + self, + data: np.ndarray, + labels: Optional[np.ndarray] = None, + max_dims: int = 10, + title: str = "Latent Dimension Distributions" + ) -> None: + """ + Plot distributions of latent dimensions. 
+ + Args: + data: Input data to encode + labels: Optional labels for group comparisons + max_dims: Maximum number of dimensions to plot + title: Plot title + """ + # Encode data + latent_data = self.autoencoder.encode(data).numpy() + n_dims = min(latent_data.shape[1], max_dims) + + # Create subplots + n_cols = min(4, n_dims) + n_rows = (n_dims + n_cols - 1) // n_cols + + fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 4, n_rows * 3)) + if n_rows == 1: + axes = axes.reshape(1, -1) + + for i in range(n_dims): + row, col = i // n_cols, i % n_cols + ax = axes[row, col] + + if labels is not None: + # Plot separate distributions for each label + unique_labels = np.unique(labels) + for label in unique_labels: + mask = labels == label + ax.hist(latent_data[mask, i], alpha=0.7, bins=30, + label=f'Class {label}', density=True) + ax.legend() + else: + ax.hist(latent_data[:, i], bins=30, alpha=0.7, density=True) + + ax.set_title(f'Latent Dimension {i}') + ax.set_xlabel('Value') + ax.set_ylabel('Density') + ax.grid(True, alpha=0.3) + + # Hide unused subplots + for i in range(n_dims, n_rows * n_cols): + row, col = i // n_cols, i % n_cols + axes[row, col].set_visible(False) + + plt.suptitle(title) + plt.tight_layout() + plt.show() + + def plot_latent_correlation_matrix( + self, + data: np.ndarray, + title: str = "Latent Dimension Correlations" + ) -> None: + """ + Plot correlation matrix of latent dimensions. + + Args: + data: Input data to encode + title: Plot title + """ + # Encode data + latent_data = self.autoencoder.encode(data).numpy() + + # Compute correlation matrix + corr_matrix = np.corrcoef(latent_data.T) + + # Plot heatmap + plt.figure(figsize=self.figsize) + sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0, + square=True, fmt='.2f', cbar_kws={'label': 'Correlation'}) + plt.title(title) + plt.xlabel('Latent Dimension') + plt.ylabel('Latent Dimension') + plt.show() + + def plot_latent_manifold_2d( + self, + grid_size: int = 20, + latent_range: Tuple[float, float] = (-3, 3), + title: str = "Latent Space Manifold" + ) -> None: + """ + Plot 2D latent space manifold (for 2D latent spaces). 
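+
+        Example (illustrative sketch; requires an autoencoder with ``latent_dim == 2``):
+
+            >>> viz.plot_latent_manifold_2d(grid_size=15, latent_range=(-2, 2))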
+ + Args: + grid_size: Size of the grid for sampling + latent_range: Range for latent space sampling + title: Plot title + """ + if self.autoencoder.latent_dim != 2: + raise ValueError("Manifold visualization only supported for 2D latent spaces") + + # Create grid of latent points + x = np.linspace(latent_range[0], latent_range[1], grid_size) + y = np.linspace(latent_range[0], latent_range[1], grid_size) + xx, yy = np.meshgrid(x, y) + grid_points = np.column_stack([xx.ravel(), yy.ravel()]) + + # Decode grid points + reconstructions = self.autoencoder.decode(grid_points).numpy() + + # Plot grid of reconstructions + n_cols = grid_size + fig, axes = plt.subplots(grid_size, n_cols, figsize=(20, 20)) + + for i in range(grid_size): + for j in range(n_cols): + idx = i * n_cols + j + ax = axes[i, j] + + # Handle different data types + if len(reconstructions[idx].shape) == 1: + # 1D data + ax.plot(reconstructions[idx]) + elif len(reconstructions[idx].shape) == 2: + # 2D data or grayscale image + if reconstructions[idx].shape[-1] == 1: + ax.imshow(reconstructions[idx][:, :, 0], cmap='gray') + else: + ax.imshow(reconstructions[idx], cmap='viridis') + elif len(reconstructions[idx].shape) == 3: + # Color image + ax.imshow(reconstructions[idx]) + + ax.axis('off') + + plt.suptitle(title) + plt.tight_layout() + plt.show() + + def plot_latent_interpolation_path( + self, + start_point: np.ndarray, + end_point: np.ndarray, + num_steps: int = 10, + title: str = "Latent Space Interpolation" + ) -> None: + """ + Visualize interpolation path in 2D latent space. + + Args: + start_point: Starting point in input space + end_point: Ending point in input space + num_steps: Number of interpolation steps + title: Plot title + """ + from ..projections.interpolation import LatentSpaceInterpolation + + interpolator = LatentSpaceInterpolation(self.autoencoder) + z_interp, x_interp = interpolator.interpolate_data_points( + start_point, end_point, num_steps + ) + + # Plot interpolation path in latent space (if 2D) + if self.autoencoder.latent_dim == 2: + plt.figure(figsize=self.figsize) + plt.plot(z_interp[:, 0], z_interp[:, 1], 'o-', linewidth=2, markersize=8) + plt.scatter(z_interp[0, 0], z_interp[0, 1], color='green', s=100, label='Start') + plt.scatter(z_interp[-1, 0], z_interp[-1, 1], color='red', s=100, label='End') + plt.xlabel('Latent Dimension 1') + plt.ylabel('Latent Dimension 2') + plt.title(f'{title} - Latent Path') + plt.legend() + plt.grid(True, alpha=0.3) + plt.show() + + # Plot reconstructed interpolation + interpolator.visualize_interpolation(x_interp, title) + + def plot_cluster_analysis( + self, + data: np.ndarray, + labels: np.ndarray, + method: str = 'pca', + title: str = "Latent Space Clusters" + ) -> None: + """ + Analyze and visualize clusters in latent space. 
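+
+        Example (illustrative sketch; cluster labels come from a hypothetical
+        k-means run on the flattened inputs):
+
+            >>> from sklearn.cluster import KMeans
+            >>> labels = KMeans(n_clusters=5, n_init=10).fit_predict(data.reshape(len(data), -1))
+            >>> viz.plot_cluster_analysis(data, labels, method='pca')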
+ + Args: + data: Input data + labels: Cluster labels + method: Dimensionality reduction method for visualization + title: Plot title + """ + # Encode data + latent_data = self.autoencoder.encode(data).numpy() + + # Reduce dimensionality for visualization + if method == 'pca': + reducer = PCAProjection(n_components=2) + latent_2d = reducer.fit_transform(latent_data) + elif method == 'tsne': + reducer = TSNEProjection(n_components=2) + latent_2d = reducer.fit_transform(latent_data) + else: + latent_2d = latent_data[:, :2] + + # Plot clusters + plt.figure(figsize=self.figsize) + scatter = plt.scatter(latent_2d[:, 0], latent_2d[:, 1], c=labels, cmap='tab10', alpha=0.7) + + # Add cluster centers + unique_labels = np.unique(labels) + for label in unique_labels: + mask = labels == label + center = np.mean(latent_2d[mask], axis=0) + plt.scatter(center[0], center[1], marker='x', s=200, color='black', linewidth=3) + + plt.colorbar(scatter, label='Cluster') + plt.xlabel('Latent Dimension 1') + plt.ylabel('Latent Dimension 2') + plt.title(title) + plt.grid(True, alpha=0.3) + plt.show() + + def get_config(self) -> Dict[str, Any]: + """Get configuration dictionary.""" + return { + 'autoencoder_type': type(self.autoencoder).__name__, + 'latent_dim': self.autoencoder.latent_dim, + 'figsize': self.figsize + } \ No newline at end of file diff --git a/udl_toolbox/visualization/reconstruction.py b/udl_toolbox/visualization/reconstruction.py new file mode 100644 index 0000000..953f274 --- /dev/null +++ b/udl_toolbox/visualization/reconstruction.py @@ -0,0 +1,455 @@ +""" +Reconstruction quality visualization utilities. +""" + +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +import tensorflow as tf +from typing import Optional, List, Tuple, Dict, Any +from sklearn.metrics import mean_squared_error, mean_absolute_error + + +class ReconstructionVisualizer: + """ + Visualization tools for autoencoder reconstruction quality. + + Provides methods for comparing original and reconstructed data, + analyzing reconstruction errors, and visualizing quality metrics. + """ + + def __init__(self, autoencoder, figsize: Tuple[int, int] = (12, 8)): + """ + Initialize reconstruction visualizer. + + Args: + autoencoder: Fitted autoencoder model + figsize: Default figure size for matplotlib plots + """ + self.autoencoder = autoencoder + self.figsize = figsize + + def plot_reconstruction_comparison( + self, + data: np.ndarray, + num_samples: int = 10, + title: str = "Original vs Reconstructed", + save_path: Optional[str] = None + ) -> None: + """ + Plot side-by-side comparison of original and reconstructed data. 
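+
+        Example (illustrative sketch; ``test_data`` is hypothetical held-out data):
+
+            >>> rviz = ReconstructionVisualizer(autoencoder)
+            >>> rviz.plot_reconstruction_comparison(test_data, num_samples=8)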
+ + Args: + data: Input data to reconstruct + num_samples: Number of samples to display + title: Plot title + save_path: Optional path to save the plot + """ + # Select random samples + indices = np.random.choice(len(data), min(num_samples, len(data)), replace=False) + selected_data = data[indices] + + # Get reconstructions + reconstructions = self.autoencoder.reconstruct(selected_data).numpy() + + # Determine data type and plot accordingly + if len(selected_data.shape) == 2: + # 1D data (each row is a sample) + self._plot_1d_reconstruction_comparison( + selected_data, reconstructions, title, save_path + ) + elif len(selected_data.shape) == 4: + # Image data + self._plot_image_reconstruction_comparison( + selected_data, reconstructions, title, save_path + ) + else: + raise ValueError(f"Unsupported data shape: {selected_data.shape}") + + def _plot_1d_reconstruction_comparison( + self, + original: np.ndarray, + reconstructed: np.ndarray, + title: str, + save_path: Optional[str] + ) -> None: + """Plot comparison for 1D data.""" + n_samples = len(original) + n_cols = min(5, n_samples) + n_rows = (n_samples + n_cols - 1) // n_cols + + fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 3, n_rows * 3)) + if n_rows == 1: + axes = axes.reshape(1, -1) + + for i in range(n_samples): + row, col = i // n_cols, i % n_cols + ax = axes[row, col] + + x_axis = np.arange(len(original[i])) + ax.plot(x_axis, original[i], label='Original', alpha=0.8) + ax.plot(x_axis, reconstructed[i], label='Reconstructed', alpha=0.8, linestyle='--') + + # Compute MSE for this sample + mse = mean_squared_error(original[i], reconstructed[i]) + ax.set_title(f'Sample {i} (MSE: {mse:.4f})') + ax.legend() + ax.grid(True, alpha=0.3) + + # Hide unused subplots + for i in range(n_samples, n_rows * n_cols): + row, col = i // n_cols, i % n_cols + axes[row, col].set_visible(False) + + plt.suptitle(title) + plt.tight_layout() + + if save_path: + plt.savefig(save_path, dpi=300, bbox_inches='tight') + plt.show() + + def _plot_image_reconstruction_comparison( + self, + original: np.ndarray, + reconstructed: np.ndarray, + title: str, + save_path: Optional[str] + ) -> None: + """Plot comparison for image data.""" + n_samples = len(original) + + fig, axes = plt.subplots(2, n_samples, figsize=(n_samples * 2, 4)) + if n_samples == 1: + axes = axes.reshape(2, 1) + + for i in range(n_samples): + # Original image + ax_orig = axes[0, i] + if original[i].shape[-1] == 1: + ax_orig.imshow(original[i][:, :, 0], cmap='gray') + else: + ax_orig.imshow(original[i]) + ax_orig.set_title(f'Original {i}') + ax_orig.axis('off') + + # Reconstructed image + ax_recon = axes[1, i] + if reconstructed[i].shape[-1] == 1: + ax_recon.imshow(reconstructed[i][:, :, 0], cmap='gray') + else: + ax_recon.imshow(np.clip(reconstructed[i], 0, 1)) + + # Compute MSE for this sample + mse = mean_squared_error(original[i].flatten(), reconstructed[i].flatten()) + ax_recon.set_title(f'Recon {i} (MSE: {mse:.4f})') + ax_recon.axis('off') + + plt.suptitle(title) + plt.tight_layout() + + if save_path: + plt.savefig(save_path, dpi=300, bbox_inches='tight') + plt.show() + + def plot_reconstruction_error_distribution( + self, + data: np.ndarray, + error_type: str = 'mse', + bins: int = 50, + title: str = "Reconstruction Error Distribution" + ) -> None: + """ + Plot distribution of reconstruction errors. 
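+
+        Example (illustrative sketch; a heavy high-error tail often flags
+        outliers or hard samples):
+
+            >>> rviz.plot_reconstruction_error_distribution(test_data, error_type='mse', bins=40)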
+
+        Args:
+            data: Input data
+            error_type: Type of error ('mse', 'mae', 'pixel_wise')
+            bins: Number of histogram bins
+            title: Plot title
+        """
+        # Get reconstructions
+        reconstructions = self.autoencoder.reconstruct(data).numpy()
+
+        # Compute errors
+        if error_type == 'mse':
+            errors = [mean_squared_error(data[i].flatten(), reconstructions[i].flatten())
+                      for i in range(len(data))]
+            error_label = 'Mean Squared Error'
+        elif error_type == 'mae':
+            errors = [mean_absolute_error(data[i].flatten(), reconstructions[i].flatten())
+                      for i in range(len(data))]
+            error_label = 'Mean Absolute Error'
+        elif error_type == 'pixel_wise':
+            errors = np.mean((data - reconstructions) ** 2, axis=tuple(range(1, len(data.shape))))
+            error_label = 'Pixel-wise MSE'
+        else:
+            raise ValueError(f"Unknown error type: {error_type}")
+
+        # Plot distribution
+        plt.figure(figsize=self.figsize)
+        plt.hist(errors, bins=bins, alpha=0.7, edgecolor='black')
+        plt.xlabel(error_label)
+        plt.ylabel('Frequency')
+        plt.title(title)
+        plt.grid(True, alpha=0.3)
+
+        # Add statistics
+        plt.axvline(np.mean(errors), color='red', linestyle='--',
+                    label=f'Mean: {np.mean(errors):.4f}')
+        plt.axvline(np.median(errors), color='orange', linestyle='--',
+                    label=f'Median: {np.median(errors):.4f}')
+        plt.legend()
+        plt.show()
+
+    def plot_reconstruction_error_heatmap(
+        self,
+        data: np.ndarray,
+        title: str = "Reconstruction Error Heatmap"
+    ) -> None:
+        """
+        Plot heatmap of reconstruction errors (for image data).
+
+        Args:
+            data: Input image data
+            title: Plot title
+        """
+        if len(data.shape) != 4:
+            raise ValueError("Heatmap visualization only supported for image data")
+
+        # Get reconstructions
+        reconstructions = self.autoencoder.reconstruct(data).numpy()
+
+        # Compute pixel-wise squared errors
+        squared_errors = (data - reconstructions) ** 2
+
+        # Average over samples and channels; this already yields a 2D
+        # (height, width) error map for both grayscale and multi-channel images
+        avg_error = np.mean(squared_errors, axis=(0, 3))
+
+        # Plot heatmap
+        plt.figure(figsize=self.figsize)
+        sns.heatmap(avg_error, cmap='hot', cbar_kws={'label': 'Mean Squared Error'})
+        plt.title(title)
+        plt.xlabel('Pixel X')
+        plt.ylabel('Pixel Y')
+        plt.show()
+
+    def plot_worst_reconstructions(
+        self,
+        data: np.ndarray,
+        num_worst: int = 5,
+        error_type: str = 'mse',
+        title: str = "Worst Reconstructions"
+    ) -> None:
+        """
+        Plot samples with worst reconstruction quality.
+
+        Args:
+            data: Input data
+            num_worst: Number of worst samples to show
+            error_type: Type of error to use for ranking
+            title: Plot title
+        """
+        # Get reconstructions
+        reconstructions = self.autoencoder.reconstruct(data).numpy()
+
+        # Compute errors for each sample
+        if error_type == 'mse':
+            errors = [mean_squared_error(data[i].flatten(), reconstructions[i].flatten())
+                      for i in range(len(data))]
+        elif error_type == 'mae':
+            errors = [mean_absolute_error(data[i].flatten(), reconstructions[i].flatten())
+                      for i in range(len(data))]
+        else:
+            raise ValueError(f"Unknown error type: {error_type}")
+
+        # Find worst samples (largest errors first)
+        worst_indices = np.argsort(errors)[-num_worst:][::-1]
+
+        # Plot worst reconstructions
+        if len(data.shape) == 2:
+            self._plot_1d_reconstruction_comparison(
+                data[worst_indices], reconstructions[worst_indices], title, None
+            )
+        elif len(data.shape) == 4:
+            self._plot_image_reconstruction_comparison(
+                data[worst_indices], reconstructions[worst_indices], title, None
+            )
+        else:
+            raise ValueError(f"Unsupported data shape: {data.shape}")
+
+    def plot_best_reconstructions(
+        self,
+        data: np.ndarray,
+        num_best: int = 5,
+        error_type: str = 'mse',
+        title: str = "Best Reconstructions"
+    ) -> None:
+        """
+        Plot the samples with the best reconstruction quality.
+
+        Args:
+            data: Input data
+            num_best: Number of best samples to show
+            error_type: Type of error to use for ranking
+            title: Plot title
+        """
+        # Get reconstructions
+        reconstructions = self.autoencoder.reconstruct(data).numpy()
+
+        # Compute errors for each sample
+        if error_type == 'mse':
+            errors = [mean_squared_error(data[i].flatten(), reconstructions[i].flatten())
+                      for i in range(len(data))]
+        elif error_type == 'mae':
+            errors = [mean_absolute_error(data[i].flatten(), reconstructions[i].flatten())
+                      for i in range(len(data))]
+        else:
+            raise ValueError(f"Unknown error type: {error_type}")
+
+        # Find best samples (smallest errors first)
+        best_indices = np.argsort(errors)[:num_best]
+
+        # Plot best reconstructions
+        if len(data.shape) == 2:
+            self._plot_1d_reconstruction_comparison(
+                data[best_indices], reconstructions[best_indices], title, None
+            )
+        elif len(data.shape) == 4:
+            self._plot_image_reconstruction_comparison(
+                data[best_indices], reconstructions[best_indices], title, None
+            )
+        else:
+            raise ValueError(f"Unsupported data shape: {data.shape}")
+
+    def plot_reconstruction_quality_vs_latent_dim(
+        self,
+        data: np.ndarray,
+        latent_dims: List[int],
+        error_type: str = 'mse',
+        title: str = "Reconstruction Quality vs Latent Dimension"
+    ) -> None:
+        """
+        Plot reconstruction quality as a function of latent dimension.
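+
+        Example (illustrative sketch; ``viz`` stands for an instance of this
+        visualizer; note the simplified implementation below only plots the
+        current model's point):
+            >>> viz.plot_reconstruction_quality_vs_latent_dim(x_test, latent_dims=[2, 8, 32])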
+
+        Args:
+            data: Input data
+            latent_dims: List of latent dimensions to test
+            error_type: Type of error to measure
+            title: Plot title
+        """
+        # A full sweep would require training one autoencoder per latent
+        # dimension; this simplified version only evaluates the current model,
+        # so latent_dims is not used here
+        print("Note: a complete curve requires training autoencoders with different latent dimensions.")
+        print("For demonstration, showing the concept with the current model:")
+
+        # Get reconstructions with the current model
+        reconstructions = self.autoencoder.reconstruct(data).numpy()
+
+        if error_type == 'mse':
+            current_error = np.mean([mean_squared_error(data[i].flatten(), reconstructions[i].flatten())
+                                     for i in range(len(data))])
+        elif error_type == 'mae':
+            current_error = np.mean([mean_absolute_error(data[i].flatten(), reconstructions[i].flatten())
+                                     for i in range(len(data))])
+        else:
+            raise ValueError(f"Unknown error type: {error_type}")
+
+        plt.figure(figsize=self.figsize)
+        plt.plot([self.autoencoder.latent_dim], [current_error], 'ro', markersize=10,
+                 label=f'Current Model (dim={self.autoencoder.latent_dim})')
+        plt.xlabel('Latent Dimension')
+        plt.ylabel(error_type.upper())
+        plt.title(title)
+        plt.grid(True, alpha=0.3)
+        plt.legend()
+        plt.show()
+
+    def compute_reconstruction_metrics(
+        self,
+        data: np.ndarray
+    ) -> Dict[str, float]:
+        """
+        Compute comprehensive reconstruction metrics.
+
+        Args:
+            data: Input data
+
+        Returns:
+            Dictionary of reconstruction metrics
+        """
+        # Get reconstructions
+        reconstructions = self.autoencoder.reconstruct(data).numpy()
+
+        # Compute various metrics
+        metrics = {}
+
+        # Mean squared error
+        mse_per_sample = [mean_squared_error(data[i].flatten(), reconstructions[i].flatten())
+                          for i in range(len(data))]
+        metrics['mse_mean'] = np.mean(mse_per_sample)
+        metrics['mse_std'] = np.std(mse_per_sample)
+
+        # Mean absolute error
+        mae_per_sample = [mean_absolute_error(data[i].flatten(), reconstructions[i].flatten())
+                          for i in range(len(data))]
+        metrics['mae_mean'] = np.mean(mae_per_sample)
+        metrics['mae_std'] = np.std(mae_per_sample)
+
+        # Structural similarity (for images); assumes inputs scaled to [0, 1]
+        if len(data.shape) == 4:
+            try:
+                from skimage.metrics import structural_similarity as ssim
+                ssim_scores = []
+                for i in range(len(data)):
+                    if data[i].shape[-1] == 1:
+                        score = ssim(data[i][:, :, 0], reconstructions[i][:, :, 0],
+                                     data_range=1.0)
+                    else:
+                        # channel_axis replaces the deprecated multichannel flag
+                        score = ssim(data[i], reconstructions[i],
+                                     channel_axis=-1, data_range=1.0)
+                    ssim_scores.append(score)
+                metrics['ssim_mean'] = np.mean(ssim_scores)
+                metrics['ssim_std'] = np.std(ssim_scores)
+            except ImportError:
+                print("scikit-image not available for SSIM computation")
+
+        # Peak signal-to-noise ratio (also assumes a [0, 1] data range)
+        if len(data.shape) == 4:
+            psnr_scores = []
+            for i in range(len(data)):
+                mse = mean_squared_error(data[i].flatten(), reconstructions[i].flatten())
+                if mse == 0:
+                    psnr = float('inf')
+                else:
+                    psnr = 20 * np.log10(1.0 / np.sqrt(mse))
+                psnr_scores.append(psnr)
+            metrics['psnr_mean'] = np.mean(psnr_scores)
+            metrics['psnr_std'] = np.std(psnr_scores)
+
+        return metrics
+
+    def print_reconstruction_summary(
+        self,
+        data: np.ndarray
+    ) -> None:
+        """
+        Print a summary of reconstruction quality metrics.
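+
+        Example (illustrative sketch; ``viz`` stands for an instance of this
+        visualizer and ``x_test`` for a held-out data array):
+            >>> viz.print_reconstruction_summary(x_test)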
+
+        Args:
+            data: Input data
+        """
+        metrics = self.compute_reconstruction_metrics(data)
+
+        print("=== Reconstruction Quality Summary ===")
+        print(f"Number of samples: {len(data)}")
+        print(f"Data shape: {data.shape}")
+        print()
+        print(f"MSE: {metrics['mse_mean']:.6f} ± {metrics['mse_std']:.6f}")
+        print(f"MAE: {metrics['mae_mean']:.6f} ± {metrics['mae_std']:.6f}")
+
+        if 'ssim_mean' in metrics:
+            print(f"SSIM: {metrics['ssim_mean']:.4f} ± {metrics['ssim_std']:.4f}")
+
+        if 'psnr_mean' in metrics:
+            print(f"PSNR: {metrics['psnr_mean']:.2f} ± {metrics['psnr_std']:.2f} dB")
+
+    def get_config(self) -> Dict[str, Any]:
+        """Get configuration dictionary."""
+        return {
+            'autoencoder_type': type(self.autoencoder).__name__,
+            'figsize': self.figsize
+        }
\ No newline at end of file
diff --git a/udl_toolbox/visualization/training.py b/udl_toolbox/visualization/training.py
new file mode 100644
index 0000000..d14b62a
--- /dev/null
+++ b/udl_toolbox/visualization/training.py
@@ -0,0 +1,457 @@
+"""
+Training progress and loss visualization utilities.
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from typing import Dict, List, Optional, Tuple, Any
+
+
+class LossVisualizer:
+    """
+    Visualization tools for training progress and loss analysis.
+
+    Provides methods for plotting training curves, loss components,
+    and analyzing training dynamics.
+    """
+
+    def __init__(self, figsize: Tuple[int, int] = (12, 8)):
+        """
+        Initialize the loss visualizer.
+
+        Args:
+            figsize: Default figure size for matplotlib plots
+        """
+        self.figsize = figsize
+
+    def plot_training_curves(
+        self,
+        history: Dict[str, List[float]],
+        title: str = "Training Curves",
+        save_path: Optional[str] = None,
+        interactive: bool = False
+    ) -> None:
+        """
+        Plot training and validation loss curves.
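+
+        Example (a minimal sketch; assumes the history is a plain dict of
+        per-epoch values, e.g. as returned by this toolbox's ``fit``):
+            >>> viz = LossVisualizer()
+            >>> hist = {'loss': [1.0, 0.6, 0.4], 'val_loss': [1.1, 0.7, 0.5]}
+            >>> viz.plot_training_curves(hist, title="Demo run")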
+
+        Args:
+            history: Training history dictionary
+            title: Plot title
+            save_path: Optional path to save the plot
+            interactive: Whether to create an interactive plotly plot
+        """
+        if interactive:
+            self._plot_training_curves_interactive(history, title)
+        else:
+            self._plot_training_curves_static(history, title, save_path)
+
+    def _plot_training_curves_static(
+        self,
+        history: Dict[str, List[float]],
+        title: str,
+        save_path: Optional[str]
+    ) -> None:
+        """Create static matplotlib training curves."""
+        # Determine the number of subplots needed; validation keys are drawn
+        # alongside their training counterparts, not as separate subplots
+        loss_types = [key for key in history.keys()
+                      if 'loss' in key.lower() and not key.startswith('val_')]
+        n_plots = len(loss_types)
+
+        if n_plots == 1:
+            fig, ax = plt.subplots(1, 1, figsize=self.figsize)
+            axes = [ax]
+        else:
+            n_cols = min(2, n_plots)
+            n_rows = (n_plots + n_cols - 1) // n_cols
+            fig, axes = plt.subplots(n_rows, n_cols, figsize=(self.figsize[0], self.figsize[1] * n_rows / 2))
+            axes = axes.flatten()
+
+        epochs = range(1, len(history[loss_types[0]]) + 1)
+
+        for i, loss_type in enumerate(loss_types):
+            ax = axes[i]
+
+            # Plot training loss
+            ax.plot(epochs, history[loss_type], label=f'Training {loss_type}', linewidth=2)
+
+            # Plot validation loss if available
+            val_key = f'val_{loss_type}'
+            if val_key in history:
+                ax.plot(epochs, history[val_key], label=f'Validation {loss_type}', linewidth=2)
+
+            ax.set_xlabel('Epoch')
+            ax.set_ylabel('Loss')
+            ax.set_title(loss_type.replace('_', ' ').title())
+            ax.legend()
+            ax.grid(True, alpha=0.3)
+
+        # Hide unused subplots
+        for i in range(n_plots, len(axes)):
+            axes[i].set_visible(False)
+
+        plt.suptitle(title)
+        plt.tight_layout()
+
+        if save_path:
+            plt.savefig(save_path, dpi=300, bbox_inches='tight')
+        plt.show()
+
+    def _plot_training_curves_interactive(
+        self,
+        history: Dict[str, List[float]],
+        title: str
+    ) -> None:
+        """Create interactive plotly training curves."""
+        # As in the static variant, validation keys are overlaid on their
+        # training counterparts rather than given their own subplots
+        loss_types = [key for key in history.keys()
+                      if 'loss' in key.lower() and not key.startswith('val_')]
+        epochs = list(range(1, len(history[loss_types[0]]) + 1))
+
+        fig = make_subplots(
+            rows=len(loss_types),
+            cols=1,
+            subplot_titles=[loss_type.replace('_', ' ').title() for loss_type in loss_types],
+            shared_xaxes=True
+        )
+
+        for i, loss_type in enumerate(loss_types):
+            # Training loss
+            fig.add_trace(
+                go.Scatter(
+                    x=epochs,
+                    y=history[loss_type],
+                    mode='lines',
+                    name=f'Training {loss_type}',
+                    line=dict(width=2)
+                ),
+                row=i+1, col=1
+            )
+
+            # Validation loss if available
+            val_key = f'val_{loss_type}'
+            if val_key in history:
+                fig.add_trace(
+                    go.Scatter(
+                        x=epochs,
+                        y=history[val_key],
+                        mode='lines',
+                        name=f'Validation {loss_type}',
+                        line=dict(width=2)
+                    ),
+                    row=i+1, col=1
+                )
+
+        fig.update_layout(
+            title=title,
+            xaxis_title='Epoch',
+            height=300 * len(loss_types)
+        )
+
+        fig.show()
+
+    def plot_loss_components(
+        self,
+        history: Dict[str, List[float]],
+        title: str = "Loss Components",
+        save_path: Optional[str] = None
+    ) -> None:
+        """
+        Plot individual loss components (reconstruction, regularization, etc.).
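+
+        Example (illustrative sketch; which component keys exist depends on
+        the autoencoder that produced the history):
+            >>> viz = LossVisualizer()
+            >>> hist = {'loss': [2.0, 1.2], 'reconstruction_loss': [1.5, 0.9],
+            ...         'kl_loss': [0.5, 0.3]}
+            >>> viz.plot_loss_components(hist)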
+
+        Args:
+            history: Training history dictionary
+            title: Plot title
+            save_path: Optional path to save the plot
+        """
+        epochs = range(1, len(history['loss']) + 1)
+
+        plt.figure(figsize=self.figsize)
+
+        # Plot total loss
+        plt.plot(epochs, history['loss'], label='Total Loss', linewidth=3, alpha=0.8)
+
+        # Plot any known component losses present in the history
+        component_keys = [key for key in history.keys()
+                          if key in ('reconstruction_loss', 'regularization_loss', 'kl_loss', 'sparsity_loss')]
+
+        for key in component_keys:
+            plt.plot(epochs, history[key], label=key.replace('_', ' ').title(), linewidth=2)
+
+        plt.xlabel('Epoch')
+        plt.ylabel('Loss')
+        plt.title(title)
+        plt.legend()
+        plt.grid(True, alpha=0.3)
+        plt.yscale('log')  # A log scale is often clearer for loss curves
+
+        if save_path:
+            plt.savefig(save_path, dpi=300, bbox_inches='tight')
+        plt.show()
+
+    def plot_loss_smoothed(
+        self,
+        history: Dict[str, List[float]],
+        window_size: int = 10,
+        title: str = "Smoothed Training Curves"
+    ) -> None:
+        """
+        Plot smoothed training curves using a moving average.
+
+        Args:
+            history: Training history dictionary
+            window_size: Size of the moving average window
+            title: Plot title
+        """
+        def smooth(data, window):
+            return np.convolve(data, np.ones(window) / window, mode='valid')
+
+        epochs = range(1, len(history['loss']) + 1)
+        # 'valid' convolution drops the first window_size - 1 points
+        smoothed_epochs = range(window_size, len(history['loss']) + 1)
+
+        plt.figure(figsize=self.figsize)
+
+        # Plot original and smoothed total loss
+        plt.plot(epochs, history['loss'], alpha=0.3, label='Total Loss (raw)')
+        plt.plot(smoothed_epochs, smooth(history['loss'], window_size),
+                 linewidth=2, label=f'Total Loss (smoothed, window={window_size})')
+
+        # Plot validation loss if available
+        if 'val_loss' in history:
+            plt.plot(epochs, history['val_loss'], alpha=0.7, label='Validation Loss', linestyle='--')
+
+        plt.xlabel('Epoch')
+        plt.ylabel('Loss')
+        plt.title(title)
+        plt.legend()
+        plt.grid(True, alpha=0.3)
+        plt.yscale('log')
+        plt.show()
+
+    def plot_learning_rate_schedule(
+        self,
+        learning_rates: List[float],
+        title: str = "Learning Rate Schedule"
+    ) -> None:
+        """
+        Plot the learning rate schedule over training.
+
+        Args:
+            learning_rates: List of learning rates per epoch
+            title: Plot title
+        """
+        epochs = range(1, len(learning_rates) + 1)
+
+        plt.figure(figsize=self.figsize)
+        plt.plot(epochs, learning_rates, linewidth=2)
+        plt.xlabel('Epoch')
+        plt.ylabel('Learning Rate')
+        plt.title(title)
+        plt.grid(True, alpha=0.3)
+        plt.yscale('log')
+        plt.show()
+
+    def plot_gradient_norms(
+        self,
+        gradient_norms: List[float],
+        title: str = "Gradient Norms During Training"
+    ) -> None:
+        """
+        Plot gradient norms over training (for gradient explosion/vanishing analysis).
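+
+        Example (illustrative sketch; the per-step norms would typically be
+        collected in a custom training loop):
+            >>> viz = LossVisualizer()
+            >>> norms = [2.3, 1.1, 0.8, 0.7, 0.65]
+            >>> viz.plot_gradient_norms(norms)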
+
+        Args:
+            gradient_norms: List of gradient norms per step
+            title: Plot title
+        """
+        steps = range(1, len(gradient_norms) + 1)
+
+        plt.figure(figsize=self.figsize)
+        plt.plot(steps, gradient_norms, alpha=0.7)
+        plt.xlabel('Training Step')
+        plt.ylabel('Gradient Norm')
+        plt.title(title)
+        plt.grid(True, alpha=0.3)
+        plt.yscale('log')
+
+        # Add reference lines
+        plt.axhline(y=1.0, color='red', linestyle='--', alpha=0.7, label='Gradient norm = 1')
+        plt.axhline(y=0.1, color='orange', linestyle='--', alpha=0.7, label='Gradient norm = 0.1')
+        plt.legend()
+        plt.show()
+
+    def plot_loss_landscape_1d(
+        self,
+        autoencoder,
+        data_sample: np.ndarray,
+        parameter_direction: np.ndarray,
+        alpha_range: Tuple[float, float] = (-1.0, 1.0),
+        num_points: int = 50,
+        title: str = "1D Loss Landscape"
+    ) -> None:
+        """
+        Plot the 1D loss landscape along a specific direction in parameter space.
+
+        Args:
+            autoencoder: Autoencoder model
+            data_sample: Sample data for loss computation
+            parameter_direction: Direction in parameter space
+            alpha_range: Range of steps along the direction
+            num_points: Number of points to evaluate
+            title: Plot title
+        """
+        alphas = np.linspace(alpha_range[0], alpha_range[1], num_points)
+        losses = []
+
+        # Store original parameters so they can be restored afterwards
+        original_params = [param.numpy() for param in autoencoder.autoencoder.trainable_variables]
+
+        for alpha in alphas:
+            # Shift parameters along the chosen direction
+            for i, param in enumerate(autoencoder.autoencoder.trainable_variables):
+                param.assign(original_params[i] + alpha * parameter_direction[i])
+
+            # Compute loss at the shifted parameters
+            reconstructed = autoencoder.reconstruct(data_sample)
+            loss_dict = autoencoder._compute_loss(data_sample, reconstructed)
+            losses.append(float(loss_dict['total_loss']))
+
+        # Restore original parameters
+        for i, param in enumerate(autoencoder.autoencoder.trainable_variables):
+            param.assign(original_params[i])
+
+        plt.figure(figsize=self.figsize)
+        plt.plot(alphas, losses, linewidth=2)
+        plt.axvline(x=0, color='red', linestyle='--', alpha=0.7, label='Current parameters')
+        plt.xlabel('Step size (α)')
+        plt.ylabel('Loss')
+        plt.title(title)
+        plt.legend()
+        plt.grid(True, alpha=0.3)
+        plt.show()
+
+    def analyze_training_stability(
+        self,
+        history: Dict[str, List[float]],
+        window_size: int = 10
+    ) -> Dict[str, float]:
+        """
+        Analyze training stability metrics.
+
+        Args:
+            history: Training history dictionary
+            window_size: Window size for variance computation
+
+        Returns:
+            Dictionary of stability metrics
+        """
+        def local_variance(data, window):
+            variances = []
+            for i in range(window, len(data)):
+                window_data = data[i-window:i]
+                variances.append(np.var(window_data))
+            return np.mean(variances)
+
+        metrics = {}
+
+        # Loss variance within a sliding window
+        metrics['loss_variance'] = local_variance(history['loss'], window_size)
+
+        # Final convergence (slope over the last 20% of training)
+        final_portion = int(0.2 * len(history['loss']))
+        if final_portion > 1:
+            final_losses = history['loss'][-final_portion:]
+            epochs = np.arange(len(final_losses))
+            slope = np.polyfit(epochs, final_losses, 1)[0]
+            metrics['final_slope'] = slope
+
+        # Early stopping metric (epoch of the best validation loss)
+        if 'val_loss' in history:
+            best_epoch = np.argmin(history['val_loss']) + 1
+            metrics['best_val_epoch'] = best_epoch
+            metrics['epochs_after_best'] = len(history['val_loss']) - best_epoch
+
+        return metrics
+
+    def plot_training_summary(
+        self,
+        history: Dict[str, List[float]],
+        title: str = "Training Summary"
+    ) -> None:
+        """
+        Create a comprehensive training summary plot.
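+
+        Example (illustrative sketch with a toy history dict):
+            >>> viz = LossVisualizer()
+            >>> hist = {'loss': [1.0, 0.5, 0.3, 0.25],
+            ...         'val_loss': [1.1, 0.6, 0.4, 0.42]}
+            >>> viz.plot_training_summary(hist)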
+
+        Args:
+            history: Training history dictionary
+            title: Plot title
+        """
+        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
+
+        epochs = range(1, len(history['loss']) + 1)
+
+        # Loss curves
+        ax1 = axes[0, 0]
+        ax1.plot(epochs, history['loss'], label='Training Loss')
+        if 'val_loss' in history:
+            ax1.plot(epochs, history['val_loss'], label='Validation Loss')
+        ax1.set_xlabel('Epoch')
+        ax1.set_ylabel('Loss')
+        ax1.set_title('Loss Curves')
+        ax1.legend()
+        ax1.grid(True, alpha=0.3)
+        ax1.set_yscale('log')
+
+        # Loss components
+        ax2 = axes[0, 1]
+        component_keys = ['reconstruction_loss', 'regularization_loss']
+        for key in component_keys:
+            if key in history:
+                ax2.plot(epochs, history[key], label=key.replace('_', ' ').title())
+        ax2.set_xlabel('Epoch')
+        ax2.set_ylabel('Loss')
+        ax2.set_title('Loss Components')
+        ax2.legend()
+        ax2.grid(True, alpha=0.3)
+        ax2.set_yscale('log')
+
+        # Loss distribution
+        ax3 = axes[1, 0]
+        ax3.hist(history['loss'], bins=30, alpha=0.7, edgecolor='black')
+        ax3.set_xlabel('Loss Value')
+        ax3.set_ylabel('Frequency')
+        ax3.set_title('Loss Distribution')
+        ax3.grid(True, alpha=0.3)
+
+        # Training statistics
+        ax4 = axes[1, 1]
+        stats_text = f"""
+Training Statistics:
+• Total Epochs: {len(epochs)}
+• Final Loss: {history['loss'][-1]:.6f}
+• Min Loss: {min(history['loss']):.6f}
+• Max Loss: {max(history['loss']):.6f}
+• Loss Std: {np.std(history['loss']):.6f}
+        """
+
+        if 'val_loss' in history:
+            best_val_epoch = np.argmin(history['val_loss']) + 1
+            stats_text += f"""
+• Best Val Epoch: {best_val_epoch}
+• Best Val Loss: {min(history['val_loss']):.6f}
+            """
+
+        ax4.text(0.1, 0.9, stats_text, transform=ax4.transAxes, fontsize=12,
+                 verticalalignment='top', fontfamily='monospace')
+        ax4.set_xlim(0, 1)
+        ax4.set_ylim(0, 1)
+        ax4.axis('off')
+        ax4.set_title('Training Statistics')
+
+        plt.suptitle(title)
+        plt.tight_layout()
+        plt.show()
+
+    def get_config(self) -> Dict[str, Any]:
+        """Get configuration dictionary."""
+        return {
+            'figsize': self.figsize
+        }
\ No newline at end of file