# Advanced Experiments and Extensions

This notebook walks through several advanced experiments and extensions:

1. Continuous Hopfield Network
2. Deep Boltzmann Machine
3. Conditional Generation
4. Transfer Learning

These experiments illustrate the flexibility and expressive power of energy-based and generative models.

In [None]:
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import sys
import os

# Add project root directory to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath('__file__'))))

# Import project modules
from src.hopfield.hopfield import HopfieldNetwork
from src.hopfield.visualizer import HopfieldVisualizer
from src.boltzmann.rbm import RBM
from src.boltzmann.sampler import RBMSampler
from src.utils.config import Config
from src.utils.data_loader import load_mnist, load_fashion_mnist
from src.utils.helpers import binary_to_image, image_to_binary

# Fix the PyTorch and NumPy RNG seeds so repeated runs give the same results
torch.manual_seed(42)
np.random.seed(42)

# Instantiate the project configuration object
config = Config()

# Choose the compute device: CUDA only when it is available AND enabled in the config
if torch.cuda.is_available() and config.RBM_CONFIG['use_cuda']:
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

## 1. Continuous Hopfield Network

A continuous Hopfield network evolves its state with continuous-time dynamics (integrated below with explicit Euler steps) instead of discrete unit-by-unit updates.

In [None]:
class ContinuousHopfieldNetwork:
    """
    Continuous Hopfield Network

    Stores patterns in a symmetric, zero-diagonal weight matrix ``W`` built
    with the Hebbian rule and evolves a real-valued state with explicit Euler
    steps of ``d(state)/dt = (-state + W @ state) / tau``.

    NOTE(review): the dynamics are purely linear -- no saturating activation
    (e.g. tanh) is applied to the state -- so the state is not confined to
    [-1, 1]. Confirm this simplification is intended.

    Args:
        n_neurons: Number of units in the network.
        tau: Time constant dividing the state derivative.
        dt: Euler integration step size.
    """
    def __init__(self, n_neurons, tau=1.0, dt=0.1):
        self.n_neurons = n_neurons
        self.tau = tau  # Time constant
        self.dt = dt   # Time step

        # No patterns stored yet: zero weights, zero state
        self.W = torch.zeros(n_neurons, n_neurons)
        self.states = torch.zeros(n_neurons)

    def store_patterns(self, patterns):
        """
        Store patterns using the Hebbian learning rule.

        Replaces any previously stored weights with
        ``W = (sum_p p p^T with zeroed diagonal) / n_neurons``.

        Args:
            patterns: Tensor or array-like of shape (n_patterns, n_neurons).
                A single 1-D pattern of length n_neurons is also accepted.

        Raises:
            ValueError: If the patterns do not have n_neurons columns.
        """
        patterns = torch.as_tensor(patterns, dtype=torch.float32)

        if patterns.numel() == 0:
            # No patterns at all: leaves the network with zero weights
            patterns = patterns.reshape(0, self.n_neurons)
        elif patterns.dim() == 1:
            # A single pattern was passed without the leading batch axis
            patterns = patterns.unsqueeze(0)

        if patterns.dim() != 2 or patterns.size(1) != self.n_neurons:
            raise ValueError(
                f"patterns must have shape (n_patterns, {self.n_neurons}), "
                f"got {tuple(patterns.shape)}"
            )

        # Sum of outer products over all patterns, as one matrix product
        weights = patterns.t() @ patterns

        # Remove self-connections
        weights.fill_diagonal_(0)

        # Normalize by the network size
        self.W = weights / self.n_neurons

    def energy(self, state):
        """
        Return the quadratic energy ``-0.5 * state^T W state`` as a 0-d tensor.
        """
        return -0.5 * torch.dot(state, torch.mv(self.W, state))

    def dynamics(self, state):
        """
        Return the time derivative of ``state``: ``(-state + W @ state) / tau``.
        """
        return (-state + torch.mv(self.W, state)) / self.tau

    def update(self):
        """
        Advance ``self.states`` in place by one explicit Euler step of size dt.
        """
        self.states += self.dt * self.dynamics(self.states)

    def recover(self, initial_state, n_steps=100):
        """
        Run the dynamics from ``initial_state`` and record the energy.

        Args:
            initial_state: Tensor or array-like with n_neurons entries. It is
                copied and converted to the dtype/device of ``W``; the
                caller's object is never modified.
            n_steps: Number of Euler steps to take.

        Returns:
            Tuple ``(final_state, energies)`` where ``final_state`` is a copy
            of the state after ``n_steps`` updates and ``energies`` is a list
            of ``n_steps + 1`` floats (energy before the first step and after
            every step).

        Raises:
            ValueError: If initial_state does not contain n_neurons entries.
        """
        # Convert first (integer states cannot take the in-place float update),
        # then clone so the caller's tensor is never aliased.
        state = torch.as_tensor(
            initial_state, dtype=self.W.dtype, device=self.W.device
        ).clone().reshape(-1)

        if state.numel() != self.n_neurons:
            raise ValueError(
                f"initial_state must have {self.n_neurons} entries, got {state.numel()}"
            )

        self.states = state

        # Energy before any update, then after each step
        energies = [self.energy(self.states).item()]
        for _ in range(n_steps):
            self.update()
            energies.append(self.energy(self.states).item())

        return self.states.clone(), energies

# Build a 100-neuron continuous Hopfield network
continuous_hopfield = ContinuousHopfieldNetwork(n_neurons=100)

# Draw random patterns: Gaussian noise reduced to its sign
n_patterns = 5
patterns = torch.sign(torch.randn(n_patterns, 100))

# Imprint the patterns into the network weights
continuous_hopfield.store_patterns(patterns)

print(f"Stored {n_patterns} patterns in the continuous Hopfield network")

In [None]:
# Test pattern recovery
def add_noise(pattern, noise_ratio=0.2):
    """
    Return a copy of ``pattern`` with a fraction of its entries negated.

    ``int(noise_ratio * len(pattern))`` distinct positions are drawn with
    NumPy's global RNG and multiplied by -1; the input itself is not modified.
    """
    size = len(pattern)
    flip_count = int(noise_ratio * size)

    # Distinct positions to corrupt (sampling without replacement)
    chosen = np.random.choice(size, flip_count, replace=False)

    corrupted = pattern.clone()
    corrupted[chosen] = -corrupted[chosen]
    return corrupted

# Use the first stored pattern as the recovery target
test_pattern = patterns[0]

# Corrupt it by flipping the sign of 30% of its entries
noisy_pattern = add_noise(test_pattern, 0.3)

# Run 100 steps of the network dynamics from the corrupted state,
# keeping the energy recorded along the way
recovered_pattern, energies = continuous_hopfield.recover(noisy_pattern, 100)

# Calculate similarity
def similarity(p1, p2):
    """
    Return the dot product of two 1-D patterns divided by the length of ``p1``.
    """
    overlap = torch.dot(p1, p2).item()
    return overlap / len(p1)

# Overlap with the target pattern before and after running the dynamics
recovered_similarity = similarity(test_pattern, recovered_pattern)
original_similarity = similarity(test_pattern, noisy_pattern)

print(f"Similarity between original and noisy pattern: {original_similarity:.4f}")
print(f"Similarity between original and recovered pattern: {recovered_similarity:.4f}")

# Energy trace recorded during recovery, one point per step
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(energies)
ax.set_title('Energy Changes During Recovery', fontsize=14)
ax.set_xlabel('Step', fontsize=12)
ax.set_ylabel('Energy', fontsize=12)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 2. Deep Boltzmann Machine

A Deep Boltzmann Machine (DBM) is a multi-layer extension of the Restricted Boltzmann Machine (RBM).

In [None]:
class DeepBoltzmannMachine:
    """
    Deep Boltzmann Machine (DBM), simplified.

    The model is a stack of binary layers. ``weights[i]`` connects layer ``i``
    to layer ``i + 1`` and ``biases[i]`` is the bias of layer ``i`` (index 0 is
    the visible layer), so there is exactly one bias vector per layer.

    Simplification: every adjacent pair of layers is treated as an RBM.
    Sampling a layer only uses the layer directly below (bottom-up) or above
    (top-down); training runs contrastive divergence on each pair in turn
    instead of the mean-field procedure a full DBM would use.

    Args:
        layer_sizes: Sizes of all layers, visible layer first.
        k: Default number of Gibbs steps for contrastive divergence.
        learning_rate: Step size for the parameter updates.
        use_cuda: Request GPU execution. Falls back to CPU when CUDA is not
            available; ``self.use_cuda`` reflects the device actually used.

    Raises:
        ValueError: If fewer than two layer sizes are given.
    """
    def __init__(self, layer_sizes, k=1, learning_rate=0.01, use_cuda=False):
        if len(layer_sizes) < 2:
            raise ValueError("layer_sizes must contain a visible layer and at least one hidden layer")

        self.layer_sizes = layer_sizes
        self.n_layers = len(layer_sizes) - 1
        self.k = k
        self.learning_rate = learning_rate

        # Only use the GPU when it was requested and actually exists
        self.device = torch.device('cuda' if use_cuda and torch.cuda.is_available() else 'cpu')
        self.use_cuda = self.device.type == 'cuda'

        # One weight matrix per pair of adjacent layers (drawn on CPU, then moved)
        self.weights = [
            (torch.randn(n_in, n_out) * 0.01).to(self.device)
            for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])
        ]

        # One bias vector per layer, so biases[i + 1] always matches layer i + 1
        self.biases = [torch.zeros(size, device=self.device) for size in layer_sizes]

    def _hidden_mean(self, visible, layer_idx):
        """Activation probabilities of layer ``layer_idx + 1`` given layer ``layer_idx``."""
        return torch.sigmoid(torch.matmul(visible, self.weights[layer_idx]) + self.biases[layer_idx + 1])

    def _visible_mean(self, hidden, layer_idx):
        """Activation probabilities of layer ``layer_idx`` given layer ``layer_idx + 1``."""
        return torch.sigmoid(torch.matmul(hidden, self.weights[layer_idx].t()) + self.biases[layer_idx])

    def sample_hidden(self, visible, layer_idx):
        """
        Sample layer ``layer_idx + 1`` given the states of layer ``layer_idx``.

        Only the bottom-up input is used; input from the layer above (which a
        full DBM would include for intermediate layers) is ignored.

        Returns:
            Binary tensor of shape (batch, layer_sizes[layer_idx + 1]).
        """
        return torch.bernoulli(self._hidden_mean(visible, layer_idx))

    def sample_visible(self, hidden, layer_idx):
        """
        Sample layer ``layer_idx`` given the states of layer ``layer_idx + 1``.

        Only the top-down input is used; input from the layer below is ignored.

        Returns:
            Binary tensor of shape (batch, layer_sizes[layer_idx]).
        """
        return torch.bernoulli(self._visible_mean(hidden, layer_idx))

    def contrastive_divergence(self, v0, k=None):
        """
        Run one CD-k update on every pair of adjacent layers.

        For each pair, positive statistics come from the layer's input and
        negative statistics from ``k`` Gibbs steps started at that input. The
        sampled hidden states of one pair become the input of the next pair.

        Args:
            v0: Binary batch of shape (batch, layer_sizes[0]).
            k: Number of Gibbs steps; defaults to ``self.k``. With ``k == 0``
                the negative phase equals the positive phase and the
                parameters are left unchanged.

        Returns:
            0-d tensor: mean squared error between ``v0`` and the visible
            reconstruction produced by the first pair of layers.
        """
        if k is None:
            k = self.k

        v0 = v0.to(self.device)
        layer_input = v0
        error = None

        for i in range(self.n_layers):
            # Positive phase: hidden statistics driven by the layer's input
            h0_mean = self._hidden_mean(layer_input, i)
            h0_sample = torch.bernoulli(h0_mean)

            # Negative phase: k alternating Gibbs steps within this pair
            vk, hk_mean, hk_sample = layer_input, h0_mean, h0_sample
            for _ in range(k):
                vk = torch.bernoulli(self._visible_mean(hk_sample, i))
                hk_mean = self._hidden_mean(vk, i)
                hk_sample = torch.bernoulli(hk_mean)

            # Gradient estimate: data statistics minus model statistics
            batch_size = layer_input.size(0)
            dW = (torch.matmul(layer_input.t(), h0_mean) - torch.matmul(vk.t(), hk_mean)) / batch_size
            self.weights[i] += self.learning_rate * dW
            self.biases[i] += self.learning_rate * torch.mean(layer_input - vk, dim=0)
            self.biases[i + 1] += self.learning_rate * torch.mean(h0_mean - hk_mean, dim=0)

            if i == 0:
                # Reconstruction error is measured on the visible layer only
                error = torch.mean((v0 - vk) ** 2)

            # The data-driven hidden sample is the training input of the next pair
            layer_input = h0_sample

        return error

    def train_batch(self, batch):
        """
        Train on one batch and return its visible-layer reconstruction error.
        """
        return self.contrastive_divergence(batch.to(self.device))

    def generate_samples(self, n_samples, n_gibbs_steps=100):
        """
        Generate visible samples by repeated up/down passes through the stack.

        Starts from uniformly random binary visible states; each Gibbs step
        samples every layer bottom-up to the top layer and then top-down back
        to the visible layer.

        Returns:
            Binary tensor of shape (n_samples, layer_sizes[0]).
        """
        # Random binary start state (drawn on CPU, then moved to the model device)
        visible = torch.bernoulli(torch.full((n_samples, self.layer_sizes[0]), 0.5)).to(self.device)

        for _ in range(n_gibbs_steps):
            state = visible

            # Bottom-up pass
            for i in range(self.n_layers):
                state = self.sample_hidden(state, i)

            # Top-down pass through every layer, ending at the visible layer
            for i in reversed(range(self.n_layers)):
                state = self.sample_visible(state, i)

            visible = state

        return visible

# Create DBM
layer_sizes = [784, 256, 128]  # Visible layer, first hidden layer, second hidden layer

# Derive the GPU flag from `device`, which already combines the config switch
# with torch.cuda.is_available(); the raw config flag alone would request
# CUDA on machines that do not have it.
dbm = DeepBoltzmannMachine(layer_sizes, k=1, learning_rate=0.01, use_cuda=(device.type == 'cuda'))

print(f"Created DBM with layers: {layer_sizes}")

In [None]:
# MNIST training batches, 64 images each
mnist_train_loader = load_mnist(batch_size=64, train=True)

# Train the DBM for a few short epochs, tracking the mean error of each
n_epochs = 5
errors = []

for epoch in range(n_epochs):
    batch_errors = []

    for batch_idx, (data, _) in enumerate(mnist_train_loader):
        # Demo only: stop each epoch after the first 20 batches
        if batch_idx >= 20:
            break

        # One row per image, pixels thresholded at 0.5 into {0, 1}
        batch = (data.view(data.size(0), -1) > 0.5).float()

        # Single training step on this batch
        error = dbm.train_batch(batch)
        batch_errors.append(error)

    # Epoch summary: mean of the per-batch reconstruction errors
    avg_error = np.mean([e.item() for e in batch_errors])
    errors.append(avg_error)

    print(f"Epoch {epoch+1}/{n_epochs}, Error: {avg_error:.6f}")

# Reconstruction error per epoch
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(errors)
ax.set_title('DBM Training Error', fontsize=14)
ax.set_xlabel('Epoch', fontsize=12)
ax.set_ylabel('Reconstruction Error', fontsize=12)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 3. Conditional Generation

Conditional generation produces samples of a requested type by feeding the model a condition (here, a one-hot class label).

In [None]:
class ConditionalRBM(RBM):
    """
    Conditional Restricted Boltzmann Machine

    Extends the project ``RBM`` with a condition vector whose contribution
    ``condition @ W_cond`` is added to the hidden pre-activation. The
    condition is always supplied by the caller and is never sampled.

    Relies on the parent class providing ``W``, ``a`` (visible bias),
    ``b`` (hidden bias), ``n_visible``, ``k``, ``learning_rate`` and
    ``use_cuda``.
    # NOTE(review): W is used as (n_visible, n_hidden) below -- confirm
    # against the RBM implementation.

    Args:
        n_visible: Number of visible units.
        n_hidden: Number of hidden units.
        n_condition: Length of the condition vector.
        k: Default number of Gibbs steps for contrastive divergence.
        learning_rate: Step size for the parameter updates.
        use_cuda: Forwarded to the parent RBM.
    """
    def __init__(self, n_visible, n_hidden, n_condition, k=1, learning_rate=0.01, use_cuda=False):
        # Pass by keyword: other call sites construct RBM with extra
        # parameters (momentum, weight_decay) listed before use_cuda, so a
        # positional use_cuda could bind to the wrong parameter.
        super().__init__(
            n_visible=n_visible,
            n_hidden=n_hidden,
            k=k,
            learning_rate=learning_rate,
            use_cuda=use_cuda,
        )

        self.n_condition = n_condition

        # Condition-to-hidden weights
        self.W_cond = torch.randn(n_condition, n_hidden) * 0.01

        # Condition bias: kept as an attribute, but nothing in this class
        # reads it because the condition is never sampled.
        self.b_cond = torch.zeros(n_condition)

        # Move to GPU if needed
        if self.use_cuda:
            self.W_cond = self.W_cond.cuda()
            self.b_cond = self.b_cond.cuda()

    def sample_h_given_v(self, v, condition=None):
        """
        Sample the hidden layer given the visible layer and an optional condition.

        Returns:
            Tuple ``(h_sample, p_h)``: a binary sample and the activation
            probabilities it was drawn from.
        """
        activation = torch.matmul(v, self.W) + self.b

        # Add the condition's contribution to the hidden pre-activation
        if condition is not None:
            activation = activation + torch.matmul(condition, self.W_cond)

        p_h = torch.sigmoid(activation)
        return torch.bernoulli(p_h), p_h

    def sample_v_given_h(self, h):
        """
        Sample the visible layer given the hidden layer.

        Returns:
            Tuple ``(v_sample, p_v)``: a binary sample and the activation
            probabilities it was drawn from.
        """
        p_v = torch.sigmoid(torch.matmul(h, self.W.t()) + self.a)
        return torch.bernoulli(p_v), p_v

    def contrastive_divergence(self, v0, condition=None, k=None):
        """
        Perform one CD-k parameter update.

        Positive statistics come from the data ``v0``; negative statistics
        come from the state reached after ``k`` Gibbs steps started at ``v0``.
        The two sets are kept separate so the update is the difference
        between them.

        Args:
            v0: Binary data batch of shape (batch, n_visible).
            condition: Optional tensor of shape (batch, n_condition).
            k: Number of Gibbs steps; defaults to ``self.k``.

        Returns:
            0-d tensor: mean squared error between ``v0`` and the final
            visible sample.
        """
        if k is None:
            k = self.k

        # Inputs must live on the same device as the parameters
        device = self.W_cond.device
        v0 = v0.to(device)
        if condition is not None:
            condition = condition.to(device)

        # Manual updates: no autograd graph is needed, and no_grad also keeps
        # in-place updates legal if the parent stores grad-requiring tensors.
        with torch.no_grad():
            # Positive phase
            h0, h0_mean = self.sample_h_given_v(v0, condition)

            # Negative phase: k alternating Gibbs steps
            vk, hk, hk_mean = v0, h0, h0_mean
            for _ in range(k):
                vk, _ = self.sample_v_given_h(hk)
                hk, hk_mean = self.sample_h_given_v(vk, condition)

            batch_size = v0.size(0)
            hidden_diff = h0_mean - hk_mean

            # Update weights and biases (data statistics minus model statistics)
            dW = (torch.matmul(v0.t(), h0_mean) - torch.matmul(vk.t(), hk_mean)) / batch_size
            self.W += self.learning_rate * dW
            self.a += self.learning_rate * torch.mean(v0 - vk, dim=0)
            self.b += self.learning_rate * torch.mean(hidden_diff, dim=0)

            # The condition is identical in both phases, so only the hidden
            # statistics differ in the condition-weight gradient.
            if condition is not None:
                dW_cond = torch.matmul(condition.t(), hidden_diff) / batch_size
                self.W_cond += self.learning_rate * dW_cond

            # Reconstruction error
            error = torch.mean((v0 - vk) ** 2)

        return error

    def generate_samples(self, n_samples, condition=None, n_gibbs_steps=100):
        """
        Generate visible samples with an optional fixed condition.

        Starts from uniformly random binary visible states and alternates
        hidden and visible updates ``n_gibbs_steps`` times.

        Returns:
            Binary tensor of shape (n_samples, n_visible).
        """
        device = self.W_cond.device

        # Random binary start state (drawn on CPU, then moved to the model device)
        v = torch.bernoulli(torch.ones(n_samples, self.n_visible) * 0.5).to(device)
        if condition is not None:
            condition = condition.to(device)

        with torch.no_grad():
            for _ in range(n_gibbs_steps):
                # NOTE(review): the hidden *probabilities* (second return
                # value), not a binary sample, drive the visible update here.
                # Kept as in the original; confirm this is intended.
                _, h = self.sample_h_given_v(v, condition)
                v, _ = self.sample_v_given_h(h)

        return v

# Create conditional RBM
n_visible = 784
n_hidden = 256
n_condition = 10  # 10 categories of MNIST

conditional_rbm = ConditionalRBM(
    n_visible=n_visible,
    n_hidden=n_hidden,
    n_condition=n_condition,
    k=1,
    learning_rate=0.01,
    # `device` already combines the config switch with CUDA availability;
    # the raw config flag alone would request CUDA on CPU-only machines.
    use_cuda=(device.type == 'cuda')
)

print(f"Created conditional RBM with {n_visible} visible units, {n_hidden} hidden units, and {n_condition} condition units")

In [None]:
# Prepare conditional data
def prepare_conditional_data(data_loader, n_condition=10, max_batches=None):
    """
    Convert (images, labels) batches into (binary_batch, one_hot_condition) pairs.

    Each image batch is flattened to one row per sample and thresholded at
    0.5; each label batch becomes a float one-hot matrix.

    Args:
        data_loader: Iterable yielding ``(data, labels)`` pairs, where
            ``data`` has the batch on its first axis and ``labels`` holds
            integer class indices in ``[0, n_condition)``.
        n_condition: Width of the one-hot condition vectors.
        max_batches: If given, stop after this many batches instead of
            materialising the whole loader in memory.

    Returns:
        List of ``(batch, condition)`` tuples with shapes
        (batch_size, n_features) and (batch_size, n_condition).
    """
    conditional_data = []

    for batch_idx, (data, labels) in enumerate(data_loader):
        if max_batches is not None and batch_idx >= max_batches:
            break

        # Flatten to one row per sample (reshape also accepts non-contiguous input)
        batch = data.reshape(data.size(0), -1)

        # Binarize data
        batch = (batch > 0.5).float()

        # One-hot encode the whole label batch at once
        label_idx = torch.as_tensor(labels).long()
        condition = torch.nn.functional.one_hot(label_idx, num_classes=n_condition).float()

        conditional_data.append((batch, condition))

    return conditional_data

from itertools import islice

# Only the first batches are used for this demo, so only those are
# pre-processed; converting the whole training set would keep every
# binarized image in memory for no benefit.
max_train_batches = 20

# Load MNIST data
mnist_train_loader = load_mnist(batch_size=64, train=True)
conditional_train_data = prepare_conditional_data(islice(mnist_train_loader, max_train_batches))

# Train conditional RBM
n_epochs = 5
errors = []

for epoch in range(n_epochs):
    batch_errors = []

    for batch, condition in conditional_train_data:
        # The prepared tensors live on the CPU; move them to the device the
        # model was placed on before training.
        error = conditional_rbm.contrastive_divergence(batch.to(device), condition.to(device))
        batch_errors.append(error)

    # Calculate average error
    avg_error = np.mean([e.item() for e in batch_errors])
    errors.append(avg_error)

    print(f"Epoch {epoch+1}/{n_epochs}, Error: {avg_error:.6f}")

# Plot training error
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(errors)
ax.set_xlabel('Epoch', fontsize=12)
ax.set_ylabel('Reconstruction Error', fontsize=12)
ax.set_title('Conditional RBM Training Error', fontsize=14)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Generate conditional samples
def generate_conditional_samples(rbm, digit, n_samples=8, n_gibbs_steps=100):
    """
    Generate samples for a specific digit.

    Builds a one-hot condition for ``digit``, repeated for every sample, and
    passes it to ``rbm.generate_samples``.

    Args:
        rbm: Model exposing ``generate_samples(n_samples, condition, n_gibbs_steps)``.
            Its ``n_condition`` attribute sets the condition width; 10 is
            used when the attribute is missing.
        digit: Class index to switch on in the condition vector.
        n_samples: Number of samples to generate.
        n_gibbs_steps: Number of Gibbs steps forwarded to the model.

    Returns:
        Whatever ``rbm.generate_samples`` returns for this condition.
    """
    # Match the condition width to the model instead of hard-coding 10 classes
    n_condition = getattr(rbm, 'n_condition', 10)

    # One-hot condition, identical for every requested sample
    condition = torch.zeros(n_samples, n_condition)
    condition[:, digit] = 1.0

    return rbm.generate_samples(n_samples, condition, n_gibbs_steps)

# One generated sample per digit, laid out on a 2 x 5 grid
fig, axes = plt.subplots(2, 5, figsize=(15, 6))

for digit in range(10):
    # Draw a single sample conditioned on this digit
    samples = generate_conditional_samples(conditional_rbm, digit, n_samples=1)

    # Turn the binary vector into a 28 x 28 image
    image = binary_to_image(samples.detach().cpu().numpy(), (28, 28))[0]

    # Digits 0-4 fill the top row, 5-9 the bottom row
    row, col = divmod(digit, 5)
    cell = axes[row, col]
    cell.imshow(image, cmap='binary')
    cell.axis('off')
    cell.set_title(f'Digit {digit}', fontsize=12)

plt.tight_layout()
plt.show()

## 4. Transfer Learning

Transfer learning reuses a model trained on one dataset as the starting point for training on another dataset.

In [None]:
# Train a basic RBM on MNIST
mnist_train_loader = load_mnist(batch_size=64, train=True)

rbm = RBM(
    n_visible=784,
    n_hidden=256,
    k=1,
    learning_rate=0.01,
    # `device` already combines the config switch with CUDA availability;
    # the raw config flag alone would request CUDA on CPU-only machines.
    use_cuda=(device.type == 'cuda')
)

# Train RBM
n_epochs = 5
errors = []

for epoch in range(n_epochs):
    batch_errors = []

    for batch_idx, (data, _) in enumerate(mnist_train_loader):
        if batch_idx >= 20:  # Only train the first 20 batches as an example
            break

        # Flatten data
        batch = data.view(data.size(0), -1)

        # Binarize data
        batch = (batch > 0.5).float()

        if rbm.use_cuda:
            batch = batch.cuda()

        # Train one batch
        error = rbm.train_batch(batch)
        batch_errors.append(error)

    # Calculate average error
    avg_error = np.mean([e.item() for e in batch_errors])
    errors.append(avg_error)

    print(f"Epoch {epoch+1}/{n_epochs}, Error: {avg_error:.6f}")

print("Basic RBM training completed!")

In [None]:
class TransferRBM(RBM):
    """
    Transfer Learning RBM

    A new RBM built with the same hyperparameters as a pretrained one and
    initialised with copies of its parameters (``W``, ``a``, ``b``), so it can
    be fine-tuned on a different dataset without altering the original model.

    Args:
        pretrained_rbm: Trained RBM to copy hyperparameters and parameters from.
        freeze_hidden: If True, set ``requires_grad = False`` on ``W`` and ``b``.
            # NOTE(review): this only has an effect if the parent's training
            # step honours requires_grad -- confirm against RBM.train_batch.
    """
    def __init__(self, pretrained_rbm, freeze_hidden=False):
        # Build a fresh RBM with the pretrained model's hyperparameters
        super().__init__(
            n_visible=pretrained_rbm.n_visible,
            n_hidden=pretrained_rbm.n_hidden,
            k=pretrained_rbm.k,
            learning_rate=pretrained_rbm.learning_rate,
            momentum=pretrained_rbm.momentum,
            weight_decay=pretrained_rbm.weight_decay,
            use_cuda=pretrained_rbm.use_cuda
        )

        # Copy (not share) the pretrained parameters
        self.W.data = pretrained_rbm.W.data.clone()
        self.b.data = pretrained_rbm.b.data.clone()
        self.a.data = pretrained_rbm.a.data.clone()

        # Freeze hidden layer weights (optional)
        if freeze_hidden:
            self.W.requires_grad = False
            self.b.requires_grad = False

    def fine_tune(self, data_loader, n_epochs=5, lr_factor=0.1):
        """
        Fine-tune the model on a new dataset with a reduced learning rate.

        The learning rate is multiplied by ``lr_factor`` for the duration of
        the call and restored afterwards, even if training raises. Every
        batch yielded by ``data_loader`` is used in every epoch; batches are
        flattened but not binarized.

        Args:
            data_loader: Iterable yielding ``(data, labels)`` pairs; labels
                are ignored.
            n_epochs: Number of passes over ``data_loader``.
            lr_factor: Multiplier applied to the learning rate while tuning.

        Returns:
            List with the mean batch error of each epoch (NaN for an epoch
            in which the loader yielded no batches).
        """
        # Lower learning rate
        original_lr = self.learning_rate
        self.learning_rate = original_lr * lr_factor

        errors = []

        try:
            for epoch in range(n_epochs):
                batch_errors = []

                for data, _ in data_loader:
                    # Flatten data
                    batch = data.view(data.size(0), -1)

                    if self.use_cuda:
                        batch = batch.cuda()

                    # float() accepts 0-d tensors (CPU or CUDA) as well as
                    # plain numbers, so averaging works whatever train_batch
                    # returns.
                    batch_errors.append(float(self.train_batch(batch)))

                # Calculate average error (avoid NumPy's empty-mean warning)
                avg_error = np.mean(batch_errors) if batch_errors else float('nan')
                errors.append(avg_error)

                print(f"Fine-tune Epoch {epoch+1}/{n_epochs}, Error: {avg_error:.6f}")
        finally:
            # Restore original learning rate, even when training failed
            self.learning_rate = original_lr

        return errors

# Load the target dataset. `fashion_train_loader` was used below (and in the
# comparison cell) without ever being defined.
# NOTE(review): assumes load_fashion_mnist takes the same arguments as
# load_mnist -- confirm against src.utils.data_loader.
fashion_train_loader = load_fashion_mnist(batch_size=64, train=True)

# Use previously trained RBM for transfer learning
transfer_rbm = TransferRBM(rbm, freeze_hidden=False)

# Fine-tune
fine_tune_errors = transfer_rbm.fine_tune(fashion_train_loader, n_epochs=5)

print("Transfer learning completed!")

In [None]:
# Compare transfer learning vs training from scratch
print("Comparing transfer learning vs training from scratch...")

# Train a new RBM from scratch
scratch_rbm = RBM(
    n_visible=784,
    n_hidden=256,
    k=1,
    learning_rate=0.01,
    # `device` already combines the config switch with CUDA availability;
    # the raw config flag alone would request CUDA on CPU-only machines.
    use_cuda=(device.type == 'cuda')
)

# Train new RBM
# NOTE(review): this baseline sees only 20 batches per epoch, while
# TransferRBM.fine_tune iterates over the whole loader -- keep that in mind
# when reading the comparison plot.
scratch_errors = []
n_epochs = 5

for epoch in range(n_epochs):
    batch_errors = []

    for batch_idx, (data, _) in enumerate(fashion_train_loader):
        if batch_idx >= 20:  # Only train the first 20 batches as an example
            break

        # Flatten data
        batch = data.view(data.size(0), -1)

        if scratch_rbm.use_cuda:
            batch = batch.cuda()

        # Train one batch
        error = scratch_rbm.train_batch(batch)
        batch_errors.append(error)

    # Calculate average error; float() converts 0-d tensors (CPU or CUDA)
    # and plain numbers alike, which np.mean on the raw list does not.
    avg_error = np.mean([float(e) for e in batch_errors])
    scratch_errors.append(avg_error)

    print(f"Scratch Epoch {epoch+1}/{n_epochs}, Error: {avg_error:.6f}")

# Plot comparison results
fig, ax = plt.subplots(figsize=(10, 6))

ax.plot(fine_tune_errors, 'b-o', label='Transfer Learning', linewidth=2)
ax.plot(scratch_errors, 'r-s', label='Training from Scratch', linewidth=2)

ax.set_xlabel('Epoch', fontsize=12)
ax.set_ylabel('Reconstruction Error', fontsize=12)
ax.set_title('Transfer Learning vs Training from Scratch', fontsize=14)
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Generate samples for comparison
print("Generating samples for comparison...")

# Transfer learning RBM generates samples
transfer_samples = transfer_rbm.generate_samples(16, n_gibbs_steps=100)
transfer_images = binary_to_image(transfer_samples.detach().cpu().numpy(), (28, 28))

# Training from scratch RBM generates samples
scratch_samples = scratch_rbm.generate_samples(16, n_gibbs_steps=100)
scratch_images = binary_to_image(scratch_samples.detach().cpu().numpy(), (28, 28))

# Display the first 8 samples of each model: top row transfer, bottom row scratch
fig, axes = plt.subplots(2, 8, figsize=(16, 4))

# Display transfer learning samples
for i in range(8):
    axes[0, i].imshow(transfer_images[i], cmap='binary')
    axes[0, i].axis('off')
    if i == 0:
        axes[0, i].set_title('Transfer Learning', fontsize=12)

# Display training from scratch samples
for i in range(8):
    axes[1, i].imshow(scratch_images[i], cmap='binary')
    axes[1, i].axis('off')
    if i == 0:
        axes[1, i].set_title('Training from Scratch', fontsize=12)

plt.tight_layout()
plt.show()

## Summary

This notebook walked through several advanced experiments and extensions:

1. **Continuous Hopfield Network**: Hopfield network using continuous dynamics equations
2. **Deep Boltzmann Machine**: A Boltzmann machine with multiple stacked hidden layers
3. **Conditional Generation**: Generate specific types of samples based on conditions
4. **Transfer Learning**: Apply a model trained on one dataset to another dataset

These extensions illustrate the flexibility and expressive power of energy-based and generative models.