In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

class RBM(nn.Module):
    def __init__(self, n_visible, n_hidden):
        super(RBM, self).__init__()
        self.W = nn.Parameter(torch.randn(n_hidden, n_visible) * 0.01)
        self.a = nn.Parameter(torch.zeros(n_hidden))
        self.b = nn.Parameter(torch.zeros(n_visible))

    def sample_hidden(self, v):
        wx_b = torch.mm(v, self.W.t()) + self.a
        h_prob = torch.sigmoid(wx_b.clamp(-10, 10))
        print(h_prob)
        return h_prob, torch.bernoulli(h_prob)

    def sample_visible(self, h):
        wx_b = torch.mm(h, self.W) + self.b
        v_prob = torch.sigmoid(wx_b.clamp(-10, 10))
        return v_prob, torch.bernoulli(v_prob)

    def energy(self, v, h):
        return -torch.sum(v * torch.mm(h, self.W)) - torch.sum(self.a * h) - torch.sum(self.b * v)

    def free_energy(self, v):
        term_1 = -torch.mm(v, self.b.unsqueeze(1))
        wx_b = torch.mm(v, self.W.t()) + self.a
        term_2 = torch.sum(torch.log(1 + torch.exp(wx_b)))
        return term_1 + term_2

    def forward(self, v):
        _, h = self.sample_hidden(v)
        _, v_reconstructed = self.sample_visible(h)
        return v_reconstructed

    def contrastive_divergence(self, v, k=1):
        h0_prob, h0 = self.sample_hidden(v)
        vk = v

        for _ in range(k):
            _, hk = self.sample_hidden(vk)
            _, vk = self.sample_visible(hk)

        hk_prob, hk = self.sample_hidden(vk)

        # Positive phase
        positive_grad = torch.mm(h0.t(), v)
        
        # Negative phase
        negative_grad = torch.mm(hk.t(), vk)

        return positive_grad, negative_grad

def train_rbm(rbm, data_loader, epochs=10, learning_rate=0.001, k=1):
    optimizer = optim.SGD(rbm.parameters(), lr=learning_rate)
    
    for epoch in range(epochs):
        for batch in data_loader:
            v = batch[0]
            positive_grad, negative_grad = rbm.contrastive_divergence(v, k)
            
            loss = rbm.free_energy(v).mean() - rbm.free_energy(rbm(v)).mean()
            
            optimizer.zero_grad()
            loss.backward()

            for name, param in rbm.named_parameters():
                if param.requires_grad:
                    param.grad.data.clamp_(-1, 1)  # Example limits, adjust as needed

            optimizer.step()



            rbm.W.data.clamp_(-4, 4)  # Clip weights to prevent exploding gradients

        if epoch % 10 == 0:
            print(f"Epoch: {epoch}, Loss: {loss.item()}")



In [23]:
# Example usage for pretraining a DBN with 3 RBM layers (784-500-250-30)
# Assuming data is MNIST-like with 784 features
train_data = torch.randn(1000, 784)  # Example data, replace with your dataset
train_dataset = TensorDataset(train_data)
data_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

layers = [(784, 500), (500, 250), (250, 30)]
dbn = []

for n_visible, n_hidden in layers:
    rbm = RBM(n_visible, n_hidden)
    train_rbm(rbm, data_loader, epochs=100)
    dbn.append(rbm)
    
    # Prepare data for the next RBM by sampling from the hidden layer
    with torch.no_grad():
        train_data = torch.stack([rbm.sample_hidden(batch[0])[1] for batch in data_loader]).view(-1, n_hidden)
    train_dataset = TensorDataset(train_data)
    data_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

print("DBN Pretraining completed.")

tensor([[0.4806, 0.4675, 0.5798,  ..., 0.5638, 0.5263, 0.5490],
        [0.6538, 0.4692, 0.4925,  ..., 0.4068, 0.4441, 0.5811],
        [0.5481, 0.5779, 0.3993,  ..., 0.4779, 0.5322, 0.4566],
        ...,
        [0.5047, 0.5063, 0.4799,  ..., 0.4341, 0.5370, 0.4769],
        [0.4939, 0.5557, 0.4577,  ..., 0.5434, 0.6175, 0.5826],
        [0.5308, 0.4782, 0.5684,  ..., 0.4605, 0.4596, 0.4518]],
       grad_fn=<SigmoidBackward0>)
tensor([[0.4806, 0.4675, 0.5798,  ..., 0.5638, 0.5263, 0.5490],
        [0.6538, 0.4692, 0.4925,  ..., 0.4068, 0.4441, 0.5811],
        [0.5481, 0.5779, 0.3993,  ..., 0.4779, 0.5322, 0.4566],
        ...,
        [0.5047, 0.5063, 0.4799,  ..., 0.4341, 0.5370, 0.4769],
        [0.4939, 0.5557, 0.4577,  ..., 0.5434, 0.6175, 0.5826],
        [0.5308, 0.4782, 0.5684,  ..., 0.4605, 0.4596, 0.4518]],
       grad_fn=<SigmoidBackward0>)
tensor([[0.5032, 0.5126, 0.4411,  ..., 0.4863, 0.5333, 0.5071],
        [0.4593, 0.5161, 0.4428,  ..., 0.4610, 0.4686, 0.5516],
      

RuntimeError: Expected p_in >= 0 && p_in <= 1 to be true, but got false.  (Could this error message be improved?  If so, please report an enhancement request to PyTorch.)