In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

# RBM model definition
class RBM(nn.Module):
    def __init__(self, n_visible=784, n_hidden=500):
        super(RBM, self).__init__()
        self.W = nn.Parameter(torch.randn(n_hidden, n_visible) * 0.1)
        self.h = nn.Parameter(torch.zeros(n_hidden))
        self.v = nn.Parameter(torch.zeros(n_visible))
    
    def v_to_h(self, v):
        h_probs = torch.sigmoid((v @ self.W.T) + self.h)
        h_value = torch.bernoulli(h_probs)
        return h_value, h_probs

    def h_to_v(self, h):
        v_probs = torch.sigmoid((h @ self.W) + self.v)
        v_value = torch.bernoulli(v_probs)
        return v_value, v_probs

    def forward(self, v):
        h_value, h_probs = self.v_to_h(v)
        v_value, v_probs = self.h_to_v(h_value)
        return v_value

    def contrastive_divergence(self, v, lr=0.01):
        h_value, h_probs = self.v_to_h(v)
        v_value, v_probs = self.h_to_v(h_value)
        h_value2, h_probs2 = self.v_to_h(v_probs)

        # Update parameters
        self.W.data += lr * ((h_probs.T @ v) - (h_probs2.T @ v_probs)) / v.size(0)
        self.v.data += lr * (v - v_probs).mean(0)
        self.h.data += lr * (h_probs - h_probs2).mean(0)

# Load MNIST
transform = transforms.Compose([transforms.ToTensor(), transforms.Lambda(lambda x: x.view(-1))])
train_data = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)

# Initialize model
rbm = RBM(n_visible=784, n_hidden=256)

# Training
epochs = 5
for epoch in range(epochs):
    epoch_loss = 0
    for batch, _ in train_loader:
        batch = batch.bernoulli()  # Binarize input
        rbm.contrastive_divergence(batch)
        loss = torch.mean((batch - rbm(batch)) ** 2)
        epoch_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {epoch_loss / len(train_loader):.4f}")


Epoch 1, Loss: 0.1272
Epoch 2, Loss: 0.0906
Epoch 3, Loss: 0.0810
Epoch 4, Loss: 0.0753
Epoch 5, Loss: 0.0715


In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np

# Set random seed for reproducibility
torch.manual_seed(42)

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define RBM class
class RBM(nn.Module):
    def __init__(self, visible, hidden):
        super(RBM, self).__init__()
        self.W = nn.Parameter(torch.randn(hidden, visible) * 0.1)
        self.h_bias = nn.Parameter(torch.zeros(hidden))
        self.v_bias = nn.Parameter(torch.zeros(visible))
        
    def sample_h(self, v):
        h_prob = torch.sigmoid(F.linear(v, self.W, self.h_bias))
        return h_prob, torch.bernoulli(h_prob)
        
    def sample_v(self, h):
        v_prob = torch.sigmoid(F.linear(h, self.W.t(), self.v_bias))
        return v_prob, torch.bernoulli(v_prob)
   
    def contrastive_derivative(self, v, lr=0.1):
        v = v.to(device)
        h_prob, h_sample = self.sample_h(v)
        v_prob, _ = self.sample_v(h_sample)
        h_prob_neg, _ = self.sample_h(v_prob)
 
        self.W.data += lr * (torch.matmul(h_prob.t(), v) - torch.matmul(h_prob_neg.t(), v_prob)) / v.size(0)
        self.v_bias.data += lr * torch.sum(v - v_prob, dim=0) / v.size(0)
        self.h_bias.data += lr * torch.sum(h_prob - h_prob_neg, dim=0) / v.size(0)
        return torch.mean((v - v_prob) ** 2)

# Define DBN class
class DBN(nn.Module):
    def __init__(self, layers):
        super(DBN, self).__init__()
        self.rbm = nn.ModuleList([RBM(layers[i], layers[i+1]) for i in range(len(layers)-1)])
        self.classifier = nn.Sequential(
            nn.Linear(layers[-1], 10),
            nn.Softmax(dim=1)
        )
        self.to(device)
        
    def pre_train(self, data, epochs=10, batch_size=100):
        images = data.data.float() / 255.0  # Normalize to [0, 1]
        images = images.view(-1, images.size(1) * images.size(2)).to(device)
        for i, rbm in enumerate(self.rbm):
            rbm.to(device)
            print(f"Pre-training RBM layer {i+1}/{len(self.rbm)}")
            for epoch in range(epochs):
                epoch_loss = 0
                for batch in DataLoader(images, batch_size=batch_size, shuffle=True):
                    batch = batch.view(batch.size(0), -1).to(device)
                    loss = rbm.contrastive_derivative(batch)
                    epoch_loss += loss.item()
                print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(DataLoader(images, batch_size=batch_size)):.4f}")
            h_prob, _ = rbm.sample_h(images)
            images = h_prob  # Pass hidden activations to next RBM
            
    def forward(self, x):
        x = x.view(x.size(0), -1).to(device)
        for rbm in self.rbm:
            h_prob, _ = rbm.sample_h(x)
            x = h_prob
        return self.classifier(x)
   
    def finetune(self, train_data, test_data, epochs=10, batch_size=64):
        optimizer = optim.SGD(self.parameters(), lr=0.1)
        criterion = nn.CrossEntropyLoss()
        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
        
        for epoch in range(epochs):
            self.train()
            epoch_loss = 0
            for images, targets in train_loader:
                images, targets = images.to(device), targets.to(device)
                images = images.view(images.size(0), -1)
                optimizer.zero_grad()
                outputs = self.forward(images)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()
            
            # Evaluate on test set
            self.eval()
            correct = 0
            total = 0
            with torch.no_grad():
                for images, targets in test_loader:
                    images, targets = images.to(device), targets.to(device)
                    images = images.view(images.size(0), -1)
                    outputs = self.forward(images)
                    _, predicted = torch.max(outputs, 1)
                    total += targets.size(0)
                    correct += (predicted == targets).sum().item()
            
            accuracy = 100 * correct / total
            print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(train_loader):.4f}, Test Accuracy: {accuracy:.2f}%")

# Load MNIST dataset
transform = transforms.ToTensor()
train_dataset = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root="./data", train=False, download=True, transform=transform)

# Initialize DBN
layers = [784, 500, 200]  # Visible: 784 (28x28), Hidden: 500, 200
dbn = DBN(layers)

# Pre-train DBN
print("Starting pre-training...")
dbn.pre_train(train_dataset)

# Fine-tune DBN
print("Starting fine-tuning...")
dbn.finetune(train_dataset, test_dataset)

Using device: cuda
Starting pre-training...
Pre-training RBM layer 1/2
Epoch 1/10, Loss: 0.0213
Epoch 2/10, Loss: 0.0133
Epoch 3/10, Loss: 0.0116
Epoch 4/10, Loss: 0.0108
Epoch 5/10, Loss: 0.0103
Epoch 6/10, Loss: 0.0100
Epoch 7/10, Loss: 0.0097
Epoch 8/10, Loss: 0.0096
Epoch 9/10, Loss: 0.0094
Epoch 10/10, Loss: 0.0093
Pre-training RBM layer 2/2
Epoch 1/10, Loss: 0.0453
Epoch 2/10, Loss: 0.0296
Epoch 3/10, Loss: 0.0267
Epoch 4/10, Loss: 0.0253
Epoch 5/10, Loss: 0.0242
Epoch 6/10, Loss: 0.0233
Epoch 7/10, Loss: 0.0227
Epoch 8/10, Loss: 0.0220
Epoch 9/10, Loss: 0.0214
Epoch 10/10, Loss: 0.0209
Starting fine-tuning...
Epoch 1/10, Loss: 1.8260, Test Accuracy: 89.52%
Epoch 2/10, Loss: 1.6188, Test Accuracy: 91.29%
Epoch 3/10, Loss: 1.5880, Test Accuracy: 92.13%
Epoch 4/10, Loss: 1.5736, Test Accuracy: 92.73%
Epoch 5/10, Loss: 1.5644, Test Accuracy: 93.09%
Epoch 6/10, Loss: 1.5578, Test Accuracy: 93.39%
Epoch 7/10, Loss: 1.5527, Test Accuracy: 93.63%
Epoch 8/10, Loss: 1.5485, Test Accuracy: