In [1]:
import numpy as np 
import torch
from torch.utils.data import DataLoader, TensorDataset
import torchvision
from torchvision import transforms

In [2]:
train_dataset = torchvision.datasets.FashionMNIST(root='./', train=True, download=True, transform=transforms.ToTensor())
test_dataset = torchvision.datasets.FashionMNIST(root='./', train=False, download=True, transform=transforms.ToTensor())

In [3]:
train_dataset, test_dataset

(Dataset FashionMNIST
     Number of datapoints: 60000
     Root location: ./
     Split: Train
     StandardTransform
 Transform: ToTensor(),
 Dataset FashionMNIST
     Number of datapoints: 10000
     Root location: ./
     Split: Test
     StandardTransform
 Transform: ToTensor())

In [4]:
train_labels = train_dataset.targets
test_labels = test_dataset.targets

train_dataset = train_dataset.data
test_dataset = test_dataset.data

In [5]:
# sample data before formatting
train_dataset[0]

tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   1,   0,
           0,  13,  73,   0,   0,   1,   4,   0,   0,   0,   0,   1,   1,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,   0,
          36, 136, 127,  62,  54,   0,   0,   0,   1,   3,   4,   0,   0,   3],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   6,   0,
         102, 204, 176, 134, 144, 123,  23,   0,   0,   0,   0,  12,  10,   0],
        [  0,   0,   0,   0,   0,   0,   0,   

In [6]:
def format(data):
    d = data.view(len(data), -1)
    d = (d > 127).to(torch.int)
    
    return d

In [7]:
train_data = format(train_dataset)
test_data = format(test_dataset)

In [8]:
# sample data after formatting
train_data[0]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0,
        0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
        0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,

In [9]:
train_data = TensorDataset(train_data)
train_data = DataLoader(train_data, 100, shuffle=False)

test_data = TensorDataset(test_data)
test_data = DataLoader(test_data, 100, shuffle=False)

In [10]:
for i in test_data:
    print(i)
    break

[tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.int32)]


In [11]:
# Model

class RBM:
    def __init__(self, visual_dim, hidden_dim):
        self.visual_dim = visual_dim # visual dimension - 784
        self.hidden_dim = hidden_dim # Hidden dimension - can be changed
        
        self.w = np.random.randn(self.visual_dim, self.hidden_dim)
        self.v = np.ones(self.visual_dim) # bias vector for visual nodes
        self.h = np.ones(self.hidden_dim) # bias vector for hidden nodes
        
    def logistic(self, x):
        """
            Computes the sigmoid of the input x
        """
        return 1 / (1 + np.exp(-x))
        
    def train(self, data, k, max_epochs, lr=0.1):
        """
            data: Training data: (N, 784)
            k: decides the number of steps of Gibbs Sampling
            max_epochs: Number of Training epochs
            lr: learning rate
        """
        # count = 0
        for epoch in range(max_epochs):
            epoch_loss = 0.0
            
            for batch in data:
                # count+= 1
                batch_loss = 0.0
                batch = batch[0].numpy()
                # Positive phase:
                """
                    Sampling the hidden states given the visible values; hidden states sampled from the 
                    True distribution of the data.
                """
                hidden_activations = np.matmul(batch, self.w) + self.h
                hidden_prob = self.logistic(hidden_activations)
                
                hidden_states = hidden_prob > np.random.rand(len(batch), self.hidden_dim)
                
                h_states = hidden_states.copy()
                # Negative Phase
                for i in range(k):
                    visible_activations = np.matmul(h_states, self.w.T) + self.v
                    visible_prob = self.logistic(visible_activations)
                    visible_states = visible_prob > np.random.rand(len(batch), self.visual_dim)
                    
                    neg_hidden_activations = np.matmul(visible_states, self.w) + self.h
                    neg_hidden_prob = self.logistic(neg_hidden_activations)
                    
                    h_states = neg_hidden_prob > np.random.rand(len(batch), self.hidden_dim)
                
                pos_associations = np.dot(batch.T, hidden_prob)/ len(batch) # could use hidden_states as well
                neg_associations = np.dot(visible_states.T, neg_hidden_prob)/ len(batch) # could use the neg_hidden_states as well
                
                # update weights
                dw = pos_associations - neg_associations
                dv = np.mean(batch - visible_states)
                dh = np.mean(hidden_prob - neg_hidden_prob)
                
                self.w+= lr*dw
                self.v+= lr*dv
                self.h+= lr*dh
                
                batch_loss = np.mean((batch - visible_prob)**2)
                epoch_loss+= batch_loss
                # print(f"Batch loss: {batch_loss}")
            print("Epoch %s: error is %s" % (epoch, epoch_loss))
            # print(f"Batch Count: {count}")

In [12]:
model = RBM(784, 256)
model.train(train_data, 1, 1000)

Epoch 0: error is 64.91797052856151
Epoch 1: error is 53.78766281746602
Epoch 2: error is 51.767851092091554
Epoch 3: error is 51.169480430150045
Epoch 4: error is 50.72386269581405
Epoch 5: error is 50.316652898667165
Epoch 6: error is 49.838465799103766
Epoch 7: error is 49.54890567911321
Epoch 8: error is 49.399647039848944
Epoch 9: error is 49.339467345890185
Epoch 10: error is 49.35714199328457
Epoch 11: error is 49.3968838045093
Epoch 12: error is 49.58394707514329
Epoch 13: error is 49.78345615784142
Epoch 14: error is 49.76096007920965


  return 1 / (1 + np.exp(-x))


Epoch 15: error is 49.77665021683989
Epoch 16: error is 49.825688770430624
Epoch 17: error is 49.841868467099076
Epoch 18: error is 49.900794598726335
Epoch 19: error is 49.87045538729056
Epoch 20: error is 49.82827431597228
Epoch 21: error is 49.82188850442284
Epoch 22: error is 49.79008269481665
Epoch 23: error is 49.75975211955241


KeyboardInterrupt: 