In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import tarfile
import pandas as pd
import os
import re
from torch.utils.data import Dataset, DataLoader, ConcatDataset, random_split
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import torch.optim as optim
from sklearn.metrics import confusion_matrix

In [3]:
class DatasetClass(Dataset):
    
    def __init__(self, folder, filename, k, label_dict):
        
        self.filename = filename
        self.data = pd.read_csv(folder + '\\' + filename, header=None)
        self.y = torch.tensor(label_dict[self.filename.rstrip('.csv')], dtype=torch.long)
    
    def __getitem__(self, idx):     
        
        return torch.tensor(self.data.iloc[idx], dtype=torch.float), self.y
      
    def __len__(self):
        
        return len(self.data)

In [4]:
def train_test_loader(directory, train_fraction=0.8, num_workers=2, batch_size=32):

    files = list(filter(lambda x: x.endswith('.csv') and x[0].isupper(), os.listdir(directory)))
    label_dict = {}

    i = 0
    for file in files:
        label_dict[file.rstrip('.csv')] = i
        i += 1

    datasets = list(map(lambda x : DatasetClass(directory, x, len(files), label_dict), files))
    dataset = ConcatDataset(datasets)
    N = dataset.cumulative_sizes[-1]
    
    train_size = int(N*train_fraction)
    test_size = N - train_size

    train_data, test_data = torch.utils.data.random_split(dataset, [train_size, test_size])

    trainloader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    testloader = DataLoader(test_data, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    
    return trainloader, testloader

In [5]:
trainloader, testloader = train_test_loader('Data_Set_2(Black_and_white_images)', train_fraction=0.8, num_workers=0)

In [None]:
# class RBM(nn.Module):
#     def __init__(self, num_v, num_h, k, device, lr_w=0.01, lr_bias=0.001):
        
#         super(RBM, self).__init__()
#         self.W = torch.randn(num_h,num_v).to(device)
#         self.b = torch.zeros(num_v).to(device)  # biases of visible nodes
#         self.c = torch.zeros(num_h).to(device)  # biases of hidden nodes
#         self.k = k                                 # k-step divergence
#         self.device = device
#         self.lr_w = lr_w
#         self.lr_bias = lr_bias
    
#     def get_h(self, v):
#         # activation of the i'th hidden node
#         a_i = F.linear(v, self.W, self.c).to(self.device) 
#         P_h_given_v = torch.sigmoid(a_i)
#         return P_h_given_v, torch.bernoulli(P_h_given_v)
    
#     def get_v(self, h):
#         # activation of the j'th visible node
#         a_j = F.linear(h, self.W.t(), self.b).to(self.device)
#         P_v_given_h = torch.sigmoid(a_j)
#         return P_v_given_h, torch.bernoulli(P_v_given_h)
    
#     def update_step(self, x):
        
#         v0 = x
#         _, h = self.get_h(v0)
        
#         for i in range(self.k):
#             _, v = self.get_v(h)
#             _, h = self.get_h(v)
            
#         p_h_given_v0, _ = self.get_h(v0)
#         p_h_given_vk, _ = self.get_h(v)
        
#         self.W = self.W + self.lr*(torch.matmul(p_h_given_v0.t(), v0) + torch.matmul(p_h_given_vk.t(), v))
#         self.b = self.b + self.lr*(v0 - v) 
#         self.c = self.c + self.lr*(p_h_given_v0 - p_h_given_vk)
        
#         return v

In [6]:
class BBRBM:
    
    def __init__(self, visible_nodes, h_len, lr_W=0.01, lr_bias=0.001):
        
        # set lower lr for bias than for the weights
        
        self.N = visible_nodes.shape[0]        
        v_len = visible_nodes.shape[1]
        self.W = torch.randn(v_len, h_len).to(device)
        self.b = torch.randn(1, v_len).to(device)
        self.c = torch.randn(1, h_len).to(device)
        self.V = visible_nodes.to(device)
        self.lr_W = lr_W
        self.lr_bias = lr_bias        
        
    def get_h(self, v):
        
        a = torch.mm(v.view(1,-1), self.W) + self.c
        f = torch.nn.Sigmoid()
        p_h_v = f(a)
        return p_h_v, torch.bernoulli(p_h_v)
    
    def get_v(self, h):
        a = torch.mm(h.view(1,-1), self.W.T) + self.b
        f = torch.nn.Sigmoid()
        p_v_h = f(a)
        return p_v_h, torch.bernoulli(p_v_h)
    
    def params_update(self, p_h_v0, p_h_vk, v0, vk):
        self.W += self.lr_W*(torch.mm(v0.view(-1,1), p_h_v0) - torch.mm(vk.view(-1,1), p_h_vk))/self.N
        self.b += self.lr_bias*(v0 - vk)/self.N
        self.c += self.lr_bias*(p_h_v0 - p_h_vk)/self.N
        
    def one_epoch(self, k):

        for v0 in self.V:
            v_t = v0
            for t in range(k):  
                p_h_vt, h_t = self.get_h(v_t)
                if t==0:
                    p_h_v0 = p_h_vt                    
                p_v_ht, v_t1 = self.get_v(h_t)
                v_t = v_t1

            try:
                V_k = torch.cat((V_k, v_t.view(1,-1)), dim=0)
                H_k = torch.cat((H_k, h_t.view(1,-1)), dim=0)
            except:
                V_k = v_t.view(1,-1)
                H_k = h_t.view(1,-1)

            self.params_update(p_h_v0, p_h_vt, v0, v_t)

        return V_k, H_k
        
    def train(self, k, eps=1e-3):
        ep = 0
        error_old = np.inf
        while True:
            ep += 1
            ## Check if error should be SSE?
            V_k, H_k = self.one_epoch(k)
            error_new = torch.sum((V_k - self.V)**2) 
            print('Epoch: {0}, Error: {1}'.format(ep, error_new))
            
            if abs(error_new - error_old)/error_old <= eps:
                print('Converged!')
                self.V_train = V_k
                self.H_train = H_k                
                break
            error_old = error_new

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [8]:
v = list(trainloader)[0][0]

In [10]:
rbm = BBRBM(v, h_len=500)
rbm.train(k=50, eps=1e-5)

Epoch: 1, Error: 292489280.0
Epoch: 2, Error: 292347232.0
Epoch: 3, Error: 292344960.0
Converged!


In [11]:
v_new = rbm.H_train

In [12]:
rbm2 = BBRBM(v_new, h_len=350)
rbm2.train(k=50)

Epoch: 1, Error: 5862.0
Epoch: 2, Error: 5462.0
Epoch: 3, Error: 4926.0
Epoch: 4, Error: 4250.0
Epoch: 5, Error: 3591.0
Epoch: 6, Error: 2831.0
Epoch: 7, Error: 2344.0
Epoch: 8, Error: 1721.0
Epoch: 9, Error: 1351.0
Epoch: 10, Error: 1000.0
Epoch: 11, Error: 756.0
Epoch: 12, Error: 684.0
Epoch: 13, Error: 554.0
Epoch: 14, Error: 415.0
Epoch: 15, Error: 371.0
Epoch: 16, Error: 357.0
Epoch: 17, Error: 351.0
Epoch: 18, Error: 288.0
Epoch: 19, Error: 265.0
Epoch: 20, Error: 230.0
Epoch: 21, Error: 197.0
Epoch: 22, Error: 200.0
Epoch: 23, Error: 189.0
Epoch: 24, Error: 197.0
Epoch: 25, Error: 184.0
Epoch: 26, Error: 171.0
Epoch: 27, Error: 151.0
Epoch: 28, Error: 149.0
Epoch: 29, Error: 128.0
Epoch: 30, Error: 143.0
Epoch: 31, Error: 152.0
Epoch: 32, Error: 131.0
Epoch: 33, Error: 131.0
Converged!


In [13]:
v_new2 = rbm2.H_train
rbm3 = BBRBM(v_new2, h_len=200)
rbm3.train(k=50)

Epoch: 1, Error: 4762.0
Epoch: 2, Error: 4473.0
Epoch: 3, Error: 4066.0
Epoch: 4, Error: 3749.0
Epoch: 5, Error: 3436.0
Epoch: 6, Error: 3034.0
Epoch: 7, Error: 2705.0
Epoch: 8, Error: 2472.0
Epoch: 9, Error: 2058.0
Epoch: 10, Error: 1892.0
Epoch: 11, Error: 1676.0
Epoch: 12, Error: 1451.0
Epoch: 13, Error: 1285.0
Epoch: 14, Error: 1015.0
Epoch: 15, Error: 876.0
Epoch: 16, Error: 808.0
Epoch: 17, Error: 591.0
Epoch: 18, Error: 518.0
Epoch: 19, Error: 444.0
Epoch: 20, Error: 439.0
Epoch: 21, Error: 371.0
Epoch: 22, Error: 345.0
Epoch: 23, Error: 338.0
Epoch: 24, Error: 280.0
Epoch: 25, Error: 255.0
Epoch: 26, Error: 251.0
Epoch: 27, Error: 202.0
Epoch: 28, Error: 201.0
Epoch: 29, Error: 176.0
Epoch: 30, Error: 176.0
Converged!


In [14]:
class FinalNet(nn.Module):
    
    def __init__(self, input_size, hidden_sizes, num_classes):
        super(FinalNet, self).__init__()
        
        self.fc1 = nn.Linear(input_size, hidden_sizes[0])
        self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
        self.fc3 = nn.Linear(hidden_sizes[1], hidden_sizes[2])
        self.out = nn.Linear(hidden_sizes[2], num_classes)
    
    def forward(self, x):
        
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        x = self.out(x)
        
        return x
    
    def predict(self, X):
        
        with torch.no_grad():
            y_score = self.forward(X)
            y_pred = torch.argmax(y_score, axis=1)
            
        return y_pred 

In [15]:
classifier = FinalNet(v.shape[1], [500,350,200], 10)

In [20]:
with torch.no_grad():
    
    classifier.fc1.weight.data = nn.Parameter(rbm.W)
    classifier.fc1.bias.data = nn.Parameter(rbm.b)
    
    classifier.fc2.weight = nn.Parameter(rbm2.W)
    classifier.fc2.bias = nn.Parameter(rbm2.b)
    
    classifier.fc3.weight = nn.Parameter(rbm3.W)
    classifier.fc3.bias = nn.Parameter(rbm3.b)

In [21]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(classifier.parameters(), lr=0.001, momentum=0.9)
classifier = classifier.to(device)

In [22]:
trainloader, testloader = train_test_loader('Data_Set_2(Black_and_white_images)', train_fraction=0.8, num_workers=0)

In [23]:
old_loss = np.inf

max_epoch = 500

for epoch in range(max_epoch):

    running_loss = 0.0
    
    for data in trainloader:
        
        X, y = data[0].to(device), data[1].to(device)
        
        optimizer.zero_grad()
        
        # Forward
        y_hat = classifier(X)
        
        # Calculate Loss (Cross Entropy)
        loss = criterion(y_hat, y)
        
        # Backpropagation
        loss.backward()
        
        # Update Parameters
        optimizer.step()
        
        running_loss += loss.item()
    
    print('Epoch', epoch+1, ': Loss =', running_loss)
    
    if abs(running_loss-old_loss)/running_loss < 1e-3:
        print('Converged')
        break
    
    old_loss = running_loss

print('Finished Training')

RuntimeError: size mismatch, m1: [32 x 784], m2: [500 x 784] at C:/w/b/windows/pytorch/aten/src\THC/generic/THCTensorMathBlas.cu:283

In [41]:
rbm.W.shape

torch.Size([784, 500])

In [24]:
rbm2.W.shape

torch.Size([500, 350])

In [25]:
rbm3.W.shape

torch.Size([350, 200])