In [1]:
import os
import re
import tarfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score
from torch.utils.data import Dataset, DataLoader, ConcatDataset, random_split

In [2]:
class DatasetClass(Dataset):
    """Dataset backed by one headerless CSV file whose rows all share one class label.

    Each row of the CSV is one sample; the class label is looked up from the
    file name, so every item of the dataset carries the same label tensor.
    """

    def __init__(self, folder, filename, k, label_dict):
        """Read the CSV and resolve the class label for this file.

        Args:
            folder: Directory that contains ``filename``.
            filename: Name of the CSV file, e.g. ``'Dress.csv'``.
            k: Unused; kept so existing callers keep working.
            label_dict: Mapping from class name to integer label.

        Raises:
            KeyError: If no key for this file is present in ``label_dict``.
        """
        self.filename = filename
        # os.path.join instead of a hard-coded '\\' so the path also resolves
        # on non-Windows systems.
        self.data = pd.read_csv(os.path.join(folder, filename), header=None)
        self.y = torch.tensor(self._lookup_label(filename, label_dict), dtype=torch.long)

    @staticmethod
    def _lookup_label(filename, label_dict):
        """Return the label stored for ``filename``'s class name."""
        # The class name is the file name without its extension.
        stem = os.path.splitext(filename)[0]
        if stem in label_dict:
            return label_dict[stem]
        # str.rstrip('.csv') removes any trailing '.', 'c', 's', 'v' characters
        # ("Dress.csv" -> "Dre"), not the suffix. Dictionaries built with that
        # expression are still honoured as a fallback.
        legacy_key = filename.rstrip('.csv')
        if legacy_key in label_dict:
            return label_dict[legacy_key]
        raise KeyError(stem)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for row ``idx`` as (float tensor, long tensor)."""
        # Convert the row to a NumPy array first; torch.tensor then builds the
        # float tensor from a plain array rather than from a pandas Series.
        row = self.data.iloc[idx].to_numpy()
        return torch.tensor(row, dtype=torch.float), self.y

    def __len__(self):
        """Number of rows (samples) in the CSV."""
        return len(self.data)

In [3]:
def train_test_loader(directory, train_fraction=0.8, num_workers=2, batch_size=32):
    """Build train/test DataLoaders from the per-class CSV files in ``directory``.

    Every ``*.csv`` file whose name starts with an upper-case letter is treated
    as one class. Files are sorted by name before labels are assigned, so the
    class index of a file does not depend on the order ``os.listdir`` returns.

    Args:
        directory: Folder containing one CSV file per class.
        train_fraction: Fraction of all samples placed in the training split.
        num_workers: Worker processes used by both loaders.
        batch_size: Batch size used by both loaders.

    Returns:
        ``(trainloader, testloader)``. The split is drawn at random on every call.

    Raises:
        ValueError: If ``directory`` contains no matching CSV file.
    """
    files = sorted(
        name for name in os.listdir(directory)
        if name.endswith('.csv') and name[0].isupper()
    )
    if not files:
        raise ValueError('No class CSV files found in {0!r}'.format(directory))

    # Key each class by its real file stem. str.rstrip('.csv') strips a
    # character set rather than the suffix ("Dress.csv" -> "Dre"), which can
    # make two different files share one key.
    label_dict = {os.path.splitext(name)[0]: label for label, name in enumerate(files)}
    # Also register the legacy rstrip-style key (without overriding a real
    # stem) so that lookups using either key form resolve.
    for label, name in enumerate(files):
        label_dict.setdefault(name.rstrip('.csv'), label)

    datasets = [DatasetClass(directory, name, len(files), label_dict) for name in files]
    dataset = ConcatDataset(datasets)
    N = len(dataset)

    train_size = int(N * train_fraction)
    test_size = N - train_size
    train_data, test_data = random_split(dataset, [train_size, test_size])

    trainloader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    # Evaluation does not depend on sample order, so the test loader is not shuffled.
    testloader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return trainloader, testloader

In [4]:
# Load the per-class CSV files and split them 80/20; num_workers=0 keeps
# data loading in the main process.
trainloader, testloader = train_test_loader(
    'Data_Set_2(Black_and_white_images)',
    train_fraction=0.8,
    num_workers=0,
)

In [5]:
class BBRBM:
    """Bernoulli-Bernoulli restricted Boltzmann machine trained with per-sample CD-k.

    Parameters are plain tensors (no autograd): ``W`` is (v_len, h_len), the
    visible bias ``b`` is (1, v_len) and the hidden bias ``c`` is (1, h_len).

    NOTE(review): a Bernoulli visible layer assumes inputs in [0, 1]. The error
    logged for the first RBM in this notebook (~3.5e8 on a single batch)
    suggests raw pixel intensities are passed in - confirm and rescale in the
    caller if so.
    """

    def __init__(self, visible_nodes, h_len, lr_W=0.01, lr_bias=0.001, device=None):
        """Initialise parameters and store the training data.

        Args:
            visible_nodes: 2-D float tensor (n_samples, v_len) of training data.
            h_len: Number of hidden units.
            lr_W: Learning rate for the weights.
            lr_bias: Learning rate for both biases (kept lower than ``lr_W``).
            device: Device for parameters and data. Defaults to ``cuda:0`` when
                CUDA is available and ``cpu`` otherwise, so the class no longer
                depends on a global ``device`` defined in a later cell.

        Raises:
            ValueError: If ``visible_nodes`` is not 2-D or has no rows.
        """
        if visible_nodes.dim() != 2 or visible_nodes.shape[0] == 0:
            raise ValueError('visible_nodes must be a non-empty 2-D tensor')
        if device is None:
            device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device)

        self.N = visible_nodes.shape[0]
        v_len = visible_nodes.shape[1]
        self.W = torch.randn(v_len, h_len).to(self.device)
        self.b = torch.randn(1, v_len).to(self.device)
        self.c = torch.randn(1, h_len).to(self.device)
        self.V = visible_nodes.to(self.device)
        self.lr_W = lr_W
        self.lr_bias = lr_bias

    def get_h(self, v):
        """Return ``(p(h=1|v), sample of h)`` for one visible vector, both (1, h_len)."""
        p_h_v = torch.sigmoid(torch.mm(v.view(1, -1), self.W) + self.c)
        return p_h_v, torch.bernoulli(p_h_v)

    def get_v(self, h):
        """Return ``(p(v=1|h), sample of v)`` for one hidden vector, both (1, v_len)."""
        p_v_h = torch.sigmoid(torch.mm(h.view(1, -1), self.W.T) + self.b)
        return p_v_h, torch.bernoulli(p_v_h)

    def params_update(self, p_h_v0, p_h_vk, v0, vk):
        """Apply one contrastive-divergence step in place.

        Positive statistics come from ``(v0, p_h_v0)`` and negative statistics
        from ``(vk, p_h_vk)``; each step is scaled by ``1 / N``.
        """
        self.W += self.lr_W*(torch.mm(v0.view(-1,1), p_h_v0) - torch.mm(vk.view(-1,1), p_h_vk))/self.N
        self.b += self.lr_bias*(v0 - vk)/self.N
        self.c += self.lr_bias*(p_h_v0 - p_h_vk)/self.N

    def one_epoch(self, k):
        """Run CD-k once over every stored sample, updating after each sample.

        Args:
            k: Number of Gibbs steps per sample; must be at least 1.

        Returns:
            ``(V_k, H_k)``: the visible sample at the end of each chain,
            (N, v_len), and the hidden sample it was generated from, (N, h_len).

        Raises:
            ValueError: If ``k < 1``.
        """
        if k < 1:
            raise ValueError('k must be >= 1, got {0}'.format(k))

        # Collect chain ends in lists and concatenate once, instead of growing
        # tensors inside a bare try/except.
        chain_v, chain_h = [], []
        for v0 in self.V:
            p_h_v0, h_t = self.get_h(v0)
            _, v_t = self.get_v(h_t)
            for _step in range(k - 1):
                _, h_t = self.get_h(v_t)
                _, v_t = self.get_v(h_t)

            # The negative phase must use p(h | v_k). Reusing the probability
            # computed from v_{k-1} makes the hidden-bias update identically
            # zero when k == 1.
            p_h_vk, _ = self.get_h(v_t)

            chain_v.append(v_t.view(1, -1))
            chain_h.append(h_t.view(1, -1))
            self.params_update(p_h_v0, p_h_vk, v0, v_t)

        return torch.cat(chain_v, dim=0), torch.cat(chain_h, dim=0)

    @staticmethod
    def _has_converged(error_old, error_new, eps):
        """True when the relative change between two epoch errors is at most ``eps``."""
        if error_old == float('inf'):
            # First epoch: nothing to compare against yet.
            return False
        if error_old == 0:
            # Avoid 0/0: a reconstruction that stays perfect has converged.
            return error_new == 0
        return abs(error_new - error_old) / error_old <= eps

    def train(self, k, eps=1e-3, max_epochs=None):
        """Repeat ``one_epoch`` until the reconstruction error stops changing.

        The error is the summed squared difference between the chain-end
        visible samples and the training data. The final chain-end samples are
        stored in ``self.V_train`` and ``self.H_train``.

        Args:
            k: Number of Gibbs steps per sample (see ``one_epoch``).
            eps: Relative error change at or below which training stops.
            max_epochs: Optional cap on the number of epochs; ``None`` keeps
                the original behaviour of running until convergence.
        """
        ep = 0
        error_old = float('inf')
        while True:
            ep += 1
            V_k, H_k = self.one_epoch(k)
            error_new = torch.sum((V_k - self.V)**2).item()
            print('Epoch: {0}, Error: {1}'.format(ep, error_new))

            if self._has_converged(error_old, error_new, eps):
                print('Converged!')
                break
            if max_epochs is not None and ep >= max_epochs:
                print('Stopped after {0} epochs without converging.'.format(ep))
                break
            error_old = error_new

        self.V_train = V_k
        self.H_train = H_k

In [6]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda:0


In [7]:
# Take the features of the first training batch only. next(iter(...)) fetches
# that single batch instead of materialising every batch into a list.
# NOTE(review): the RBMs below are therefore pre-trained on one batch
# (batch_size samples), not on the whole training split - confirm this is intended.
v = next(iter(trainloader))[0]

In [8]:
# First RBM: 500 hidden units on top of the input batch, trained with 50 Gibbs
# steps per sample until the relative error change is at most 1e-5.
rbm = BBRBM(visible_nodes=v, h_len=500)
rbm.train(50, eps=1e-5)

Epoch: 1, Error: 347118752.0
Epoch: 2, Error: 346987328.0
Epoch: 3, Error: 346984768.0
Converged!


In [9]:
# Hidden samples stored by rbm.train(); they are used as the visible data of
# the next RBM in the stack.
v_new = rbm.H_train

In [10]:
# Second RBM: 350 hidden units on top of the first RBM's hidden samples
# (default convergence tolerance).
rbm2 = BBRBM(visible_nodes=v_new, h_len=350)
rbm2.train(50)

Epoch: 1, Error: 6323.0
Epoch: 2, Error: 5783.0
Epoch: 3, Error: 5207.0
Epoch: 4, Error: 4634.0
Epoch: 5, Error: 4090.0
Epoch: 6, Error: 3457.0
Epoch: 7, Error: 2852.0
Epoch: 8, Error: 2340.0
Epoch: 9, Error: 1846.0
Epoch: 10, Error: 1438.0
Epoch: 11, Error: 1128.0
Epoch: 12, Error: 889.0
Epoch: 13, Error: 707.0
Epoch: 14, Error: 543.0
Epoch: 15, Error: 416.0
Epoch: 16, Error: 337.0
Epoch: 17, Error: 290.0
Epoch: 18, Error: 273.0
Epoch: 19, Error: 214.0
Epoch: 20, Error: 215.0
Epoch: 21, Error: 199.0
Epoch: 22, Error: 195.0
Epoch: 23, Error: 177.0
Epoch: 24, Error: 170.0
Epoch: 25, Error: 177.0
Epoch: 26, Error: 141.0
Epoch: 27, Error: 142.0
Epoch: 28, Error: 133.0
Epoch: 29, Error: 143.0
Epoch: 30, Error: 148.0
Epoch: 31, Error: 122.0
Epoch: 32, Error: 130.0
Epoch: 33, Error: 124.0
Epoch: 34, Error: 110.0
Epoch: 35, Error: 118.0
Epoch: 36, Error: 102.0
Epoch: 37, Error: 105.0
Epoch: 38, Error: 109.0
Epoch: 39, Error: 99.0
Epoch: 40, Error: 91.0
Epoch: 41, Error: 113.0
Epoch: 42, Error

In [11]:
# Third RBM: 200 hidden units on top of the second RBM's hidden samples.
v_new2 = rbm2.H_train
rbm3 = BBRBM(visible_nodes=v_new2, h_len=200)
rbm3.train(50)

Epoch: 1, Error: 5118.0
Epoch: 2, Error: 4701.0
Epoch: 3, Error: 4480.0
Epoch: 4, Error: 4177.0
Epoch: 5, Error: 3994.0
Epoch: 6, Error: 3732.0
Epoch: 7, Error: 3425.0
Epoch: 8, Error: 3112.0
Epoch: 9, Error: 2854.0
Epoch: 10, Error: 2610.0
Epoch: 11, Error: 2471.0
Epoch: 12, Error: 2113.0
Epoch: 13, Error: 1942.0
Epoch: 14, Error: 1669.0
Epoch: 15, Error: 1601.0
Epoch: 16, Error: 1513.0
Epoch: 17, Error: 1347.0
Epoch: 18, Error: 1116.0
Epoch: 19, Error: 1120.0
Epoch: 20, Error: 882.0
Epoch: 21, Error: 752.0
Epoch: 22, Error: 604.0
Epoch: 23, Error: 518.0
Epoch: 24, Error: 439.0
Epoch: 25, Error: 384.0
Epoch: 26, Error: 384.0
Converged!


In [12]:
class FinalNet(nn.Module):
    """Feed-forward classifier: three tanh hidden layers and a linear output layer."""

    def __init__(self, input_size, hidden_sizes, num_classes):
        """Create the four linear layers.

        Args:
            input_size: Number of input features.
            hidden_sizes: Sequence whose first three entries give the hidden widths.
            num_classes: Number of output logits.
        """
        super().__init__()

        first, second, third = hidden_sizes[0], hidden_sizes[1], hidden_sizes[2]
        self.fc1 = nn.Linear(input_size, first)
        self.fc2 = nn.Linear(first, second)
        self.fc3 = nn.Linear(second, third)
        self.out = nn.Linear(third, num_classes)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch ``x``."""
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = hidden_layer(x).tanh()
        return self.out(x)

    def predict(self, X):
        """Return the index of the highest-scoring class per row, without tracking gradients."""
        with torch.no_grad():
            return self.forward(X).argmax(dim=1)

In [13]:
# Hidden widths mirror the h_len of the three stacked RBMs so that their
# weights can be copied into fc1-fc3; the output layer has 10 classes.
classifier = FinalNet(
    input_size=v.shape[1],
    hidden_sizes=[500, 350, 200],
    num_classes=10,
)

In [14]:
# Initialise the three hidden layers from the pre-trained RBMs.
# copy_ writes the values into the existing parameters, so all three layers are
# handled the same way and the classifier does not share storage with the RBM
# tensors (fine-tuning would otherwise modify rbm.W / rbm.c in place).
# nn.Linear stores weights as (out_features, in_features), hence the transpose.
# NOTE(review): the RBMs use sigmoid hidden units while FinalNet applies tanh -
# confirm the mismatch is intended.
with torch.no_grad():

    for layer, pretrained in ((classifier.fc1, rbm), (classifier.fc2, rbm2), (classifier.fc3, rbm3)):
        layer.weight.copy_(pretrained.W.t())
        layer.bias.copy_(pretrained.c.squeeze(0))

In [15]:
criterion = nn.CrossEntropyLoss()
# Move the model to the target device before building the optimizer, so the
# optimizer is constructed over the parameters as they will be trained.
classifier = classifier.to(device)
optimizer = optim.SGD(classifier.parameters(), lr=0.001, momentum=0.9)

In [16]:
# Rebuild the loaders for supervised fine-tuning.
# NOTE(review): this re-reads the CSV files and draws a new random split, so
# the batch used for RBM pre-training may now fall in the test split - confirm
# whether reusing the earlier loaders was intended.
trainloader, testloader = train_test_loader(
    'Data_Set_2(Black_and_white_images)',
    train_fraction=0.8,
    num_workers=0,
)

In [None]:
# Supervised fine-tuning: at most max_epoch passes over the training loader,
# stopping early once the summed epoch loss changes by less than 0.1 %.
max_epoch = 500
old_loss = np.inf

for epoch in range(max_epoch):

    # Sum of the per-batch mean losses over this epoch.
    running_loss = 0.0

    for data in trainloader:

        X = data[0].to(device)
        y = data[1].to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and cross-entropy loss.
        y_hat = classifier(X)
        loss = criterion(y_hat, y)

        # Backward pass, then parameter update.
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print('Epoch', epoch+1, ': Loss =', running_loss)

    # Relative change of the epoch loss with respect to the previous epoch.
    relative_change = abs(running_loss - old_loss) / running_loss
    if relative_change < 1e-3:
        print('Converged')
        break

    old_loss = running_loss

print('Finished Training')

In [18]:
# Evaluate on the training loader. The accumulators keep their test_* / y_test*
# names because the metrics cell that follows reads them.
test_loss = 0.0
y_test, y_test_pred = [], []

with torch.no_grad():

    for data in trainloader:

        X = data[0].to(device)
        y = data[1].to(device)
        y_hat = classifier(X)

        # Sum of the per-batch mean losses.
        test_loss = test_loss + criterion(y_hat, y)

        # Collect true labels and arg-max predictions on the CPU.
        y_test += list(y.detach().cpu().numpy())
        y_test_pred += list(y_hat.argmax(dim=1).detach().cpu().numpy())

print('Train Loss =', test_loss.item())
pd.DataFrame(confusion_matrix(y_test, y_test_pred))

Train Loss = 1898.1773681640625


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,3275,35,26,0,0,979,0,362,0,111
1,93,2147,1670,77,0,84,1,459,17,288
2,0,89,4275,23,1,0,9,1,63,360
3,0,170,493,2450,0,2,0,0,313,1352
4,0,79,4482,7,4,1,20,2,6,166
5,1033,163,18,9,2,2236,0,1219,34,134
6,1,207,3283,75,6,7,31,4,32,1149
7,405,55,2,0,0,424,0,3865,7,2
8,0,5,155,414,1,0,2,0,3982,228
9,1,260,786,302,0,4,12,3,107,3343


In [19]:
# Macro-averaged metrics for the labels collected in the previous cell (the
# training split). The sklearn metric functions are imported in the notebook's
# import cell rather than here.
acc = accuracy_score(y_test, y_test_pred)
prec = precision_score(y_test, y_test_pred, average='macro')
f1 = f1_score(y_test, y_test_pred, average='macro')

print('Train Accuracy =', acc, 'Train Precision =', prec, 'Train F1 =', f1)

Train Accuracy = 0.5335 Train Precision = 0.5653790151579146 Train F1 = 0.49165426184070027


In [20]:
# Evaluate on the held-out test loader.
test_loss = 0.0
y_test, y_test_pred = [], []

with torch.no_grad():

    for data in testloader:

        X = data[0].to(device)
        y = data[1].to(device)
        y_hat = classifier(X)

        # Sum of the per-batch mean losses.
        test_loss = test_loss + criterion(y_hat, y)

        # Collect true labels and arg-max predictions on the CPU.
        y_test += list(y.detach().cpu().numpy())
        y_test_pred += list(y_hat.argmax(dim=1).detach().cpu().numpy())

print('Test Loss =', test_loss.item())
pd.DataFrame(confusion_matrix(y_test, y_test_pred))

Test Loss = 466.2761535644531


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,809,9,7,0,0,279,0,81,0,27
1,32,511,400,22,0,17,1,111,2,68
2,0,26,1048,4,1,0,4,0,11,85
3,0,28,116,633,0,0,1,0,85,357
4,0,12,1174,1,1,0,6,1,1,37
5,253,42,5,4,0,535,0,285,8,20
6,1,61,824,14,2,2,7,0,6,288
7,90,18,0,1,0,104,0,1024,3,0
8,0,2,41,81,1,0,1,0,1044,43
9,1,62,208,67,2,1,2,2,17,820


In [21]:
# Macro-averaged metrics for the test-split labels collected in the previous cell.
acc, prec, f1 = (
    accuracy_score(y_test, y_test_pred),
    precision_score(y_test, y_test_pred, average='macro'),
    f1_score(y_test, y_test_pred, average='macro'),
)

print('Test Accuracy =', acc, 'Test Precision =', prec, 'Test F1 =', f1)

Test Accuracy = 0.536 Test Precision = 0.5453615227925204 Test F1 = 0.4945673820669253


In [22]:
# Persist only the fine-tuned parameters (state dict), not the module object.
final_weights = classifier.state_dict()
torch.save(final_weights, 'q4_wts.pt')