<a href="https://colab.research.google.com/github/5hadytru/ELM/blob/main/ELM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

In [100]:
BATCH_SIZE = 32

## transformations
# The only preprocessing step is turning each image into a tensor.
transform = transforms.Compose([transforms.ToTensor()])

## download training + test datasets
# Both splits are stored under ./data and fetched on first use.
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

## create an iterator for the training data
# Training batches are reshuffled on every pass over the data.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

## do the same for the test set
# Test batches keep the dataset order.
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

In [36]:
import matplotlib.pyplot as plt
import numpy as np

## functions to show an image
def imshow(img):
    """Draw an image tensor on the current matplotlib axes.

    Args:
        img: Tensor whose axes are moved from order (0, 1, 2) to (1, 2, 0)
            before plotting, i.e. the first axis becomes the last one.
    """
    as_array = img.numpy()
    # matplotlib receives the array with the original leading axis moved last
    channels_last = np.transpose(as_array, (1, 2, 0))
    plt.imshow(channels_last)

## get some random training images
# Advance the iterator with the built-in next(): the Python 2-style `.next()`
# method is not part of the iterator protocol and is not available on
# DataLoader iterators in recent PyTorch releases.
trainiter = iter(trainloader)
images, labels = next(trainiter)

## show images
# make_grid tiles the batch into one image so it can be drawn in a single call
imshow(torchvision.utils.make_grid(images))

In [7]:
# Inspect one training batch to confirm the tensor shapes and label format.
# next(iter(...)) fetches exactly one batch without a loop-and-break.
images, labels = next(iter(trainloader))
print("Image batch dimensions:", images.shape)
print("Image label dimensions:", labels.shape)
print(type(images), type(labels))
print(labels)

Image batch dimensions: torch.Size([32, 1, 28, 28])
Image label dimensions: torch.Size([32])
<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor([1, 3, 0, 0, 4, 0, 5, 2, 8, 6, 2, 6, 2, 5, 8, 7, 2, 5, 7, 7, 1, 3, 5, 5,
        5, 0, 2, 1, 4, 5, 9, 3])


In [8]:
  # Number of labels in the most recently fetched batch (its leading dimension).
  labels.size(0)

32

In [172]:
# Prefer the GPU when torch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

class ELM(nn.Module):
    """Extreme Learning Machine: random fixed hidden layer + closed-form output layer.

    The hidden layer's weights and biases are sampled from a standard normal
    distribution and are never trained. ``fit`` solves for the output weights
    in a single step as ``beta = pinv(H) @ T``, where ``H`` stacks the sigmoid
    hidden activations of every training sample and ``T`` stacks the targets.

    Args:
        input_dim: Number of input features per (flattened) sample.
        hidden_dim: Number of random hidden units.
        output_dim: Number of outputs (number of classes for one-hot targets).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super(ELM, self).__init__()

        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        self.hidden = nn.Linear(input_dim, hidden_dim)
        self.beta = nn.Linear(hidden_dim, output_dim, bias=False)

        # initialize hidden layer with Gaussian random weights + biases of mean 0 and var 1
        self.randomize_H()

    def randomize_H(self):
        """Re-sample the hidden layer's weights and biases from N(0, 1)."""
        # Copy in place under no_grad so the tensors stay registered parameters
        # and the re-initialisation is not recorded by autograd.
        with torch.no_grad():
            self.hidden.weight.copy_(torch.randn_like(self.hidden.weight))
            self.hidden.bias.copy_(torch.randn_like(self.hidden.bias))

    def fit(self, trainloader, one_hot=False, flatten=False):
        """Solve for the output weights from one pass over ``trainloader``.

        Args:
            trainloader: Iterable of ``(data_batch, label_batch)`` pairs that
                also exposes ``.dataset`` (its length sizes the buffers).
            one_hot: If True, integer class labels are one-hot encoded to
                ``output_dim`` columns. Otherwise each label is written to its
                target row as-is (a scalar label is broadcast across the row).
            flatten: If True, every sample is flattened to 1-D before the
                hidden layer.
        """
        weight = self.hidden.weight
        n_samples = len(trainloader.dataset)
        H_outputs = torch.empty(n_samples, self.hidden_dim,
                                dtype=weight.dtype, device=weight.device)
        labels = torch.empty(n_samples, self.output_dim,
                             dtype=weight.dtype, device=weight.device)  # will be Nxoutput_dim

        # Nothing here is trained by gradient descent, so autograd tracking
        # would only cost memory.
        with torch.no_grad():
            start = 0  # row index of the first sample of the current batch
            for data_batch, label_batch in trainloader:
                batch_len = len(data_batch)
                stop = start + batch_len
                data_batch = data_batch.to(weight.device)
                label_batch = label_batch.to(weight.device)

                # hidden layer output for the whole batch at once
                if flatten:
                    data_batch = torch.flatten(data_batch, start_dim=1)
                hidden_out = torch.sigmoid(self.hidden(data_batch))
                H_outputs[start:stop] = hidden_out.reshape(batch_len, self.hidden_dim)

                # targets for the whole batch at once
                if one_hot:
                    labels[start:stop] = F.one_hot(label_batch, num_classes=self.output_dim)
                else:
                    labels[start:stop] = label_batch.reshape(batch_len, -1)

                start = stop

            # A loader may yield fewer samples than len(dataset) (e.g.
            # drop_last=True); drop the rows that were never written so
            # uninitialised memory cannot leak into the solution.
            H_outputs = H_outputs[:start]
            labels = labels[:start]

            # compute pseudoinverse of H
            H_outputs_pinv = torch.linalg.pinv(H_outputs)

            # apply beta layer = pseudoinverse(H) * labels
            print(f"Beta shape: {self.beta.weight.data.shape}")
            beta = torch.matmul(H_outputs_pinv, labels)  # (hidden_dim, output_dim)
            # nn.Linear keeps its weight as (out_features, in_features), so
            # store the transpose; self.beta(h) then equals h @ beta.
            self.beta.weight.copy_(beta.T)

    def forward(self, x, flatten=False):
        """Classify a single sample.

        Args:
            x: One input sample.
            flatten: If True, ``x`` is flattened to 1-D before the hidden layer.

        Returns:
            The index of the largest output as a Python int.
        """
        with torch.no_grad():
            x = x.to(self.hidden.weight.device)
            if flatten:
                x = torch.flatten(x)
            H_out = torch.sigmoid(self.hidden(x))
            pred = self.beta(H_out)
        return torch.argmax(pred).item()

Using cpu device


In [177]:
# Build the network from named sizes, then solve its output layer in one pass
# over the training loader (labels one-hot encoded, images flattened).
model = ELM(input_dim=28 * 28, hidden_dim=3000, output_dim=10)
model.fit(trainloader, one_hot=True, flatten=True)



Beta shape: torch.Size([10, 3000])


In [178]:
# test the network
# Each test image is scored on its own because ELM.forward returns a single
# class index per call.
correct = 0
incorrect = 0
for image_batch, label_batch in testloader:
    # zip pairs every image with its own label, so no manual index is needed
    for image, label in zip(image_batch, label_batch):
        # call the module itself rather than .forward so nn.Module hooks run
        pred = model(image, flatten=True)
        if label.item() == pred:
            correct += 1
        else:
            incorrect += 1

# fraction of test images classified correctly
print(correct / (correct + incorrect))



0.9602
