In [2]:
import os
import numpy as np
import pandas as pd

In [3]:
import torch
import torch.nn as nn
import torch.functional as F

In [4]:
# Environment report: PyTorch build plus whatever CUDA devices are visible.
print(torch.__version__) # Get PyTorch and CUDA version
print(f"{torch.cuda.is_available() = }") # Check that CUDA works
print(f"{torch.cuda.device_count() = }") # Check how many CUDA capable devices you have
# Print the human readable name of every visible device. Looping over
# device_count() keeps this cell runnable on CPU-only machines (zero
# iterations) and on multi-GPU machines without editing the cell.
for device_index in range(torch.cuda.device_count()):
    print(f"torch.cuda.get_device_name({device_index}) = {torch.cuda.get_device_name(device_index)!r}")

2.0.1
torch.cuda.is_available() = True
torch.cuda.device_count() = 1
torch.cuda.get_device_name(0) = 'NVIDIA T1200 Laptop GPU'


In [5]:
# Prefer the first CUDA device and fall back to the CPU when CUDA is unavailable.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Only touch the CUDA runtime when a CUDA device was actually selected;
# calling torch.cuda.set_device on a CPU-only machine raises.
if device.type == 'cuda':
    torch.cuda.set_device(device)
device

device(type='cuda', index=0)

## Example 1

In [6]:
class my_layer(nn.Module):
    """Hand-written dense layer computing ``relu(x @ weight + bias)``.

    The forward pass prints the tensor shape after each step so the flow of
    shapes through the layer can be followed in the notebook output.

    Args:
        in_feat: size of the last dimension of the input.
        out_feat: size of the last dimension of the output.
    """

    def __init__(self, in_feat, out_feat):
        super().__init__()
        # Both parameters are drawn with torch.randn; the weight is created
        # first, then the bias.
        self.weight = nn.Parameter(torch.randn(in_feat, out_feat))
        self.bias = nn.Parameter(torch.randn(out_feat))

    def forward(self, x):
        """Apply the affine map followed by ReLU and return the result."""
        projected = x @ self.weight
        print(f'Shape after matmul with weights - {projected.shape}')
        shifted = projected + self.bias
        print(f'Shape after after adding bias - {shifted.shape}')
        activated = torch.relu(shifted)
        print(f'Shape after ReLu operation - {activated.shape}')
        return activated


class custom_clf(nn.Module):
    """Small demo network: two ``nn.Linear`` layers followed by ``my_layer``.

    Layer sizes are fixed at 30 -> 20 -> 10 -> 3. Constructing the model
    prints its module summary.
    """

    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(30, 20)
        self.l2 = nn.Linear(20, 10)
        # The custom layer is registered like any built-in submodule.
        self.custom_layer = my_layer(10, 3)
        print(f'Model Instantiated \n{self}')

    def forward(self, x):
        """Run ``x`` through l1, l2 and the custom layer, in that order."""
        hidden = self.l2(self.l1(x))
        return self.custom_layer(hidden)

In [7]:
# Smoke test of the custom layer on a single 20-feature vector.
custom_layer = my_layer(20, 10)
# torch.randn already returns a float32 tensor, so no legacy FloatTensor wrapper is needed.
test_tensor = torch.randn(20)
# Call the module itself rather than .forward() so that nn.Module.__call__
# (and any registered hooks) runs.
custom_layer(test_tensor)

Shape after matmul with weights - torch.Size([10])
Shape after after adding bias - torch.Size([10])
Shape after ReLu operation - torch.Size([10])


tensor([0.3100, 0.0000, 0.0000, 8.2378, 0.0000, 0.0000, 2.4701, 3.4011, 0.0000,
        0.0000], grad_fn=<ReluBackward0>)

In [8]:
# Smoke test of the full model on a batch of two 30-feature rows.
custom_model = custom_clf()
# torch.randn already returns a float32 tensor, so no legacy FloatTensor wrapper is needed.
test_tensor_2 = torch.randn(2, 30)
# Call the module itself rather than .forward() so that nn.Module.__call__
# (and any registered hooks) runs.
custom_model(test_tensor_2)

Model Instantiated 
custom_clf(
  (l1): Linear(in_features=30, out_features=20, bias=True)
  (l2): Linear(in_features=20, out_features=10, bias=True)
  (custom_layer): my_layer()
)
Shape after matmul with weights - torch.Size([2, 3])
Shape after after adding bias - torch.Size([2, 3])
Shape after ReLu operation - torch.Size([2, 3])


tensor([[0.4589, 3.2610, 0.0000],
        [0.9169, 4.7479, 0.2257]], grad_fn=<ReluBackward0>)

## Example 2
### Dense Dirichlet Layer

In [9]:
class DenseDirichletLayer(nn.Module):
    """Linear layer whose outputs parameterise a Dirichlet distribution.

    The linear output is exponentiated to obtain non-negative evidence,
    ``alpha = evidence + 1`` gives the concentration parameters, and the
    returned probabilities are ``alpha`` normalised over the last dimension.

    Args:
        in_feat: size of the last dimension of the input.
        out_feat: number of outputs (one concentration parameter each).
    """

    def __init__(self, in_feat, out_feat):
        super().__init__()
        self.in_feat = in_feat
        self.out_feat = out_feat
        self.dense = nn.Linear(in_feat, out_feat)

    def forward(self, x):
        """Return ``(prob, alpha)`` for input ``x`` of shape ``(..., in_feat)``.

        Both outputs have shape ``(..., out_feat)``. Shapes of the
        intermediate tensors are printed for inspection.
        """
        print(f'Shape of X - {x.shape}')
        output = self.dense(x)
        print(f'Shape pf output - {output.shape}')
        # NOTE: exp() is unbounded, so very large linear outputs overflow to
        # inf and the normalisation below then yields nan.
        evidence = torch.exp(output)
        print(f'Shape of Evidence - {evidence.shape}')
        alpha = evidence + 1
        print(f'Shape of alpha - {alpha.shape}')
        # Normalise over the last (output) dimension instead of a hard-coded
        # dim=1: identical for 2-D input, and also correct for unbatched 1-D
        # input or input with extra leading dimensions.
        prob = alpha / torch.sum(alpha, dim=-1, keepdim=True)
        print(f'Shape of prob - {prob.shape}')
        return prob, alpha

In [10]:
class clf_model(nn.Module):
    """Classifier with a Dirichlet output head and an evidential loss.

    Architecture: Linear(784 -> 100) -> Linear(100 -> 50) -> tanh ->
    DenseDirichletLayer(50 -> 10). Constructing the model prints its module
    summary.
    """

    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(28*28, 100)
        self.l2 = nn.Linear(100, 50)
        self.dirichlet_layer = DenseDirichletLayer(50, 10)
        print(f'Model Instantiated \n{self}')

    def forward(self, x):
        """Return ``(prob, alpha)`` from the Dirichlet head for input ``x``."""
        x = self.l1(x)
        x = self.l2(x)
        x = torch.tanh(x)
        prob, alpha = self.dirichlet_layer(x)
        return prob, alpha

    def loss(self, alpha, label):
        """Per-sample evidential loss.

        Args:
            alpha: Dirichlet concentration parameters, shape ``(N, K)``.
            label: targets broadcastable against ``alpha``; the formula
                expects one-hot rows of shape ``(N, K)``.

        Returns:
            Tensor of shape ``(N,)``: squared error of the Dirichlet mean (A)
            plus its variance term (B) plus the KL divergence (C) between the
            Dirichlet with the target-class evidence removed and the uniform
            Dirichlet.
        """
        # Integer one-hot labels are accepted; the arithmetic is done in alpha's dtype.
        label = label.to(alpha.dtype)

        S = torch.sum(alpha, dim=1, keepdim=True)
        m = alpha / S

        A = torch.sum((label - m) ** 2, dim=1, keepdim=True)
        B = torch.sum(alpha * (S - alpha) / (S * S * (S + 1)), dim=1, keepdim=True)

        # Replace the target-class concentration by 1 so that only misleading
        # evidence is penalised by the KL term.
        alpha_hat = label + (1 - label) * alpha
        C = self.KL(alpha_hat)

        # A, B and C are all (N, 1). Keeping them aligned avoids the silent
        # (N, 1) + (N,) -> (N, N) broadcast; drop the singleton axis at the end.
        return (A + B + C).squeeze(1)

    def KL(self, alpha):
        """KL divergence ``KL(Dir(alpha) || Dir(1, ..., 1))`` for each row.

        Args:
            alpha: concentration parameters, shape ``(N, K)``.

        Returns:
            Tensor of shape ``(N, 1)``.
        """
        # Build the uniform prior on alpha's device/dtype so the method also
        # works when the model lives on the GPU.
        beta = torch.ones((1, alpha.shape[1]), dtype=alpha.dtype, device=alpha.device)
        S_alpha = torch.sum(alpha, dim=1, keepdim=True)
        S_beta = torch.sum(beta, dim=1, keepdim=True)
        lnB = torch.lgamma(S_alpha) - torch.sum(torch.lgamma(alpha), dim=1, keepdim=True)
        lnB_uni = torch.sum(torch.lgamma(beta), dim=1, keepdim=True) - torch.lgamma(S_beta)

        dg0 = torch.digamma(S_alpha)
        dg1 = torch.digamma(alpha)

        # Sum over the class axis (dim=1), not the batch axis, so that every
        # sample gets its own divergence and samples are not mixed together.
        kl = torch.sum((alpha - beta) * (dg1 - dg0), dim=1, keepdim=True) + lnB + lnB_uni
        return kl


In [11]:
# Smoke test: push a random (2, 100) batch through a standalone Dirichlet layer.
# NOTE(review): this rebinds `test_tensor`, which an earlier cell bound to a 1-D tensor.
test_tensor = torch.randn(2, 100)
dirichlet_layer = DenseDirichletLayer(100, 10)
# Last expression of the cell, so the returned (prob, alpha) tuple is displayed.
dirichlet_layer(test_tensor)

Shape of X - torch.Size([2, 100])
Shape pf output - torch.Size([2, 10])
Shape of Evidence - torch.Size([2, 10])
Shape of alpha - torch.Size([2, 10])
Shape of prob - torch.Size([2, 10])


(tensor([[0.0726, 0.0989, 0.0666, 0.0874, 0.1117, 0.1626, 0.0912, 0.1133, 0.1283,
          0.0673],
         [0.1116, 0.0804, 0.0844, 0.0791, 0.1345, 0.0922, 0.0783, 0.0673, 0.0921,
          0.1801]], grad_fn=<DivBackward0>),
 tensor([[1.5740, 2.1445, 1.4433, 1.8954, 2.4225, 3.5260, 1.9781, 2.4571, 2.7811,
          1.4597],
         [2.2750, 1.6390, 1.7199, 1.6130, 2.7409, 1.8782, 1.5950, 1.3711, 1.8769,
          3.6706]], grad_fn=<AddBackward0>))

In [12]:
# Random batch of two rows with 28*28 = 784 features each
# (presumably standing in for flattened 28x28 images - confirm against the real data).
# NOTE(review): this rebinds `test_tensor_2`, which an earlier cell bound to a (2, 30) tensor.
test_tensor_2 = torch.randn(2, 28*28)
# Constructing the model prints its module summary from __init__.
dirichlet_model = clf_model()

Model Instantiated 
clf_model(
  (l1): Linear(in_features=784, out_features=100, bias=True)
  (l2): Linear(in_features=100, out_features=50, bias=True)
  (dirichlet_layer): DenseDirichletLayer(
    (dense): Linear(in_features=50, out_features=10, bias=True)
  )
)


In [13]:
# Forward pass through the full model; the cell displays the returned (prob, alpha) tuple.
dirichlet_model(test_tensor_2)

Shape of X - torch.Size([2, 50])
Shape pf output - torch.Size([2, 10])
Shape of Evidence - torch.Size([2, 10])
Shape of alpha - torch.Size([2, 10])
Shape of prob - torch.Size([2, 10])


(tensor([[0.1037, 0.0954, 0.0940, 0.1032, 0.0921, 0.1076, 0.0997, 0.0977, 0.1091,
          0.0976],
         [0.0904, 0.1050, 0.1073, 0.0961, 0.0950, 0.0919, 0.0972, 0.0966, 0.1116,
          0.1089]], grad_fn=<DivBackward0>),
 tensor([[1.9460, 1.7905, 1.7635, 1.9376, 1.7283, 2.0188, 1.8714, 1.8343, 2.0468,
          1.8310],
         [1.7761, 2.0623, 2.1079, 1.8873, 1.8657, 1.8044, 1.9098, 1.8971, 2.1925,
          2.1385]], grad_fn=<AddBackward0>))