# Test RedisAI with Pytorch

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

## Create the network

Take the network from the pytorch MNIST examples 
(https://github.com/pytorch/examples/blob/master/mnist/main.py)

In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

In [3]:
torch.cuda.is_available()

True

## Create the dataset and the data loader

In [27]:
transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])

data = datasets.MNIST('./data', train=True, download=False, transform=transform)

In [28]:
train_loader = torch.utils.data.DataLoader(data, batch_size=256)

In [5]:
it = iter(train_loader)

### Load a single tensor to forward

In [6]:
img, target = it.next()

## Create a function to store the gradients during training at the end of each epoch and see how much time it takes

In [30]:
tensor_d = {}

# We get similar performance with both methods,
# and with the second one we dont need to use twice the amount of GPU mem

# Should we do this with a backup model or should be save the state in a dict for example?? we could add cuda tensors there
def update_tensor(m: nn.Module, backup: nn.Module):
    """Saves all of the model layers and adds the gradients
    
    For this we need the two networks to reside in the GPU
    which will use extra memory, instead of that we could use a dictionary
    """
    for (n1, l1), (n2, l2) in zip(m.named_children(), backup.named_children()):
        if hasattr(l1, 'weight'):
            if l2.weight.grad is None:
                l2.weight.grad = l1.weight.grad
                l2.bias.grad = l1.bias.grad
            else:
                l2.weight.grad += l1.weight.grad
                l2.bias.grad += l1.bias.grad
                
def update_tensor_dict(m:nn.Module, d:dict):
    for n, l in m.named_children():
        if hasattr(l, 'weight'):
            if n in d:
                d[f'{n}-weight-grad'] += l.weight.grad
                d[f'{n}-bias-grad'] += l.bias.grad
            else:
                d[f'{n}-weight-grad'] = l.weight.grad
                d[f'{n}-bias-grad'] = l.bias.grad


<bound method Module.type of Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)>

In [None]:
for n, l in model.named_children():
    if hasattr(l, 'weight'):
        print(l.weight.grad)

## Create the network and do a forward and backward pass to get the gradients

In [25]:
import copy
model = Net()
# backup = copy.deepcopy(model)

optimizer = optim.Adam(model.parameters(), lr=0.01)
model.train()

# Send the network to the GPU
model= model.cuda()
# backup.cuda()

In [31]:
%%time
count = 0

for inputs, targets in train_loader:
    
    count += len(inputs)
    
    optimizer.zero_grad()
    
    if count % 2048 == 0:
        print(f'Training {count}/{236*len(train_loader)}')
    
    inputs = inputs.cuda()
    targets = targets.cuda()

    # Zero the optimizer before the forward pass
    optimizer.zero_grad()

    # forward pass
    out = model(inputs)

    loss = F.nll_loss(out, targets)
    loss.backward()
    
    #Here update the models
    update_tensor_dict(model, tensor_d)
    
    optimizer.step()

Training 2048/55460
Training 4096/55460
Training 6144/55460
Training 8192/55460
Training 10240/55460
Training 12288/55460
Training 14336/55460
Training 16384/55460
Training 18432/55460
Training 20480/55460
Training 22528/55460
Training 24576/55460
Training 26624/55460
Training 28672/55460
Training 30720/55460
Training 32768/55460
Training 34816/55460
Training 36864/55460
Training 38912/55460
Training 40960/55460
Training 43008/55460
Training 45056/55460
Training 47104/55460
Training 49152/55460
Training 51200/55460
Training 53248/55460
Training 55296/55460
Training 57344/55460
Training 59392/55460
Wall time: 8.36 s


In [75]:
backup.conv1.weight.grad

tensor([[[[ 1.4642e-01,  2.7304e-01,  4.5395e-01],
          [ 2.6783e-01,  3.6382e-01,  4.4114e-01],
          [ 3.1757e-01,  3.6190e-01,  2.8485e-01]]],


        [[[ 1.4341e-02,  4.2412e-02,  5.5452e-02],
          [-1.2178e-01, -1.5585e-01, -1.3747e-01],
          [-2.2299e-01, -2.6808e-01, -2.4312e-01]]],


        [[[-4.6722e-02, -1.4312e-01, -1.6756e-01],
          [ 8.2408e-03, -1.0563e-01, -1.9977e-01],
          [ 3.4165e-02, -5.1862e-02, -1.0601e-01]]],


        [[[-8.0611e-03,  2.1725e-03, -4.4433e-03],
          [ 6.0551e-03,  5.6476e-04, -7.2787e-03],
          [ 2.3656e-02,  3.0154e-02,  2.3867e-03]]],


        [[[ 6.4638e-02,  5.0914e-02,  2.2711e-02],
          [ 5.5055e-02,  8.5083e-02,  1.7322e-01],
          [-3.8288e-02,  1.2097e-01,  1.4546e-01]]],


        [[[ 6.8327e-02,  1.0597e-01,  8.8448e-02],
          [-2.7004e-02, -3.0769e-02, -2.9263e-02],
          [-7.5956e-02, -5.8920e-02, -3.8645e-02]]],


        [[[ 4.3818e-02,  9.7501e-02,  1.4623e-01],
       

In [9]:
c1_grad = model.conv1.weight.grad
c1_bias = model.conv1.bias.grad

In [13]:
model.conv2.bias

Parameter containing:
tensor([ 0.0227, -0.0067, -0.0178, -0.0109, -0.0477, -0.0449,  0.0397, -0.0082,
         0.0264,  0.0336, -0.0039, -0.0088, -0.0036, -0.0117, -0.0381, -0.0327,
         0.0582, -0.0235,  0.0553,  0.0312, -0.0406,  0.0068, -0.0255, -0.0035,
        -0.0088, -0.0571, -0.0233,  0.0372,  0.0306, -0.0292, -0.0360, -0.0016,
        -0.0143, -0.0280,  0.0211, -0.0314,  0.0349, -0.0057,  0.0332, -0.0252,
        -0.0051, -0.0295, -0.0579,  0.0104, -0.0027, -0.0251, -0.0517,  0.0363,
        -0.0509, -0.0433, -0.0313,  0.0249, -0.0252, -0.0375, -0.0479, -0.0196,
         0.0496,  0.0129,  0.0549,  0.0393, -0.0151,  0.0468, -0.0266,  0.0164],
       requires_grad=True)

## Start with the REDIS AI part

In [3]:
import redisai as rai

In [4]:
con = rai.Client(host='192.168.99.102', port=6379)

In [17]:
con.tensorset('grad-conv1', c1_grad.numpy(), dtype='float32')
con.tensorset('bias-conv1', c1_bias.numpy(), dtype='float32')

'OK'

In [18]:
con.set('example', 'hola')
con.set('exaaaaa', 'hola2')

True

### Set all the model gradients to the database

In [44]:
psId = 'example'

for n, l in model.named_children():
    if hasattr(l, 'bias'):
        key_w = f'{psId}:{n}-weight'
        key_b = f'{psId}:{n}-bias'

        print('Setting', key_w)
        con.tensorset(key_w, l.weight.cpu().detach().numpy(), dtype='float32')

        print('Setting', key_b)
        con.tensorset(key_b, l.bias.cpu().detach().numpy(), dtype='float32')
    
    

Setting example:conv1-weight
Setting example:conv1-bias
Setting example:conv2-weight
Setting example:conv2-bias
Setting example:fc1-weight
Setting example:fc1-bias
Setting example:fc2-weight
Setting example:fc2-bias


### Try to set the model to REDIS

We can simply save the state dict and retrieve it super quickly from the following functions as a python object

In [9]:
model.state_dict()

OrderedDict([('conv1.weight',
              tensor([[[[ 0.1247, -0.2343,  0.2413],
                        [ 0.2814, -0.2949, -0.1152],
                        [ 0.1794,  0.3214,  0.1535]]],
              
              
                      [[[-0.2677, -0.2998,  0.2019],
                        [-0.0371, -0.2684,  0.3079],
                        [-0.1485, -0.1790,  0.1690]]],
              
              
                      [[[ 0.0169, -0.2387,  0.1334],
                        [ 0.0769,  0.2841,  0.1754],
                        [ 0.1311,  0.0290, -0.0110]]],
              
              
                      [[[-0.0936, -0.2159, -0.1453],
                        [-0.1982,  0.3253,  0.0502],
                        [ 0.2596, -0.1228, -0.2570]]],
              
              
                      [[[-0.0238,  0.1362,  0.2403],
                        [ 0.1665, -0.2756,  0.0901],
                        [ 0.0269, -0.2431,  0.0739]]],
              
              
               

In [10]:
import pickle
p = pickle.dumps(model.state_dict())

In [12]:
con.set('model', p)

True

In [24]:
%%time

d = con.get('model')

Wall time: 29.9 ms


In [14]:
s = pickle.loads(d)

In [16]:
model.load_state_dict(s)

<All keys matched successfully>

### Save the layer names in redis

m = Net()


In [10]:
# How to get the weighted layers and save the model
[n for n, l in m.named_children() if hasattr(l, "bias")]

['conv1', 'conv2', 'fc1', 'fc2']

In [5]:
import json

layers = ['conv1', 'conv2', 'fc1', 'fc2']
l = json.dumps(layers)

con.set('layers', l)

True