# Test RedisAI with PyTorch

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import torchvision.models as models

c:\users\diego\cs\thesis\venv\lib\site-packages\numpy\.libs\libopenblas.NOIJJG62EMASZI6NYURL6JBKM4EVBGM7.gfortran-win_amd64.dll
c:\users\diego\cs\thesis\venv\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll
  stacklevel=1)


## Create the network

Take the network from the pytorch MNIST examples 
(https://github.com/pytorch/examples/blob/master/mnist/main.py)

In [2]:
class Net(nn.Module):
    """Small convolutional classifier from the PyTorch MNIST example.

    Two 3x3 convolutions, a 2x2 max-pool and two fully connected layers with
    dropout, ending in a log-softmax over 10 outputs. ``fc1`` expects 9216
    flattened features, which is what a ``(N, 1, 28, 28)`` input produces
    (28 -> 26 -> 24 -> pooled 12; 64 * 12 * 12 = 9216).
    """

    def __init__(self):
        super().__init__()
        # Registration order determines named_children()/state_dict() ordering.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(in_features=9216, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return the log-softmax (over dim 1) class scores for the batch ``x``."""
        # Convolutional feature extractor.
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))
        features = self.dropout1(F.max_pool2d(features, 2))

        # Classifier head on the flattened feature maps.
        hidden = F.relu(self.fc1(torch.flatten(features, 1)))
        logits = self.fc2(self.dropout2(hidden))
        return F.log_softmax(logits, dim=1)

In [3]:
torch.cuda.is_available()

True

## Create the dataset and the data loader

In [4]:
# Preprocessing applied to every sample: convert the image to a tensor, then
# normalise its single channel with mean 0.1307 and std 0.3081.
# NOTE(review): these constants are presumably the MNIST training-set
# statistics used by the upstream example — confirm.
transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])

# Training split of MNIST read from ./data. With download=False nothing is
# fetched here, so the dataset files must already be present locally.
data = datasets.MNIST('./data', train=True, download=False, transform=transform)

In [5]:
data.targets.dtype

torch.int64

In [6]:
train_loader = torch.utils.data.DataLoader(data, batch_size=128)

In [7]:
len(train_loader.dataset)

60000

In [8]:
it = iter(train_loader)

### Load a single tensor to forward

In [6]:
# Use the built-in next(): the Python-2 style `.next()` alias is not available
# on the DataLoader iterators of newer PyTorch releases.
img, target = next(it)

## Create a function to store the gradients during training at the end of each epoch and see how much time it takes

In [9]:
tensor_d = {}

# We get similar performance with both methods, and with the second one
# (the dict) we don't need to use twice the amount of GPU memory.

# Open question: should we do this with a backup model, or should we save the
# state in a dict instead? A dict could also hold CUDA tensors.
def update_tensor(m: nn.Module, backup: nn.Module):
    """Accumulate the gradients of ``m`` into the ``.grad`` slots of ``backup``.

    The direct children of both modules are walked in lockstep, so ``backup``
    must have the same architecture as ``m``. For this we need the two
    networks to reside on the same device (e.g. both on the GPU), which uses
    extra memory; a dictionary of tensors is the cheaper alternative.

    The first gradient seen for a parameter is stored as an independent copy.
    Storing the tensor itself would alias the live gradient of ``m``, so an
    in-place ``zero_()`` or a later accumulation would silently corrupt the
    value held by ``backup``.

    Args:
        m: module whose parameters carry freshly computed gradients.
        backup: structurally identical module that accumulates them.

    Children without a ``weight`` attribute are skipped, as are parameters
    that are ``None`` (e.g. ``bias=False``) or have no gradient yet.
    """
    with torch.no_grad():
        for (_, src), (_, dst) in zip(m.named_children(), backup.named_children()):
            if not hasattr(src, 'weight'):
                continue

            for attr in ('weight', 'bias'):
                src_param = getattr(src, attr, None)
                dst_param = getattr(dst, attr, None)
                if src_param is None or dst_param is None or src_param.grad is None:
                    continue

                if dst_param.grad is None:
                    # Independent copy: never share storage with the live grad.
                    dst_param.grad = src_param.grad.detach().clone()
                else:
                    dst_param.grad += src_param.grad


# def update_tensor_dict(m:nn.Module, d:dict):
#     def needs_saving(t):
#         t = str(t)
#         if 'conv' in t or 'linear' in t:
#             return True
#         return False
    
#     with torch.no_grad():
#         for n, l in m.named_modules():
#             if needs_saving(type(l)):
#                 if n in d:
#                     d[f'{n}-weight-grad'] += l.weight.grad
#                     d[f'{n}-bias-grad'] += l.bias.grad
#                 else:
#                     d[f'{n}-weight-grad'] = l.weight.grad
#                     d[f'{n}-bias-grad'] = l.bias.grad
                    
def update_tensor_dict(m: nn.Module, d: dict):
    """Accumulate the current gradients of ``m`` into ``d``.

    For every optimizable layer (as decided by ``_is_optimizable``) the
    weight gradient is added under ``'<layer>-weight-grad'`` and, when the
    layer has a bias, the bias gradient under ``'<layer>-bias-grad'``. The
    dict can then be saved once the epoch is finished.

    Membership is tested on the exact key that is stored; testing the bare
    layer name never matches, which makes every call overwrite the previous
    value instead of accumulating. The first value stored for a key is a
    clone, so later in-place changes to the live ``.grad`` tensors cannot
    alter what has been accumulated.

    Args:
        m: module whose parameters carry freshly computed gradients.
        d: mapping updated in place with the accumulated gradients.

    Parameters that are ``None`` or have no gradient yet are skipped.
    """
    with torch.no_grad():
        for name, layer in m.named_modules():
            if not _is_optimizable(layer):
                continue

            for attr in ('weight', 'bias'):
                param = getattr(layer, attr, None)
                if param is None or param.grad is None:
                    continue

                key = f'{name}-{attr}-grad'
                if key in d:
                    d[key] += param.grad
                else:
                    d[key] = param.grad.detach().clone()
                    
def _is_optimizable(layer: nn.Module) -> bool:
    """Should save layer returns just whether the layer is optimizable or not
    and thus if it should be sent to the parameter server"""
    t = str(type(layer))
    if 'conv' in t or 'linear' in t:
        return True
    return False


In [32]:
model = models.resnet18()

In [42]:
# List every conv / linear layer of the model together with whether its bias
# is None and whether it exposes a `weight` attribute.
# TODO: per the original note, this layer filter should live inside the
# gradient-saving helper (update_tensor_dict) instead of being repeated here.
for n, l in model.named_modules():
    if 'conv' in str(type(l)) or 'linear' in str(type(l)):
        print(n, type(l), l.bias is None, hasattr(l, 'weight'))

conv1 <class 'torch.nn.modules.conv.Conv2d'> False True
conv2 <class 'torch.nn.modules.conv.Conv2d'> False True
fc1 <class 'torch.nn.modules.linear.Linear'> False True
fc2 <class 'torch.nn.modules.linear.Linear'> False True


## Create the network and do a forward and backward pass to get the gradients

In [91]:
import copy
# Two separately constructed Net instances: `model` is the one handed to the
# optimizer, `backup` is a second instance of the same architecture.
model = Net()
backup = Net()
# backup = copy.deepcopy(model)

# Adam is given the parameters of `model` only; `backup` is not registered with it.
optimizer = optim.Adam(model.parameters(), lr=0.01)
# Switch to training mode (the Net defined above contains Dropout layers).
model.train()

# Send the network to the GPU
# model= model
# backup.cuda()

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

Parameter containing:
tensor([[[[ 0.0235,  0.2356,  0.2535],
          [-0.2728,  0.2479,  0.0950],
          [ 0.0617,  0.2122,  0.0183]]],


        [[[-0.2865, -0.1107,  0.0394],
          [ 0.1674,  0.0032,  0.0407],
          [ 0.2226, -0.1953,  0.2298]]],


        [[[-0.0193,  0.0140, -0.2342],
          [-0.2632, -0.0091, -0.1257],
          [-0.0227,  0.1940,  0.0351]]],


        [[[ 0.2221,  0.1044, -0.1893],
          [-0.0967,  0.0654, -0.1620],
          [-0.2458, -0.1814, -0.0647]]],


        [[[ 0.1544,  0.0308,  0.3172],
          [ 0.1629, -0.2015, -0.1664],
          [ 0.1575, -0.1471, -0.0732]]],


        [[[ 0.2990, -0.0592, -0.3025],
          [ 0.1203,  0.0926,  0.2259],
          [ 0.2874,  0.2990,  0.1681]]],


        [[[ 0.0245, -0.0052,  0.0436],
          [-0.0732, -0.3006,  0.2841],
          [ 0.2441,  0.2335, -0.2484]]],


        [[[ 0.2053, -0.0040,  0.1647],
          [-0.3236,  0.3273, -0.0845],
          [-0.2851, -0.0678,  0.2508]]],


        [[

In [97]:
save_model_weights(model)

Saving model to the database
Setting weights for layer conv1
Setting bias for layer conv1
Setting weights for layer conv2
Setting bias for layer conv2
Setting weights for layer fc1
Setting bias for layer fc1
Setting weights for layer fc2
Setting bias for layer fc2
Saved model to the database


In [59]:
# Load model weights from redis
load_model_weights(backup)

Loading model from database
Loading weights for layer conv1
Loading bias for layer conv1
Loading weights for layer conv2
Loading bias for layer conv2
Loading weights for layer fc1
Loading bias for layer fc1
Loading weights for layer fc2
Loading bias for layer fc2
Model loaded from database


In [96]:
%%time
count = 0


tensor_d.clear()

model = model.cuda()

for inputs, targets in train_loader:
    
    count += len(inputs)
    
    optimizer.zero_grad()
    
    if count % 2048 == 0:
        print(f'Training {count}/{len(train_loader.dataset)} \t {loss.item()}')
    
    inputs = inputs.cuda()
    targets = targets.cuda()

    # Zero the optimizer before the forward pass
    optimizer.zero_grad()

    # forward pass
    out = model(inputs)

    loss = F.nll_loss(out, targets)
    loss.backward()
    
    #Here update the models
    update_tensor_dict(model, tensor_d)
    
    optimizer.step()

Training 2048/60000 	 1.2616820335388184
Training 4096/60000 	 0.68885338306427
Training 6144/60000 	 0.6343638896942139
Training 8192/60000 	 0.7935402393341064
Training 10240/60000 	 0.575653612613678
Training 12288/60000 	 0.5969637632369995
Training 14336/60000 	 0.766459584236145
Training 16384/60000 	 0.4128168821334839
Training 18432/60000 	 0.3919685482978821
Training 20480/60000 	 0.5609372854232788
Training 22528/60000 	 0.36649125814437866
Training 24576/60000 	 0.33986788988113403
Training 26624/60000 	 0.5276138186454773
Training 28672/60000 	 0.5822106599807739
Training 30720/60000 	 0.6331431269645691
Training 32768/60000 	 0.37781375646591187
Training 34816/60000 	 0.6807652711868286
Training 36864/60000 	 0.40601012110710144
Training 38912/60000 	 0.5818591117858887
Training 40960/60000 	 0.28688257932662964
Training 43008/60000 	 0.5239367485046387
Training 45056/60000 	 0.5885693430900574
Training 47104/60000 	 0.425136536359787
Training 49152/60000 	 0.6892682313919

In [68]:
tensor_d

{}

In [60]:
model.conv1.weight

Parameter containing:
tensor([[[[ 4.6149e-02,  2.5382e-02, -1.3919e-01],
          [ 9.6464e-02,  2.1503e-01,  1.5693e-04],
          [-1.7648e-01, -2.4237e-02,  6.4049e-02]]],


        [[[-3.3523e-03, -3.0967e-02,  1.1224e-02],
          [-1.0897e-01, -3.8601e-02,  9.7182e-03],
          [-2.4516e-02, -2.1932e-02, -1.6157e-01]]],


        [[[-2.8873e-02,  1.9957e-01,  3.3108e-01],
          [-1.8774e-01,  1.5224e-01,  5.6744e-02],
          [-5.9952e-01, -1.5616e-01, -4.9861e-01]]],


        [[[ 1.1531e-01,  2.2534e-01,  3.4327e-01],
          [-3.0778e-01, -6.4887e-01, -4.0805e-01],
          [-2.4696e-02,  2.3278e-01,  1.4784e-02]]],


        [[[ 2.8062e-01,  1.2544e-01, -5.1792e-01],
          [-1.1023e-01,  1.4122e-02,  2.1188e-01],
          [-3.9787e-01,  2.2591e-01,  1.5227e-01]]],


        [[[ 1.5141e-01, -2.1847e-02, -3.5513e-01],
          [ 1.4703e-01, -1.5702e-01, -2.6748e-01],
          [-2.7645e-01,  6.2271e-02, -1.9959e-01]]],


        [[[-2.4078e-02, -1.1554e-01,

In [61]:
# "before" — label kept as a comment; as bare code it is a syntax error.
backup.conv1.weight

Parameter containing:
tensor([[[[-0.3718, -0.1068, -0.0426],
          [ 0.2245,  0.1057, -0.2058],
          [ 0.0035, -0.0243, -0.4423]]],


        [[[-0.1578,  0.2234, -0.0661],
          [-0.3676,  0.0397,  0.1246],
          [ 0.2546, -0.0928, -0.2072]]],


        [[[ 0.0256,  0.1274,  0.1096],
          [-0.2469, -0.3871, -0.0428],
          [ 0.0592, -0.2570,  0.0906]]],


        [[[-0.0878,  0.3395,  0.1095],
          [-0.3896, -0.1001, -0.0935],
          [ 0.2531,  0.2041,  0.1415]]],


        [[[ 0.1875, -0.1457, -0.0546],
          [ 0.2055, -0.0356,  0.2497],
          [ 0.1004,  0.0297, -0.2204]]],


        [[[-0.1961, -0.3088, -0.2178],
          [-0.1301, -0.1537, -0.1746],
          [-0.0933,  0.3111,  0.2850]]],


        [[[-0.1250, -0.1383, -0.0649],
          [-0.0854, -0.0944,  0.1091],
          [-0.0667,  0.0095, -0.1420]]],


        [[[ 0.2797,  0.0304,  0.1671],
          [-0.0938, -0.1174,  0.2365],
          [-0.1415, -0.2085, -0.3060]]],


        [[

## Start with the RedisAI part

In [15]:
import redisai as rai

In [16]:
# Host/port pairs of the RedisAI instances.
# NOTE(review): RAI_KUBE / RAI_PORT_KUBE are not used by the client created
# below; the names suggest a Kubernetes-exposed instance — confirm.
RAI_KUBE = '192.168.99.101'
RAI_PORT_KUBE = 31618
RAI = '192.168.99.102'
PORT = 6379

# Client used by every later cell that talks to the database.
con = rai.Client(host=RAI, port=PORT)

In [17]:
# Store two tensors as float32 under the keys 'grad-conv1' and 'bias-conv1'.
# NOTE(review): c1_grad and c1_bias are not defined in any cell visible here;
# they must come from earlier kernel state — define them before running.
con.tensorset('grad-conv1', c1_grad.numpy(), dtype='float32')
con.tensorset('bias-conv1', c1_bias.numpy(), dtype='float32')

'OK'

In [18]:
con.set('example', 'hola')
con.set('exaaaaa', 'hola2')

True

In [12]:
%%time
a = con.tensorget('example:fc1-weight', as_numpy=False)


Wall time: 2.23 s


### Set all the model weights and biases in the database

In [19]:
%%time
# Prefix for every key written below; a later cell reuses `psId` to read the
# same keys back.
psId = 'example'

# Upload weight and bias of each direct child that has a `bias` attribute,
# as float32 tensors under '<psId>:<child>-weight' / '<psId>:<child>-bias'.
for n, l in model.named_children():
    if hasattr(l, 'bias'):
        key_w = f'{psId}:{n}-weight'
        key_b = f'{psId}:{n}-bias'

        print('Setting', key_w)
        # Move to CPU and detach from autograd before converting to NumPy.
        con.tensorset(key_w, l.weight.cpu().detach().numpy(), dtype='float32')

        print('Setting', key_b)
        con.tensorset(key_b, l.bias.cpu().detach().numpy(), dtype='float32')
    
    

Setting example:conv1-weight
Setting example:conv1-bias
Setting example:conv2-weight
Setting example:conv2-bias
Setting example:fc1-weight
Setting example:fc1-bias
Setting example:fc2-weight
Setting example:fc2-bias
Wall time: 41.9 ms


### Set the model gradients from the dict

In [17]:
# Push every accumulated gradient tensor to the database under '<name>/1'.
# NOTE(review): the meaning of the '/1' suffix is not visible here —
# presumably a worker/function id; confirm.
for k, v in tensor_d.items():
    print('Setting' , k)
    con.tensorset(f'{k}/1', v.cpu().numpy())

Setting conv1-weight-grad
Setting conv1-bias-grad
Setting conv2-weight-grad
Setting conv2-bias-grad
Setting fc1-weight-grad
Setting fc1-bias-grad
Setting fc2-weight-grad
Setting fc2-bias-grad


### Try to get all of the layers from redis

In [59]:
# Read back the '<psId>:<child>-weight' / '<psId>:<child>-bias' tensors and
# install them on the matching children of `m2`.
# NOTE(review): `m2` is not defined in any cell visible here — it must come
# from earlier kernel state.
# Each assignment replaces the layer's Parameter with a new object built from
# the fetched array.
for n, l in m2.named_children():
    if hasattr(l, 'bias'):
        key_w = f'{psId}:{n}-weight'
        key_b = f'{psId}:{n}-bias'

        print('Getting', key_w)
        t = con.tensorget(key_w)
        l.weight = torch.nn.Parameter(torch.from_numpy(t))

        print('Getting', key_b)
        t = con.tensorget(key_b)
        l.bias =torch.nn.Parameter(torch.from_numpy(t))

Getting example:conv1-weight
Getting example:conv1-bias
Getting example:conv2-weight
Getting example:conv2-bias
Getting example:fc1-weight
Getting example:fc1-bias
Getting example:fc2-weight
Getting example:fc2-bias


### Try to set the model to REDIS

We can simply save the state dict and retrieve it very quickly as a Python object using the following functions.

In [92]:
def save_model_weights(m: nn.Module):
    """Store the parameters of every optimizable layer of ``m`` in the database.

    Layers are selected with ``_is_optimizable`` rather than by checking for
    a bias attribute, since other layer types (e.g. batch norm) can have one.
    Each weight is written through the module-level ``con`` client as a
    float32 tensor under ``'<layer>.weight'``; the bias goes under
    ``'<layer>.bias'`` and is skipped when it is ``None``.
    """
    print('Saving model to the database')

    with torch.no_grad():
        for name, layer in m.named_modules():
            if not _is_optimizable(layer):
                continue

            # Weights are always present on an optimizable layer.
            print(f'Setting weights for layer {name}')
            weight_array = layer.weight.cpu().detach().numpy()
            con.tensorset(f'{name}.weight', weight_array, dtype='float32')

            # The bias is optional.
            if layer.bias is None:
                continue
            print(f'Setting bias for layer {name}')
            bias_array = layer.bias.cpu().detach().numpy()
            con.tensorset(f'{name}.bias', bias_array, dtype='float32')

    print('Saved model to the database')
    
def load_model_weights(m: nn.Module):
    """Load the model weights saved in the database to start the new epoch.

    For every optimizable layer (as decided by ``_is_optimizable``) the
    tensors stored under ``'<layer>.weight'`` and ``'<layer>.bias'`` are
    fetched through the module-level ``con`` client. This is the key layout
    written by ``save_model_weights``; reading ``'<layer>-weight'`` would look
    up keys that function never writes.

    Values are copied *into* the existing parameters instead of replacing
    them with new ``nn.Parameter`` objects. The parameters therefore keep
    their identity (an optimizer built from ``m.parameters()`` keeps updating
    the right tensors), their device and their dtype. A shape mismatch between
    the stored tensor and the parameter raises from ``copy_``.

    Some layers (e.g. in resnet) have no bias, or it is ``None``; the bias is
    skipped for those.
    """
    print('Loading model from database')
    with torch.no_grad():
        for name, layer in m.named_modules():
            # only load and save layers that are optimizable (conv or fc)
            if not _is_optimizable(layer):
                continue

            # Load the weight
            print(f'Loading weights for layer {name}')
            w = con.tensorget(f'{name}.weight')
            layer.weight.copy_(torch.from_numpy(w))

            # Retrieve the bias only when the layer actually has one
            if layer.bias is not None:
                print(f'Loading bias for layer {name}')
                b = con.tensorget(f'{name}.bias')
                layer.bias.copy_(torch.from_numpy(b))

    print('Model loaded from database')
    
def load_state_dict(m: nn.Module) -> dict:
    """Fetch the stored parameters of ``m`` and return them as a dict.

    Only optimizable layers (as decided by ``_is_optimizable``) are read.
    For each one the tensor stored under ``'<layer>.weight'`` is fetched
    through the module-level ``con`` client and, when the layer's bias is not
    ``None``, the one under ``'<layer>.bias'`` as well. The returned mapping
    uses those same keys and holds ``torch`` tensors built from the fetched
    arrays.
    """
    state = {}

    for name, layer in m.named_modules():
        if not _is_optimizable(layer):
            continue

        # Weight entry
        print(f'Loading weights for layer {name}')
        weight_key = f'{name}.weight'
        state[weight_key] = torch.from_numpy(con.tensorget(weight_key))

        # Bias entry; some layers (e.g. in resnet) have no bias
        if layer.bias is None:
            continue
        print(f'Loading bias for layer {name}')
        bias_key = f'{name}.bias'
        state[bias_key] = torch.from_numpy(con.tensorget(bias_key))

    return state

In [93]:
d = load_state_dict(model)

Loading weights for layer conv1
Loading bias for layer conv1
Loading weights for layer conv2
Loading bias for layer conv2
Loading weights for layer fc1
Loading bias for layer fc1
Loading weights for layer fc2
Loading bias for layer fc2


In [95]:
model.load_state_dict(d)

<All keys matched successfully>

In [9]:
model.state_dict()

OrderedDict([('conv1.weight',
              tensor([[[[ 0.1247, -0.2343,  0.2413],
                        [ 0.2814, -0.2949, -0.1152],
                        [ 0.1794,  0.3214,  0.1535]]],
              
              
                      [[[-0.2677, -0.2998,  0.2019],
                        [-0.0371, -0.2684,  0.3079],
                        [-0.1485, -0.1790,  0.1690]]],
              
              
                      [[[ 0.0169, -0.2387,  0.1334],
                        [ 0.0769,  0.2841,  0.1754],
                        [ 0.1311,  0.0290, -0.0110]]],
              
              
                      [[[-0.0936, -0.2159, -0.1453],
                        [-0.1982,  0.3253,  0.0502],
                        [ 0.2596, -0.1228, -0.2570]]],
              
              
                      [[[-0.0238,  0.1362,  0.2403],
                        [ 0.1665, -0.2756,  0.0901],
                        [ 0.0269, -0.2431,  0.0739]]],
              
              
               

In [10]:
import pickle
p = pickle.dumps(model.state_dict())

In [12]:
con.set('model', p)

True

In [24]:
%%time

d = con.get('model')

Wall time: 29.9 ms


In [14]:
# SECURITY: pickle.loads can execute arbitrary code contained in the payload.
# `d` was read from the 'model' key in Redis, so only do this when that
# instance and key are fully trusted.
s = pickle.loads(d)

In [16]:
model.load_state_dict(s)

<All keys matched successfully>

### Save the layer names in redis

m = Net()

In [10]:
# How to get the weighted layers and save the model
[n for n, l in m.named_children() if hasattr(l, "bias")]

['conv1', 'conv2', 'fc1', 'fc2']

In [12]:
l = " ".join([n for n, l in m.named_children() if hasattr(l, "bias")])
con.set("layers", l)

True

In [13]:
# 'layers' was written as a plain string by the previous cell, and RPUSH on a
# key that does not hold a list fails with WRONGTYPE, so drop the key first.
# This also makes the cell idempotent: re-running it no longer appends
# duplicate entries to the list.
con.delete('layers')
for l in [n for n, l in m.named_children() if hasattr(l, "bias")]:
    con.rpush('layers', l)

In [17]:
import json

layers = ['conv1', 'conv2', 'fc1', 'fc2']
l = json.dumps(layers)

l

'["conv1", "conv2", "fc1", "fc2"]'

### Save the layers by making an http request to the server

In [16]:
import requests

In [25]:
print(l)

requests.post("http://localhost:58682/layers", data=l)

["conv1", "conv2", "fc1", "fc2"]


<Response [200]>