# Test RedisAI with Pytorch

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import torchvision.models as models

# import the modules used in the program
import train_utils

## Create the network

Take the network from the pytorch MNIST examples 
(https://github.com/pytorch/examples/blob/master/mnist/main.py)

In [None]:
class Net(nn.Module):
    """Small convolutional classifier from the PyTorch MNIST example.

    Two 3x3 convolutions feed a 2x2 max-pool, followed by dropout and two
    fully connected layers. The forward pass returns log-probabilities over
    10 outputs (``log_softmax`` along dimension 1).

    NOTE(review): ``fc1`` expects 9216 flattened features, which matches
    single-channel 28x28 inputs (64 * 12 * 12) -- confirm against the caller.
    """

    def __init__(self):
        super().__init__()
        # Layers are registered in the same order as the reference example so
        # that seeded initialisation and state-dict key order are unchanged.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(in_features=9216, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return the log-softmax scores (dim=1) for the batch ``x``."""
        # Convolutional feature extractor.
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))
        features = self.dropout1(F.max_pool2d(features, 2))

        # Classifier head on the flattened feature maps.
        hidden = F.relu(self.fc1(torch.flatten(features, 1)))
        logits = self.fc2(self.dropout2(hidden))
        return F.log_softmax(logits, dim=1)

In [None]:
torch.cuda.is_available()

## Create the dataset and the data loader

In [None]:
# Preprocessing applied to every image: convert to a tensor, then normalize
# with the single-channel constants 0.1307 and 0.3081 (the two positional
# arguments given to transforms.Normalize).
transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])

# MNIST training split rooted at ./data. download=False, so nothing is fetched
# here -- presumably the files must already be on disk (TODO confirm).
data = datasets.MNIST('./data', train=True, download=False, transform=transform)

In [None]:
train_loader = torch.utils.data.DataLoader(data, batch_size=128)

## Create a function to store the gradients during training at the end of each epoch and see how much time it takes

In [None]:
tensor_d = {}

# We get similar performance with both methods,
# and with the second one we don't need to use twice the amount of GPU memory

# Should we do this with a backup model, or should we save the state in a dict, for example? We could add CUDA tensors there
def update_tensor(m: nn.Module, backup: nn.Module):
    """Accumulate the gradients of ``m`` into the matching layers of ``backup``.

    The direct children of both modules are walked in lockstep, so ``backup``
    is expected to have the same layer layout as ``m``. For every child that
    has a ``weight`` and/or ``bias`` parameter with a gradient, that gradient
    is added to the ``.grad`` of the corresponding parameter in ``backup``.

    Both networks have to live on the same device, which doubles the memory
    used for parameters; accumulating into a dictionary avoids that.

    Args:
        m: Model whose gradients were just computed.
        backup: Model with the same children that holds the running sum.
    """
    for (_, src_layer), (_, dst_layer) in zip(m.named_children(), backup.named_children()):
        for attr in ('weight', 'bias'):
            src_param = getattr(src_layer, attr, None)
            dst_param = getattr(dst_layer, attr, None)
            # Skip layers without this parameter (e.g. bias=False convs,
            # activations) and parameters that have no gradient yet.
            if src_param is None or dst_param is None or src_param.grad is None:
                continue

            if dst_param.grad is None:
                # Clone: storing the tensor itself would alias the model's
                # gradient buffer, so later accumulation (or an in-place
                # zero_grad) would corrupt both models.
                dst_param.grad = src_param.grad.detach().clone()
            else:
                dst_param.grad += src_param.grad


# def update_tensor_dict(m:nn.Module, d:dict):
#     def needs_saving(t):
#         t = str(t)
#         if 'conv' in t or 'linear' in t:
#             return True
#         return False
    
#     with torch.no_grad():
#         for n, l in m.named_modules():
#             if needs_saving(type(l)):
#                 if n in d:
#                     d[f'{n}-weight-grad'] += l.weight.grad
#                     d[f'{n}-bias-grad'] += l.bias.grad
#                 else:
#                     d[f'{n}-weight-grad'] = l.weight.grad
#                     d[f'{n}-bias-grad'] = l.bias.grad
                    
def update_tensor_dict(m: nn.Module, d: dict):
    """Accumulate the gradients of every optimizable layer of ``m`` into ``d``.

    Gradients are stored under ``'<layer name>-weight-grad'`` and
    ``'<layer name>-bias-grad'``. The first time a key is seen a copy of the
    gradient is stored; on later calls the new gradient is added to it, so
    after an epoch ``d`` holds the sum over all batches.

    Args:
        m: Model whose ``.grad`` tensors were just populated by ``backward()``.
        d: Dictionary that is updated in place.
    """
    with torch.no_grad():
        for name, layer in m.named_modules():
            if not _is_optimizable(layer):
                continue

            for attr in ('weight', 'bias'):
                param = getattr(layer, attr)
                # Bias can be disabled (None), and a parameter that did not
                # take part in the backward pass has no gradient.
                if param is None or param.grad is None:
                    continue

                key = f'{name}-{attr}-grad'
                if key in d:
                    d[key] += param.grad
                else:
                    # Clone so the stored tensor is not the layer's own
                    # gradient buffer; otherwise the in-place add above (or an
                    # in-place zero_grad) would modify both.
                    d[key] = param.grad.clone()


def _is_optimizable(layer: nn.Module) -> bool:
    """Return whether ``layer`` is a conv / linear layer whose weights are
    optimized and thus should be sent to the parameter server.

    The check is done on the class instead of the type's string, because
    modules such as ``nn.Identity`` are defined in ``torch.nn.modules.linear``
    and would match a ``'linear'`` substring test without having a weight.
    """
    optimizable_types = (
        nn.Conv1d, nn.Conv2d, nn.Conv3d,
        nn.ConvTranspose1d, nn.ConvTranspose2d, nn.ConvTranspose3d,
        nn.Linear, nn.Bilinear,
    )
    return isinstance(layer, optimizable_types)


In [None]:
model = models.resnet18()

In [None]:
# Walk every sub-module and, for the ones whose type string mentions 'conv'
# or 'linear', print: name, type, whether the bias is None and whether a
# `weight` attribute exists.
# This check should be inside the update_tensor_d
for module_name, module in model.named_modules():
    module_type = type(module)
    type_repr = str(module_type)
    if not ('conv' in type_repr or 'linear' in type_repr):
        continue
    print(module_name, module_type, module.bias is None, hasattr(module, 'weight'))

## Create the network and do a forward and backward pass to get the gradients

In [None]:
import copy  # only referenced by the commented-out backup line below
model = Net()
# backup = Net()
# backup = copy.deepcopy(model)

# Adam over all model parameters; note the optimizer is built before the
# model is moved to the GPU below.
optimizer = optim.Adam(model.parameters(), lr=0.01)
model.train()  # switch the model to training mode

# Send the network to the GPU
model= model.cuda()
# backup.cuda()

In [None]:
save_model_weights(model)

In [None]:
# Load model weights from redis
load_model_weights(backup)

In [None]:
%%time
count = 0

# NOTE(review): load_state_dict is defined in this notebook as
# load_state_dict(m, id); this call passes no id and will raise a TypeError
# until the parameter-server id to load from is supplied here.
model.load_state_dict(load_state_dict(model))
# Start the epoch with an empty gradient accumulator.
tensor_d.clear()

model = model.cuda()

for inputs, targets in train_loader:

    # Number of samples seen so far, used only for progress reporting.
    count += len(inputs)

    inputs = inputs.cuda()
    targets = targets.cuda()

    # Zero the optimizer before the forward pass
    optimizer.zero_grad()

    # forward pass
    out = model(inputs)

    loss = F.nll_loss(out, targets)
    loss.backward()

    # Accumulate this batch's gradients before the optimizer consumes them.
    update_tensor_dict(model, tensor_d)

    optimizer.step()

    # Report after the loss of the current batch exists; printing before the
    # forward pass would show the previous batch's loss (or fail on the first
    # batch if the condition held there).
    if count % 2048 == 0:
        print(f'Training {count}/{len(train_loader.dataset)} \t {loss.item()}')

In [None]:
tensor_d

In [None]:
model.conv1.weight

In [None]:
before backup.conv1.weight

## Start with the REDIS AI part

In [None]:
import redisai as rai

In [None]:
# Two host/port pairs are declared; only the *_KUBE pair is used to build the
# client below (RAI / PORT are not referenced in this cell).
RAI_KUBE = '192.168.99.101'
RAI_PORT_KUBE = 31618
RAI = '192.168.99.102'
PORT = 6379

# RedisAI client shared by the cells that follow.
con = rai.Client(host=RAI_KUBE, port=RAI_PORT_KUBE)

In [None]:
con.tensorset('grad-conv1', c1_grad.numpy(), dtype='float32')
con.tensorset('bias-conv1', c1_bias.numpy(), dtype='float32')

In [None]:
con.set('example', 'hola')
con.set('exaaaaa', 'hola2')

In [None]:
%%time
a = con.tensorget('example:fc1-weight', as_numpy=False)


### Set all the model weights and biases in the database

In [None]:
%%time
psId = 'example'

# For every direct child that exposes a `bias` attribute, store its weight
# and then its bias as float32 tensors under '<psId>:<layer>-weight' and
# '<psId>:<layer>-bias'.
for layer_name, layer in model.named_children():
    if not hasattr(layer, 'bias'):
        continue

    for suffix, param in (('weight', layer.weight), ('bias', layer.bias)):
        key = f'{psId}:{layer_name}-{suffix}'
        print('Setting', key)
        con.tensorset(key, param.cpu().detach().numpy(), dtype='float32')
    
    

### Set the model gradients from the dict

In [None]:
# Store every accumulated gradient under '<dict key>/1'.
for grad_key, grad in tensor_d.items():
    print('Setting' , grad_key)
    grad_array = grad.cpu().numpy()
    con.tensorset(f'{grad_key}/1', grad_array)

### Try to get all of the layers from redis

In [None]:
# For every direct child of m2 that exposes a `bias` attribute, fetch its
# weight and then its bias from the database and install each one as a new
# Parameter on the layer.
for layer_name, layer in m2.named_children():
    if not hasattr(layer, 'bias'):
        continue

    for attr in ('weight', 'bias'):
        key = f'{psId}:{layer_name}-{attr}'
        print('Getting', key)
        fetched = con.tensorget(key)
        setattr(layer, attr, torch.nn.Parameter(torch.from_numpy(fetched)))

### Try to set the model to REDIS

We can simply save the state dict in the database and retrieve it very quickly as a Python object with the following functions.

In [None]:
def save_model_weights(m: nn.Module, id):
    r"""Store the weights and biases of every optimizable layer in the database.

    Layers are selected with ``_is_optimizable`` rather than by checking for a
    bias attribute, since other layer types (some batch-norm layers) can have
    a bias too. Each tensor is written as float32 under
    ``'<id>:<layer name>.weight'`` / ``'<id>:<layer name>.bias'``; the bias is
    skipped when the layer has none.

    Args:
        m: Model whose layers are saved.
        id: Prefix used for all keys written by this call.
    """
    print('Saving model to the database')
    with torch.no_grad():
        for name, layer in m.named_modules():
            if not _is_optimizable(layer):
                continue

            # Weights are always present on an optimizable layer.
            print(f'Setting weights for layer {name}')
            weights = layer.weight.cpu().detach().numpy()
            con.tensorset(f'{id}:{name}.weight', weights, dtype='float32')

            # The bias is optional.
            if layer.bias is None:
                continue
            print(f'Setting bias for layer {name}')
            bias = layer.bias.cpu().detach().numpy()
            con.tensorset(f'{id}:{name}.bias', bias, dtype='float32')

    print('Saved model to the database')
    
def load_model_weights(m: nn.Module, id: str):
    """Load the model weights saved in the database to start the new epoch.

    For every optimizable layer (see ``_is_optimizable``) the tensors stored
    under ``'<id>:<layer name>.weight'`` and ``'<id>:<layer name>.bias'`` are
    copied into the existing parameters. Copying in place, instead of
    assigning new ``Parameter`` objects, keeps each parameter on its current
    device and keeps it registered with any optimizer that was already
    created from ``m.parameters()``.

    Args:
        m: Model that receives the stored values.
        id: Key prefix that was used when the weights were saved.
    """
    print('Loading model from database')
    with torch.no_grad():
        for name, layer in m.named_modules():
            # only load and save layers that are optimizable (conv or fc)
            if not _is_optimizable(layer):
                continue

            # Load the weight
            print(f'Loading weights for layer {name}')
            weight_key = f'{id}:{name}.weight'
            w = con.tensorget(weight_key)
            layer.weight.copy_(torch.from_numpy(w))

            # If the layer has an active bias retrieve it
            # Some of the layers in resnet do not have bias
            # or it is None. It is not needed with BN, so skip it
            if layer.bias is not None:
                print(f'Loading bias for layer {name}')
                # The ':' separator must match the key format used when saving.
                bias_key = f'{id}:{name}.bias'
                b = con.tensorget(bias_key)
                layer.bias.copy_(torch.from_numpy(b))

    print('Model loaded from database')
    
def load_state_dict(m: nn.Module, id) -> dict:
    """Build a partial state dict for ``m`` from tensors stored in the database.

    Only optimizable layers (see ``_is_optimizable``) are fetched. The returned
    dictionary maps ``'<layer name>.weight'`` / ``'<layer name>.bias'`` to
    tensors, i.e. the keys match ``m.state_dict()`` for those layers.

    Args:
        m: Model whose layer names determine which keys are fetched.
        id: Key prefix under which the tensors were stored
            (``'<id>:<layer name>.weight'``).

    Returns:
        Dictionary with the fetched tensors.
    """
    d = dict()
    for name, layer in m.named_modules():
        # only load and save layers that are optimizable (conv or fc)
        if not _is_optimizable(layer):
            continue

        # Load the weight
        print(f'Loading weights for layer {name}')
        w = con.tensorget(f'{id}:{name}.weight')
        # The state-dict key is built from the layer name directly instead of
        # slicing a fixed number of characters off the database key, so ids of
        # any length work.
        d[f'{name}.weight'] = torch.from_numpy(w)

        # If the layer has an active bias retrieve it
        # Some of the layers in resnet do not have bias
        # or it is None. It is not needed with BN, so skip it
        if layer.bias is not None:
            print(f'Loading bias for layer {name}')
            b = con.tensorget(f'{id}:{name}.bias')
            d[f'{name}.bias'] = torch.from_numpy(b)
    return d

In [None]:
d = load_state_dict(model, id='aa11a789' )

In [None]:
model.state_dict().keys()

# model.load_state_dict(d)

In [None]:
model.state_dict()

In [None]:
import pickle
p = pickle.dumps(model.state_dict())

In [None]:
con.set('model', p)

In [None]:
%%time

d = con.get('model')

In [None]:
s = pickle.loads(d)

In [None]:
model.load_state_dict(s)

### Save the layer names in redis

m = Net()

In [None]:
# How to get the weighted layers and save the model
[n for n, l in m.named_children() if hasattr(l, "bias")]

In [None]:
l = " ".join([n for n, l in m.named_children() if hasattr(l, "bias")])
con.set("layers", l)

In [None]:
for l in [n for n, l in m.named_children() if hasattr(l, "bias")]:
    con.rpush('layers', l)

In [None]:
import json

layers = ['conv1', 'conv2', 'fc1', 'fc2']
l = json.dumps(layers)

l

### Save the layers by making an http request to the server

In [None]:
import requests

In [None]:
print(l)

requests.post("http://localhost:58682/layers", data=l)