In [12]:
import qai_hub as hub
import torch
from torchvision.models import mobilenet_v2
import requests
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

In [13]:
import torch.nn as nn
import torch.nn.functional as F

import torchviz
import torchsummary

# Creates a simple scalable CNN model to test the maximum amount of RAM and layers supported by the QAI Hub

class SimpleCNN(nn.Module):
    """Scalable test network: a stack of 3x3 convolutions followed by fully connected layers.

    Every convolution uses kernel_size=3, stride=1 and padding=1, so the spatial
    resolution of the input is preserved through the whole convolutional stack.
    No activation is inserted between the convolutions; each fully connected
    layer (including the last one) is followed by a ReLU.

    Args:
        imgRes: Two-element sequence ``(width, height)`` of the input images.
        convChannels: Output channel count of each convolutional layer, in order.
            May be empty, in which case the input is flattened directly.
        fcChannels: Output neuron count of each fully connected layer, in order.
            May be empty, in which case the flattened features are returned.
    """

    def __init__(self, imgRes, convChannels, fcChannels):
        super().__init__()

        imgWidth, imgHeight = imgRes

        # Assuming 3 input channels (RGB images)
        inChannels = 3

        # Build the layers in local lists so that only registered nn.Module
        # containers are ever stored on the instance.
        convLayers = []
        for outChannels in convChannels:
            convLayers.append(
                nn.Conv2d(in_channels=inChannels, out_channels=outChannels, kernel_size=3, stride=1, padding=1)
            )
            inChannels = outChannels
        self.convLayers = nn.Sequential(*convLayers)

        # inChannels now holds the channel count leaving the conv stack (still 3
        # when convChannels is empty), so indexing convChannels[-1] is not needed
        # and an empty list no longer raises IndexError. The spatial size is
        # unchanged because every convolution is "same"-padded.
        inNeurons = inChannels * imgWidth * imgHeight

        fcLayers = []
        for outNeurons in fcChannels:
            fcLayers.append(nn.Linear(inNeurons, outNeurons))
            fcLayers.append(nn.ReLU())
            inNeurons = outNeurons
        self.fcLayers = nn.Sequential(*fcLayers)

    def forward(self, x):
        """Run the conv stack, flatten everything but the batch dimension, then run the FC stack."""
        x = self.convLayers(x)

        # Flatten (channels, height, width) into one feature vector per sample
        x = torch.flatten(x, 1)

        return self.fcLayers(x)


In [14]:


# Model configuration: input resolution, per-layer conv output channels,
# and per-layer fully connected output sizes

imgRes = [224, 224]
convChannels = [16, 32, 64, 64, 32, 16]
fcChannels = [64]

# Build the network from the configuration above
model = SimpleCNN(imgRes=imgRes, convChannels=convChannels, fcChannels=fcChannels)

# Switch to inference mode
model.eval()



In [15]:
# Print model architecture, followed by a per-layer summary of output shapes and parameter counts
print(model)

# torchsummary defaults to device="cuda" and creates its dummy input on the GPU
# whenever CUDA is available. Pass the device the model's weights actually live
# on so the dummy input and the weights are always on the same device.
summary = torchsummary.summary(
    model,
    (3, imgRes[0], imgRes[1]),
    device=next(model.parameters()).device.type,
)

SimpleCNN(
  (convLayers): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (fcLayers): Sequential(
    (0): Linear(in_features=802816, out_features=64, bias=True)
    (1): ReLU()
  )
)
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 224, 224]             448
            Conv2d-2         [-1, 32, 224, 224]           4,640
            Conv2d-3         [-1, 64, 224, 224]          18,496
            Conv2d-4         [-1, 64, 224, 224]          36,928
           

In [16]:
# Show which container class holds the convolutional layers
print(model.convLayers.__class__)

# Display the dtype of the first convolution's weights
next(iter(model.convLayers)).weight.dtype

<class 'torch.nn.modules.container.Sequential'>


torch.float32

In [17]:
# Trace the model with a random example input and write the TorchScript module to disk

input_shape = (1, 3, 224, 224)
example_input = torch.rand(*input_shape)
traced_torch_model = torch.jit.trace(model, example_input)
traced_torch_model.save('testModel.pt')

In [18]:
# Step 2: Compile the traced model for the target device on Qualcomm AI Hub
compile_job = hub.submit_compile_job(
    input_specs={"image": input_shape},
    device=hub.Device("Snapdragon 8 Elite QRD"),
    model=traced_torch_model,
)

Uploading tmp282dmnsz.pt


100%|[34m██████████[0m| 196M/196M [02:47<00:00, 1.23MB/s] 


Scheduled compile job (j5q06v14p) successfully. To see the status and results:
    https://app.aihub.qualcomm.com/jobs/j5q06v14p/



In [19]:
# Step 3: Profile the compiled model on a cloud-hosted device
target_model = compile_job.get_target_model()

# get_target_model() waits for the compile job to finish and returns None when
# the job failed. Stop here with a clear message instead of submitting a
# profile job with no model.
if target_model is None:
    raise RuntimeError("Compile job failed; no target model is available to profile.")

profile_job = hub.submit_profile_job(
    model=target_model,
    device=hub.Device("Snapdragon 8 Elite QRD"),
)


Scheduled profile job (jgl4vl885) successfully. To see the status and results:
    https://app.aihub.qualcomm.com/jobs/jgl4vl885/



In [20]:
# Step 4: Run inference on cloud-hosted device
sample_image_url = (
    "https://qaihub-public-assets.s3.us-west-2.amazonaws.com/apidoc/input_image1.jpg"
)

# Download the sample image. The timeout keeps the cell from hanging forever on
# a stalled connection, raise_for_status() surfaces HTTP errors instead of
# handing an error page to PIL, and the context manager releases the connection.
with requests.get(sample_image_url, stream=True, timeout=30) as response:
    response.raise_for_status()
    response.raw.decode_content = True
    # convert("RGB") guarantees exactly 3 channels (the model input is 3-channel)
    # and fully decodes the image before the response is closed.
    image = Image.open(response.raw).convert("RGB").resize((224, 224))

# Scale pixels to [0, 1], reorder HWC -> CHW and add a leading batch dimension
input_array = np.expand_dims(
    np.transpose(np.array(image, dtype=np.float32) / 255.0, (2, 0, 1)), axis=0
)

# Run inference using the on-device model on the input image
inference_job = hub.submit_inference_job(
    model=target_model,
    device=hub.Device("Snapdragon 8 Elite QRD"),
    inputs=dict(image=[input_array]),
)
on_device_output = inference_job.download_output_data()


Uploading dataset: 154kB [00:01, 144kB/s]                             872kB/s]


Scheduled inference job (jp3nj67l5) successfully. To see the status and results:
    https://app.aihub.qualcomm.com/jobs/jp3nj67l5/

Waiting for inference job (jp3nj67l5) completion. Type Ctrl+C to stop waiting at any time.
    ✅ SUCCESS                          


tmp90ivxp7d.h5: 100%|[34m██████████[0m| 13.9k/13.9k [00:00<00:00, 36.1kB/s]
