In [2]:
import os
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import qai_hub as hub
import segmentation_models_pytorch as smp
import torch
import torch.nn as nn
import torchvision.transforms as T
from PIL import Image
from torchcam.methods import GradCAM

In [3]:
class UNet(nn.Module):
    """Thin wrapper around an ``smp.UnetPlusPlus`` network with one output channel."""

    def __init__(self):
        """Build the UNet++ model with a ``resnext50_32x4d`` encoder."""
        super().__init__()
        # Three input channels, one output channel (the depth map).
        self.model = smp.UnetPlusPlus(
            encoder_name='resnext50_32x4d',
            in_channels=3,
            classes=1,
        )

    def trainable_encoder(self, trainable=True):
        """Set ``requires_grad`` to ``trainable`` on every encoder parameter."""
        for param in self.model.encoder.parameters():
            param.requires_grad = trainable

    def forward(self, x):
        """Run ``x`` through the wrapped model and return its output."""
        return self.model(x)

    def _num_params(self):
        """Return the element count of all parameters that have ``requires_grad`` set."""
        total = 0
        for param in self.model.parameters():
            if param.requires_grad:
                total += param.numel()
        return total

In [4]:
# ---------------------------
# Load the trained weights
# ---------------------------
CHECKPOINT_PATH = './nyu-v2-depth-resnext50_32x4d-unetplusplus.pt'
# Key prefix to strip from checkpoint entries (typically added when a model is
# saved from inside an nn.DataParallel wrapper).
WRAPPER_PREFIX = "module."

# Define your UNet model
unet_model = UNet().to('cpu')

# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
state_dict = torch.load(CHECKPOINT_PATH, map_location='cpu', weights_only=True)

# Strip the prefix only from the front of each key. str.replace would also
# rewrite any "module." that appears later in a parameter path.
new_state_dict = OrderedDict(
    (k[len(WRAPPER_PREFIX):] if k.startswith(WRAPPER_PREFIX) else k, v)
    for k, v in state_dict.items()
)

# Load the cleaned state dictionary and switch to inference mode
unet_model.load_state_dict(new_state_dict)
unet_model.eval()

  state_dict = torch.load('./nyu-v2-depth-resnext50_32x4d-unetplusplus.pt', map_location='cpu')


UNet(
  (model): UnetPlusPlus(
    (encoder): ResNetEncoder(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       

In [6]:
! qai-hub configure --api_token f0303b7ee7e33b83a9cb5c542745c3b40fd33516

qai-hub configuration saved to C:\Users\Amritha/.qai_hub/client.ini
[api]
api_token = <redacted>
api_url = https://app.aihub.qualcomm.com
web_url = https://app.aihub.qualcomm.com
verbose = True




In [4]:
# ---------------------------
# Trace the model
# ---------------------------
# Single source of truth for the input shape: used for both the trace example
# and the compile job's input spec so the two cannot drift apart.
INPUT_SHAPE = (1, 3, 480, 640)

example_input = torch.rand(INPUT_SHAPE)
traced_model = torch.jit.trace(unet_model, example_input)

# ---------------------------
# Compile with AI Hub
# ---------------------------
device = hub.Device("Samsung Galaxy S24 (Family)")

compile_job = hub.submit_compile_job(
    name='unetplusplus_depth_estimation',
    model=traced_model,
    device=device,
    input_specs={"image": INPUT_SHAPE}
)

print("⏳ Compiling model...")
compile_status = compile_job.wait()
# Stop here on failure so later cells do not run against a missing target model.
if not compile_status.success:
    raise RuntimeError(f"Compile job failed: {compile_status.message}")
compile_status

  if h % output_stride != 0 or w % output_stride != 0:


Uploading tmpu0u_1xv_.pt


100%|[34m██████████[0m| 186M/186M [00:45<00:00, 4.24MB/s] 


Scheduled compile job (j5qeqy4m5) successfully. To see the status and results:
    https://app.aihub.qualcomm.com/jobs/j5qeqy4m5/

⏳ Compiling model...
Waiting for compile job (j5qeqy4m5) completion. Type Ctrl+C to stop waiting at any time.
    ✅ SUCCESS                          


JobStatus
---------
code    : SUCCESS
message : 

In [5]:
# Download compiled artifacts
compile_job.download_results(artifacts_dir="./compile_job_results")
compiled_model = compile_job.get_target_model()
# get_target_model() yields None when the compile job did not succeed; raise
# now instead of passing None on to the profile job.
if compiled_model is None:
    raise RuntimeError("Compile job produced no target model; check the job status on AI Hub.")

job_j5qeqy4m5_optimized_tflite_mnjlk6dxq.tflite: 100%|[34m██████████[0m| 185M/185M [00:12<00:00, 15.5MB/s] 


Downloaded model to e:\College\S6\CV\indoor-monocular-depth-estimation\unet-plus-plus-depth\compile_job_results\job_j5qeqy4m5_optimized_tflite_mnjlk6dxq.tflite


In [6]:
# ---------------------------
# Profile the compiled model
# ---------------------------
profile_job = hub.submit_profile_job(
    name='unetplusplus_depth_profile',
    model=compiled_model,
    device=device
)

print("⏳ Profiling on device...")
profile_status = profile_job.wait()
# Only fetch results from a job that actually succeeded.
if not profile_status.success:
    raise RuntimeError(f"Profile job failed: {profile_status.message}")
profile_job.download_results(artifacts_dir="./profile_job_results")

Scheduled profile job (j56e47o7g) successfully. To see the status and results:
    https://app.aihub.qualcomm.com/jobs/j56e47o7g/

⏳ Profiling on device...
Waiting for profile job (j56e47o7g) completion. Type Ctrl+C to stop waiting at any time.
    ✅ SUCCESS                          
Saved profile results to e:\College\S6\CV\indoor-monocular-depth-estimation\unet-plus-plus-depth\profile_job_results\unetplusplus_depth_profile_j56e47o7g_results.json


ProfileJobResult
----------------
status                        : JobStatus
---------
code    : SUCCESS
message : 

url                           : https://app.aihub.qualcomm.com/jobs/j56e47o7g/
artifacts_dir                 : e:\College\S6\CV\indoor-monocular-depth-estimation\unet-plus-plus-depth\profile_job_results
Estimated Inference Time (ms) : 60.711
Load Time (ms)                : 10844.763
Peak Memory (MB)              : 743.68359375
Compute Units (layers)        : NPU: 121

In [12]:
# Fetch the profile data, then report which job ran and on which device.
profile_result = profile_job.download_profile()
summary_lines = (
    "\n✅ Profile Job Completed!",
    f"📄 Name: {profile_job.name}",
    f"📱 Target Device: {profile_job.device.name}",
)
print(*summary_lines, sep="\n")


✅ Profile Job Completed!
📄 Name: unetplusplus_depth_profile
📱 Target Device: Samsung Galaxy S24 (Family)


In [None]:
# NOTE(review): every line in this cell is commented out, so running it does
# nothing, yet a saved output reporting 100 depth maps is still attached to it.
# Either restore the code or delete the cell together with its stale output.
# # ---------------------------
# # Run Inference on Test Images
# # ---------------------------
# transform = T.Compose([
#     T.Resize((480, 640)),
#     T.ToTensor()
# ])

# def load_image(path):
#     img = Image.open(path).convert('RGB')
#     return transform(img).unsqueeze(0)

# def save_depth_map(depth, output_path):
#     depth_np = depth.squeeze().detach().cpu().numpy()
#     plt.imsave(output_path, depth_np, cmap='inferno')

# # Load test images
# test_dir = '../test_data/indoor'
# test_images = [os.path.join(test_dir, f) for f in os.listdir(test_dir) if f.endswith(('.jpg', '.png'))]
# os.makedirs("depth_predictions", exist_ok=True)

# for i, img_path in enumerate(test_images):
#     img_tensor = load_image(img_path)
#     with torch.no_grad():
#         pred = unet_model(img_tensor)
#         save_path = f"depth_predictions/depth_{i+1}.png"
#         save_depth_map(pred, save_path)

# print(f"\n🖼️ Saved {len(test_images)} predicted depth maps to 'depth_predictions/' folder.")


🖼️ Saved 100 predicted depth maps to 'depth_predictions/' folder.
