In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
# setup cell to make our lives easier 
import re
import numpy as np
import os
from tqdm import tqdm
import random
import matplotlib.pyplot as plt
from collections import defaultdict
from utils import *
from eval_script import *
from customize_dataset import DexNetNPZDataset
from customize_dataset import DexNetNPZDatasetAll

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau


# --- Notebook-wide configuration -------------------------------------------
# Directory that holds the tensor files to evaluate (replace with actual path).
tensor_dir = '../../dexnet_2.1/dexnet_2.1_eps_50/tensors/'

# Indices of the pose entries used by the model; len(pose_dims) is passed to
# the network as its pose_dim when the model is built further down.
pose_dims = list(range(6))

# False selects the 'binary' output type, True selects 'regression'.
use_regression = False

# Batch size handed to the evaluation routine.
batch_size = 32


In [2]:
# Device selection. Run this cell before the model-loading cell, which moves
# the network onto `device`.
#
# Optional GPU diagnostics / GPU pinning, left disabled:
#   print(torch.cuda.device_count())
#   print(torch.cuda.get_device_name())
#   os.environ["CUDA_VISIBLE_DEVICES"] = "1"
device = torch.device("cpu")

In [3]:
class SimpleGQCNN(nn.Module):
    """Two-stream network that scores an (image, pose) pair with a single value.

    A small convolutional stream embeds the image into 64 features, a
    two-layer fully connected stream embeds the pose into 64 features, and the
    two embeddings are merged and reduced to one output per sample.

    Shape comments below assume single-channel 32x32 images, which is what the
    hard-coded ``32 * 6 * 6`` input width of ``im_fc`` corresponds to.
    """

    def __init__(self, pose_dim=4, output_type='binary', merge_methods="element_dot"):
        """
        pose_dim: number of dimensions in the pose vector (e.g., x, y, z, theta)
        output_type: 'binary' or 'regression'; it is only stored on the
            instance, forward() does not branch on it
        merge_methods: "element_dot" multiplies the two 64-wide embeddings
            elementwise; any other value concatenates them instead
        """
        super().__init__()
        self.output_type = output_type

        # ---- image stream --------------------------------------------------
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3)   # -> (B, 16, 30, 30)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)                       # halves H and W
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)  # -> (B, 32, 13, 13)
        self.im_fc = nn.Linear(in_features=32 * 6 * 6, out_features=64)         # -> (B, 64)
        self.bn1 = nn.BatchNorm2d(num_features=16)
        self.bn2 = nn.BatchNorm2d(num_features=32)
        # Registered, and therefore part of the state dict, but not applied in
        # forward().
        self.im_fc_bn = nn.BatchNorm1d(num_features=64)

        # ---- pose stream ---------------------------------------------------
        self.pose_fc1 = nn.Linear(in_features=pose_dim, out_features=64)
        self.pose_fc2 = nn.Linear(in_features=64, out_features=64)
        self.dropout = nn.Dropout(p=0.5)

        # ---- merge head ----------------------------------------------------
        self.merge_methods = merge_methods
        if merge_methods == "element_dot":
            # The elementwise product keeps the width at 64.
            merged_width, hidden_width = 64, 32
        else:
            # Concatenation stacks both 64-wide embeddings.
            merged_width, hidden_width = 64 + 64, 64
        self.merge_fc1 = nn.Linear(merged_width, hidden_width)
        self.merge_fc2 = nn.Linear(hidden_width, 1)  # single output for binary or regression

    def forward(self, image, pose):
        """Return the raw (B, 1) output of the final linear layer.

        No sigmoid is applied here, regardless of ``output_type``.
        """
        # Image stream: (conv -> batch norm -> ReLU -> max-pool) twice.
        img_feat = self.conv1(image)
        img_feat = self.pool(F.relu(self.bn1(img_feat)))         # (B, 16, 15, 15)
        img_feat = self.conv2(img_feat)
        img_feat = self.pool(F.relu(self.bn2(img_feat)))         # (B, 32, 6, 6)
        img_feat = img_feat.flatten(start_dim=1)                 # (B, 32 * 6 * 6)
        img_feat = F.relu(self.im_fc(img_feat))                  # (B, 64)

        # Pose stream: two (linear -> ReLU -> dropout) stages.
        pose_feat = self.dropout(F.relu(self.pose_fc1(pose)))    # (B, 64)
        pose_feat = self.dropout(F.relu(self.pose_fc2(pose_feat)))  # (B, 64)

        # Merge the two embeddings.
        if self.merge_methods == "element_dot":
            combined = img_feat * pose_feat                      # (B, 64)
        else:
            combined = torch.cat((img_feat, pose_feat), dim=1)   # (B, 128)

        # Head: hidden width is 32 for "element_dot", 64 otherwise.
        hidden = F.relu(self.merge_fc1(combined))
        return self.merge_fc2(hidden)                            # (B, 1)


In [4]:
# Build SimpleGQCNN and load the trained weights for inference.
pose_dims = [0, 1, 2, 3, 4, 5]
use_regression = False  # or True

model = SimpleGQCNN(pose_dim=len(pose_dims), output_type='regression' if use_regression else 'binary')

# The checkpoint is consumed by load_state_dict(), i.e. it is a mapping of
# parameter names to tensors, so it is loaded with weights_only=True: this
# restricts unpickling to tensors and plain containers instead of allowing
# arbitrary pickled code to run. Tensors are mapped onto the notebook's
# `device` rather than a second, separately hard-coded device.
state_dict = torch.load("eps_10/model.pth", weights_only=True, map_location=device)
model.load_state_dict(state_dict)
model = model.to(device, memory_format=torch.channels_last)

# The network contains BatchNorm and Dropout layers; switch them to inference
# behaviour (running statistics, no random dropping) before evaluating.
model.eval()

In [5]:
# Run the evaluation routine on the loaded model. Settings are gathered in one
# dict so they can be read at a glance; num_files=5 limits the run to five
# tensor files, and visualizations are written to ./eps_10.
tensor_dir = "../../dexnet_2.1/dexnet_2.1_eps_50/tensors/"
pose_dims = [0, 1, 2, 3, 4, 5]

evaluation_kwargs = {
    "model": model,
    "tensor_dir": tensor_dir,
    "batch_size": batch_size,
    "pose_dims": pose_dims,
    "visualizations_dir": "./eps_10",
    "use_regression": use_regression,
    "num_files": 5,
}

# Kept as the cell's last expression so the returned arrays are still shown.
run_model_evaluation(**evaluation_kwargs)

Using device: cuda
Found 35 tensor files

Processing 5 files...

Processing file 1/5...
File contains 1000 samples
Running inference on 32 batches...
Processed 1000 samples from file 1

Processing file 2/5...
File contains 1000 samples
Running inference on 32 batches...
Processed 1000 samples from file 2

Processing file 3/5...
File contains 1000 samples
Running inference on 32 batches...
Processed 1000 samples from file 3

Processing file 4/5...
File contains 1000 samples
Running inference on 32 batches...
Processed 1000 samples from file 4

Processing file 5/5...
File contains 1000 samples
Running inference on 32 batches...
Processed 1000 samples from file 5

Model evaluation complete! Results saved to 'torch_evaluation_results.txt'
Visualizations saved to directory: './eps_10'


(array([0.00186193, 0.71879286, 0.73538333, ..., 0.9605993 , 0.86230177,
        0.9345029 ], shape=(4080,), dtype=float32),
 array([0, 1, 0, ..., 1, 1, 1], shape=(4080,)),
 array([0.00111575, 0.00238624, 0.00428153, ..., 0.00401029, 0.00233087,
        0.00294281], shape=(4080,)))

In [6]:
# Collect (element count, requires_grad) once per parameter tensor, then report
# the trainable and the overall totals from that single pass.
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]

trainable_param_count = sum(count for count, needs_grad in param_sizes if needs_grad)
print(f"Number of trainable parameters: {trainable_param_count}")

# pytorch_total_params ends up holding the count over all parameters.
pytorch_total_params = sum(count for count, _ in param_sizes)
print(f"Number of total parameters: {pytorch_total_params}")

Number of trainable parameters: 85537
Number of total parameters: 85537


In [None]:
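# NOTE: this cell is disabled. Its last recorded run failed with
# "NameError: name 'evaluate_accuracy_with_confusion' is not defined", so that
# helper has to be defined or imported before the code below is re-enabled.
# import itertools
# # Create dataset and dataloader
# train_loader, val_loader = DexNetDataloader(tensor_dir=tensor_dir, use_regression=use_regression, pose_dims=pose_dims)
 
# subset_loader = itertools.islice(val_loader, 5)  # Use only the first 5 batches

# evaluate_accuracy_with_confusion(model, subset_loader, device, threshold=0.5)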

NameError: name 'evaluate_accuracy_with_confusion' is not defined