In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, balanced_accuracy_score
from sklearn.utils import class_weight
import math
import gc
import os
import pickle
import random
from typing import List, Tuple
import tqdm
from sklearn.model_selection import train_test_split
import torch.optim as optim
import matplotlib.pyplot as plt
from IPython.display import clear_output
from res_pred_utils import get_smooth_egovel

os.chdir("/root/shared/Anytime-Lidar/tools")

In [2]:
from nuscenes import NuScenes

dataset_version = 'v1.0-trainval'
root_path = "../data/nuscenes/" + dataset_version
nusc = NuScenes(version=dataset_version, dataroot=root_path, verbose=True)

Loading NuScenes tables for version v1.0-trainval...
23 category,
8 attribute,
4 visibility,
64386 instance,
12 sensor,
10200 calibrated_sensor,
2631083 ego_pose,
68 log,
850 scene,
156788 sample,
2686189 sample_data,
5305850 sample_annotation,
4 map,
Done loading in 60.604 seconds.
Reverse indexing ...
Done reverse indexing in 9.3 seconds.


In [33]:
# CONSTANTS
do_classification = True

with open('resolution_dataset.pkl', 'rb') as f:
    io_dict = pickle.load(f)
    print(io_dict['fields'])
    io_tuples = io_dict['data']

# Each tuple has: 'coords', 'features', 'resolution', 'sample_tkn'
# remove duplicates first
scores = [tuple(io_tpl[1][:, 6].ravel()) for io_tpl in io_tuples]
mask = np.ones(len(scores), dtype=bool)
scores_set = set()
for i, scr in enumerate(scores):
    if scr in scores_set:
        mask[i] = False
    else:
        scores_set.add(scr)

io_tuples = [io_tpl for m, io_tpl in zip(mask, io_tuples) if m]
print('Number of samples in dataset after removing duplicates:', len(io_tuples))

with torch.no_grad():
    # output labels
    ap_scores = torch.tensor([io_tpl[2] for io_tpl in io_tuples], dtype=torch.float)
    
    if do_classification:
        outp_data = torch.argmax(ap_scores, dim=1)
    else:
        mins = ap_scores.min(1).values.unsqueeze(-1).repeat(1,5)
        maxs = ap_scores.max(1).values.unsqueeze(-1).repeat(1,5)
        outp_data = (ap_scores - mins) / (maxs-mins)
    # print('ap_scores', ap_scores.size(), ap_scores.dtype)
    # print('outp_data', outp_data.size(), outp_data.dtype)

    # Find which segments has stark AP different, prioritize using them by sorting
    ap_scores_copy = ap_scores.clone()
    src = torch.zeros((outp_data.size(0),1))
    ap_scores_copy.scatter_(1, outp_data.unsqueeze(-1), src)
    outp_data_2 = torch.argmax(ap_scores_copy, dim=1)
    diffs = ap_scores.gather(1, outp_data.unsqueeze(-1)) - ap_scores_copy.gather(1, outp_data_2.unsqueeze(-1))
    vals, inds = torch.sort(diffs.flatten(), descending=True)

    # sort and filter zeros
    inds = inds[vals > 0]
    outp_data = outp_data[inds]
    io_tuples = [io_tuples[i] for i in inds]

    # input labels
    labels = [torch.tensor(io_tpl[1], dtype=torch.int)[:, -1] for io_tpl in io_tuples]
    label_dists = torch.stack([torch.bincount(l - 1, minlength=10) for l in labels])
    num_labels_normalizer = 100 #max(100, label_dists.max())
    label_dists = label_dists.float() / num_labels_normalizer
    # print('label_dists', label_dists.size())
    
    sample_tokens = [io_tpl[3] for io_tpl in io_tuples]
    egovels = torch.tensor([np.linalg.norm(get_smooth_egovel(nusc, sample_tkn)[1][:2]) \
               for sample_tkn in sample_tokens]).float()
    egovels[torch.isnan(egovels)] = 0.
    egovels /= 15.0 # normalize
    
    inp_data = torch.cat((label_dists, egovels.unsqueeze(-1)), dim=1)

    #stratify data
    label_counts = torch.bincount(outp_data)
    num_samples_each_label = label_counts.min()

    inps_masked, outputs_masked = [], []
    for cur_label in range(len(label_counts)):
        mask = (outp_data == cur_label)
        inps_masked.append(inp_data[mask][:num_samples_each_label])
        outputs_masked.append(outp_data[mask][:num_samples_each_label])
    inp_data = torch.cat(inps_masked)
    outp_data = torch.cat(outputs_masked)
    print('inputs ater stratifying:', inp_data.size())
    print('outputs ater stratifying:', outp_data.size())
    print(torch.bincount(outp_data))

('coords', 'features', 'resolution', 'sample_tkn')
Number of samples in dataset after removing duplicates: 54680
inputs ater stratifying: torch.Size([26970, 11])
outputs ater stratifying: torch.Size([26970])
tensor([5394, 5394, 5394, 5394, 5394])


In [46]:
# Define the neural network architecture
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(11, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            # nn.Dropout(p=0.5),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Linear(64, 5))
            
    def forward(self, x):
        return self.layers(x)
# Initialize the model, loss function, and optimizer
model = SimpleNN().cuda()
model.train()
if do_classification:
    criterion = nn.CrossEntropyLoss()
else:
    criterion = nn.MSELoss()  # Using MSE as we have continuous values
optimizer = optim.Adam(model.parameters(), lr=5e-5)

dataset = TensorDataset(inp_data, outp_data)
dataloader = DataLoader(dataset, batch_size=512, shuffle=True)

epochs = 500
for epoch in range(epochs):
    running_loss = 0.0
    for batch_inputs, batch_outputs in dataloader:
        # Zero the gradients
        optimizer.zero_grad()
        
        # Forward pass
        predictions = model(batch_inputs.cuda())
        loss = criterion(predictions, batch_outputs.cuda())
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        running_loss+=loss.item()
    
    # Print loss every 10 epochs
    # scheduler.step(running_loss)
    if (epoch+1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")


Epoch [10/500], Loss: 0.9919
Epoch [20/500], Loss: 0.7829
Epoch [30/500], Loss: 0.6091
Epoch [40/500], Loss: 0.5408
Epoch [50/500], Loss: 0.4427
Epoch [60/500], Loss: 0.3587
Epoch [70/500], Loss: 0.3646
Epoch [80/500], Loss: 0.3872
Epoch [90/500], Loss: 0.3114
Epoch [100/500], Loss: 0.2794
Epoch [110/500], Loss: 0.2764
Epoch [120/500], Loss: 0.3362
Epoch [130/500], Loss: 0.3395
Epoch [140/500], Loss: 0.2427
Epoch [150/500], Loss: 0.2438
Epoch [160/500], Loss: 0.1803
Epoch [170/500], Loss: 0.2000
Epoch [180/500], Loss: 0.2667
Epoch [190/500], Loss: 0.1890
Epoch [200/500], Loss: 0.2486
Epoch [210/500], Loss: 0.1607
Epoch [220/500], Loss: 0.1937
Epoch [230/500], Loss: 0.2177
Epoch [240/500], Loss: 0.2903
Epoch [250/500], Loss: 0.1947
Epoch [260/500], Loss: 0.1721
Epoch [270/500], Loss: 0.1771
Epoch [280/500], Loss: 0.1739
Epoch [290/500], Loss: 0.1799
Epoch [300/500], Loss: 0.1703
Epoch [310/500], Loss: 0.1531
Epoch [320/500], Loss: 0.1421
Epoch [330/500], Loss: 0.1641
Epoch [340/500], Lo

In [47]:
model.eval()
with torch.no_grad():
    outputs = model(inp_data.cuda())
    _, predictions = torch.max(outputs, 1)
    print((predictions == outp_data.cuda()).sum() / outp_data.size(0))

tensor(0.9700, device='cuda:0')


In [48]:
#ONNX export
model.eval()
input_names=['objcount_and_egovel']
print('Input shape:', inp_data[:1, :].size())
# dynamic_axes = {
#     "objcount_and_egovel": {
#         0: "batch"
#     }
# }

torch.onnx.export(
        model,
        inp_data[:1, :].cuda(),
        'resolution_pred_mdl.onnx',
        input_names=input_names,
        output_names=["res_scores"],
#         dynamic_axes=dynamic_axes,
        opset_version=17,
#         custom_opsets={"cuda_slicer": 17},
)
print('Done!')

Input shape: torch.Size([1, 11])
Done!
