In [1]:
import os
import argparse
import torch
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import random
import cv2
import numpy as np
from pathlib import Path

from os2d.modeling.model import build_os2d_from_config
from os2d.config import cfg
from os2d.structures.feature_map import FeatureMapSize
from os2d.data import dataloader
from os2d.data.dataloader import build_eval_dataloaders_from_cfg, build_train_dataloader_from_config
from os2d.engine.train import trainval_loop
from os2d.engine.optimization import create_optimizer
from os2d.utils import (
    setup_logger, 
    read_image, 
    get_image_size_after_resize_preserving_aspect_ratio,
    set_random_seed,
    get_trainable_parameters,
    mkdir,
    save_config,
    get_data_path
)
from os2d.utils.visualization import *
import os2d.utils.visualization as visualizer

from src.util.detection import generate_detection_boxes
from src.util.visualize import visualize_boxes_on_image
from src.util.filter import DataLoaderDB

from src.lcp.ct_aoi_align import ContextAoiAlign

from src.lcp.lcp import LCP

from src.lcp.aux_net import AuxiliaryNetwork
from src.util.prune_db import PruneDBControler
from src.lcp.pruner import Pruner
from src.util.loss import LCPFinetuneCriterion
from src.lcp.lcpfinetune import LCPFineTune

In [2]:
# Fail fast when the config asks for CUDA but no GPU is visible; otherwise
# switch cuDNN into benchmark mode.
if cfg.is_cuda:
    assert torch.cuda.is_available(), "Do not have available GPU, but cfg.is_cuda == 1"
    torch.backends.cudnn.benchmark = True

# Apply the configured random seed.
set_random_seed(cfg.random_seed, cfg.is_cuda)

# Build the network together with the objects returned alongside it.
(net,
 box_coder,
 criterion,
 img_normalization,
 optimizer_state) = build_os2d_from_config(cfg)

# Optimizer over the trainable parameters of `net`, restored from `optimizer_state`.
parameters = get_trainable_parameters(net)
optimizer = create_optimizer(parameters, cfg.train.optim, optimizer_state)

# Training dataloader (plus the training subsets reused for evaluation) ...
data_path = get_data_path()
dataloader_train, datasets_train_for_eval = build_train_dataloader_from_config(
    cfg,
    box_coder,
    img_normalization,
    data_path=data_path,
)

# ... and the evaluation dataloaders built on top of them.
dataloaders_eval = build_eval_dataloaders_from_cfg(
    cfg,
    box_coder,
    img_normalization,
    datasets_for_eval=datasets_train_for_eval,
    data_path=data_path,
)

In [6]:
# Pull batch 0 from the training dataloader for inspection.
# The seventh element is bound to `batch_transforms` rather than `transforms`:
# `transforms` is the alias of `torchvision.transforms` imported at the top of
# the notebook, and rebinding it here would break any later cell that calls
# `transforms.Compose(...)` when the notebook is run top to bottom.
(images, class_images, loc_targets, cls_targets, class_ids, class_sizes,
 batch_transforms, boxes, img_sizes) = dataloader_train.get_batch(0)
print(type(class_ids))

Image 0 size FeatureMapSize(w=3264, h=2448) has 30 boxes
Image 1 size FeatureMapSize(w=2448, h=3264) has 28 boxes
Image 2 size FeatureMapSize(w=3264, h=2448) has 35 boxes
Image 3 size FeatureMapSize(w=2448, h=3264) has 22 boxes
<class 'list'>


In [None]:


# --- Box database and image preprocessing -----------------------------------
# CSV-backed database built on top of the training dataloader.
db = DataLoaderDB(path='./src/db/data.csv', dataloader=dataloader_train)
# db.initialize_csv()   # left disabled; presumably (re)creates the CSV - confirm before enabling

# ToTensor followed by normalization with the mean / std returned with the model.
transform_image = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(img_normalization["mean"], img_normalization["std"]),
])

# --- Auxiliary network and LCP controller -----------------------------------
context_aoi_align = ContextAoiAlign(db, dataloader_train, transform_image, net, cfg)
aux_net = AuxiliaryNetwork(context_aoi_align, db)

lcp = LCP(net, aux_net, dataloader_train)
lcp.init_for_indices()

# (name, channel count) pairs reported by the LCP controller; fetched once and
# reused by every loop below.
layers = lcp.get_layers_name()
for name, ch in layers:
    print(f"{name}: {ch} channels")

layer_name = 'net_feature_maps.layer3.2.conv2'  # adjust to your network architecture
lambda_rate = 1.0
use_image_num = 3  # use only a few images while testing

# --- Device consistency check -----------------------------------------------
print("=== 設備一致性檢查 ===")

# Devices of the original network and of the network being pruned.
net_device = next(lcp._net.parameters()).device
prune_net_device = next(lcp._prune_net.parameters()).device

print(f"主網路設備: {net_device}")
print(f"剪枝網路設備: {prune_net_device}")

# Device of an image tensor requested with is_cuda=False.
img_tensor = lcp.get_image_tensor_from_dataloader(0, is_cuda=False)
print(f"圖像張量設備: {img_tensor.device}")

print(f"測試層: {layer_name}")
print(f"Lambda 率: {lambda_rate}")
print(f"使用圖像數: {use_image_num}")

# --- Channel selection -------------------------------------------------------
prune_db = PruneDBControler(path='./src/db/prune_channel_information.csv')
prune_db.initial()

# NOTE(review): the layer printed above (`layer_name`) is layer3.2.conv2, but
# channel selection below runs on layer2.0.conv2 - confirm which one is intended.
selection_layer = 'layer2.0.conv2'
for name, ch in layers:
    if name != selection_layer:
        continue
    print(f"{name}: {ch} channels")
    # Use the hyper-parameters printed above so the log matches what is run.
    keep, discard = lcp.get_channel_selection_by_no_grad(
        layer_name=f"net_feature_maps.{name}",
        discard_rate=0.5,
        lambda_rate=lambda_rate,
        use_image_num=use_image_num,
        random_seed=42,
    )
    print(f"layer {name} , 預計保留通道數量: {len(keep)}/{ch}, 預計捨棄通道數量: {len(discard)}/{ch}")
    # Record which channels of this layer are kept.
    prune_db.write_data(
        layer=f"net_feature_maps.{name}",
        original_channel_num=len(keep) + len(discard),
        num_of_keep_channel=len(keep),
        keep_index=keep,
    )

print(layers)

# Candidate layers for pruning: names ending in '.conv1' or '.conv2'
# (the top-level 'conv1' has no leading dot and is therefore excluded).
layer_names = [name for name, _ in layers if name.endswith(('.conv1', '.conv2'))]

# --- Pruner and fine-tuning loss ---------------------------------------------
pruner = Pruner(lcp._prune_net)
pruner.set_prune_db(prune_db)

lcp_criterion = LCPFinetuneCriterion(
    original_criterion=criterion,  # the original loss instance
    aux_net=lcp._aux_net,          # instance providing the aux_loss method
    auxiliary_weight=1.0,          # weight of the auxiliary loss; tunable
)

[LCP] 初始化完成，共 43 層的 channel 索引
conv1: 64 channels
layer1.0.conv1: 64 channels
layer1.0.conv2: 64 channels
layer1.0.conv3: 256 channels
layer1.0.downsample.0: 256 channels
layer1.1.conv1: 64 channels
layer1.1.conv2: 64 channels
layer1.1.conv3: 256 channels
layer1.2.conv1: 64 channels
layer1.2.conv2: 64 channels
layer1.2.conv3: 256 channels
layer2.0.conv1: 128 channels
layer2.0.conv2: 128 channels
layer2.0.conv3: 512 channels
layer2.0.downsample.0: 512 channels
layer2.1.conv1: 128 channels
layer2.1.conv2: 128 channels
layer2.1.conv3: 512 channels
layer2.2.conv1: 128 channels
layer2.2.conv2: 128 channels
layer2.2.conv3: 512 channels
layer2.3.conv1: 128 channels
layer2.3.conv2: 128 channels
layer2.3.conv3: 512 channels
layer3.0.conv1: 256 channels
layer3.0.conv2: 256 channels
layer3.0.conv3: 1024 channels
layer3.0.downsample.0: 1024 channels
layer3.1.conv1: 256 channels
layer3.1.conv2: 256 channels
layer3.1.conv3: 1024 channels
layer3.2.conv1: 256 channels
layer3.2.conv2: 256 channels
laye

In [4]:
# Give the LCP controller the pruning database before any layer is pruned.
lcp.set_prune_db(prune_db)

# Walk the candidate layers and prune only the one at position 15
# (discard rate 0.8); scanning stops as soon as it has been handled.
for idx, layer_name in enumerate(layer_names):
    if idx == 15:
        lcp.prune_layer(layer_name=layer_name, discard_rate=0.8)
        break

[LCP] 開始基於數學推導的無梯度通道重要性計算 - net_feature_maps.layer3.0.conv2
[397, 629, 437]
[LOG] 原始網路特徵提取完成
[LOG] 剪枝網路特徵提取完成
397 {'channels': [{'l1_norm': 0.49752166867256165, 'variance': 0.42441627383232117, 'mean_deviation': 0.32325613498687744, 'energy': 0.42437100410461426, 'sparsity': 0.0, 'importance': 0.4560863673686981}, {'l1_norm': 0.5900108218193054, 'variance': 0.479300320148468, 'mean_deviation': 0.5353514552116394, 'energy': 0.5248606204986572, 'sparsity': 0.0, 'importance': 0.5386933088302612}, {'l1_norm': 0.8703845739364624, 'variance': 0.9372933506965637, 'mean_deviation': 0.28254762291908264, 'energy': 1.3023971319198608, 'sparsity': 0.0, 'importance': 0.7318464517593384}, {'l1_norm': 0.5571710467338562, 'variance': 0.47396528720855713, 'mean_deviation': 0.4513489007949829, 'energy': 0.4907011389732361, 'sparsity': 0.0, 'importance': 0.510222315788269}, {'l1_norm': 0.6636227369308472, 'variance': 0.5451728105545044, 'mean_deviation': 0.7361550331115723, 'energy': 0.7168207764625549, 

In [5]:
# Build the fine-tuning driver around the network held in `lcp._prune_net`,
# reusing the training dataloader, normalization, box coder and config
# created in the earlier cells.
# NOTE(review): `optimizer` and `parameters` were created from `net`, not from
# `lcp._prune_net`. If the two are distinct objects (or pruning replaces
# parameter tensors), this optimizer may not update the network being
# fine-tuned - confirm against LCPFineTune / LCP.
lcp_finetune = LCPFineTune(
    prune_net = lcp._prune_net,
    dataloader_train = dataloader_train,
    img_normalization = img_normalization,
    box_coder = box_coder,
    cfg       = cfg,
    optimizer=optimizer,
    parameters=parameters
)

In [8]:
# Ids used for the lookup below.
# NOTE(review): the first two arguments of get_specific_data are presumably
# (image_id, class_id) - TODO confirm against DataLoaderDB.
image_id = 0
class_id = 0
m = 5

# Read the four stored corner coordinates (first entry of each column) as floats.
point1_x, point1_y, point2_x, point2_y = (
    float(db.get_specific_data(image_id, class_id, column)[0])
    for column in ('point1_x', 'point1_y', 'point2_x', 'point2_y')
)

# Pack the coordinates as two (x, y) corner tuples.
point1 = (point1_x, point1_y)
point2 = (point2_x, point2_y)

In [9]:
# Diagnostic probe of the auxiliary loss for one stored box.
# The value is only displayed, never back-propagated, so it is evaluated under
# torch.no_grad() to avoid keeping an autograd graph on the GPU (the captured
# run of this cell ended in a CUDA out-of-memory error on a 4 GiB device).
# NOTE(review): assumes aux_loss does not itself need gradients - confirm.
with torch.no_grad():
    aux_loss_value = lcp_criterion.aux_net.aux_loss(0, 0, point1, point2)
aux_loss_value

OutOfMemoryError: CUDA out of memory. Tried to allocate 138.00 MiB (GPU 0; 4.00 GiB total capacity; 3.26 GiB already allocated; 0 bytes free; 3.33 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [6]:
# Short, low-memory fine-tuning run: unlock the config, apply the overrides,
# and always lock it again - even if one of the assignments fails.
# NOTE(review): the dataloader, optimizer and LCPFineTune instance were built
# in earlier cells, before these overrides; the captured fine-tune output still
# lists four images per batch, so `batch_size = 1` probably does not reach the
# existing dataloader. Consider applying the overrides before building them.
cfg.defrost()
try:
    cfg.train.optim.max_iter = 10
    cfg.train.do_training = True
    cfg.output.print_iter = 2
    cfg.eval.iter = 5
    cfg.train.batch_size = 1  # smallest possible batch
    cfg.train.accumulate_grad_batches = 1

    # Switch off features that are not needed for this run.
    if hasattr(cfg.train, 'mining'):
        cfg.train.mining.do_mining = False
    if hasattr(cfg.output, 'best_model'):
        cfg.output.best_model.do_get_best_model = False
finally:
    cfg.freeze()
        

In [7]:
# Launch fine-tuning of the pruned network with the combined
# (original + auxiliary) criterion built earlier in the notebook.
lcp_finetune.start_finetune(
    criterion= lcp_criterion,  # use the custom loss function
)

Image 33 size FeatureMapSize(w=3264, h=2448) has 16 boxes
Image 499 size FeatureMapSize(w=2448, h=3264) has 12 boxes
Image 14 size FeatureMapSize(w=2448, h=3264) has 15 boxes
Image 669 size FeatureMapSize(w=2448, h=3264) has 20 boxes


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 4.00 GiB total capacity; 3.23 GiB already allocated; 0 bytes free; 3.31 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF