In [1]:
import os
import torch
import torchvision.models as models
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import time
from math import ceil
from torchinfo import summary
import json
import matplotlib.pyplot as plt
import numpy as np
import random
import seaborn as sns
import sys

os.chdir('/EdMIPS')
# EdMIPS/models ディレクトリへのパスを追加
sys.path.append('/EdMIPS/models')
import models as mymodels
from matplotlib.colors import ListedColormap
from pprint import pprint
import matplotlib.ticker as ticker
from models.quant_efficientnet import BasicCNNBlock

# Version tag embedded in the names of the generated tables and figures.
VERSION = 0
# GPU ids written to CUDA_VISIBLE_DEVICES for this process.
SELECTED_GPUS = [0, 1]
os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(gpu_number) for gpu_number in SELECTED_GPUS)

assert torch.cuda.is_available(), 'CUDA is not available'

# Make sure enough GPUs are present (the experiments use two devices).
device_count = torch.cuda.device_count()
if device_count < 2:
    raise RuntimeError("このコードは少なくとも2つのGPUが必要です。")

print('Number of devices: {}'.format(device_count))

for gpu in range(device_count):
    print(f"Device {gpu}: {torch.cuda.get_device_name(gpu)}")

plt.rcParams.update({'font.size': 22})

# Change the current directory to 'EdMIPS'; the relative paths below depend on it.
os.chdir('/EdMIPS')


# Directory holding the measured batch tables (JSON files).
BATCH_TABLE_DIR = 'batch_table'
# exist_ok avoids the check-then-create race and makes the cell re-runnable.
os.makedirs(BATCH_TABLE_DIR, exist_ok=True)

PLOT_LINESTYLES = ['-', '--', '-.', ':']
PLOT_MARKERS = ['o', 'v', 'P', 'X', 'D', '^', 's']
FIGURE_SIZE = (15, 5)

ModuleNotFoundError: No module named 'seaborn'

In [None]:
def _load_arch(arch_path, names_nbits):
    checkpoint = torch.load(arch_path)
    state_dict = checkpoint['state_dict']
    best_arch, worst_arch = {}, {}
    for name in names_nbits.keys():
        best_arch[name], worst_arch[name] = [], []
    for name, params in state_dict.items():
        name = name.split('.')[-1]
        if name in names_nbits.keys():
            alpha = params.cpu().numpy()
            assert names_nbits[name] == alpha.shape[0]
            best_arch[name].append(alpha.argmax())
            worst_arch[name].append(alpha.argmin())

    return best_arch, worst_arch

def get_model(config):
    """Build the network named by ``config["model_name"]``.

    Args:
        config: Experiment dict. ``"model_name"`` selects the network.
            ``"arch_path"`` is additionally required by the ``quanteffnet_cfg*``
            models, whose bit-widths are read from that checkpoint.

    Returns:
        ``(model, archas)``. ``archas`` is the list of activation bit-widths
        used by the bottleneck search. For the torchvision reference models it
        is ``None`` because no bit-widths are defined for them here
        (previously those branches failed with ``UnboundLocalError``).

    Raises:
        ValueError: If ``model_name`` is not supported.
    """
    model_name = config["model_name"]

    # torchvision reference networks: config name -> constructor name.
    # Their final classification layer is left as constructed.
    torchvision_constructors = {
        'vgg-16': 'vgg16',
        'vgg-19': 'vgg19',
        'resnet-18': 'resnet18',
        'resnet-34': 'resnet34',
        'resnet-50': 'resnet50',
        'resnet-101': 'resnet101',
    }

    # Uniform-precision networks:
    # config name -> (constructor name in mymodels, activation bits, entries).
    # The baselines report 2 bits on purpose: every layer's split point should
    # be measured, and 2 bits makes each split point get selected.
    # NOTE(review): 80 / 129 are presumably the number of quantized conv
    # layers of the B0 / B3 variants -- confirm against the model definition.
    uniform_precision = {
        'quanteffnet_w8a8': ('quanteffnet_w8a8', 8, 80),
        'quanteffnet_w8a8_b3': ('quanteffnet_w8a8_b3', 8, 129),
        'efficient_baseline': ('quanteffnet_w8a8', 2, 80),
        'efficientb3_baseline': ('quanteffnet_w8a8_b3', 2, 129),
    }

    # Searched mixed-precision networks:
    # config name -> (weight bit candidates, activation bit candidates).
    searched_precision = {
        'quanteffnet_cfg_2468': ([2, 4, 6, 8], [2, 4, 6, 8]),
        'quanteffnet_cfg_2468_b3': ([2, 4, 6, 8], [2, 4, 6, 8]),
        'quanteffnet_cfg': ([2, 4, 6, 8], [2, 3, 4]),
    }

    if model_name in torchvision_constructors:
        constructor = getattr(models, torchvision_constructors[model_name])
        return constructor(pretrained=True), None

    if model_name in uniform_precision:
        constructor_name, activation_bits, entries = uniform_precision[model_name]
        archas = [activation_bits] * entries
        model = mymodels.__dict__[constructor_name]("path")
        return model, archas

    if model_name in searched_precision:
        wbits, abits = searched_precision[model_name]
        name_nbits = {'alpha_activ': len(abits), 'alpha_weight': len(wbits)}
        best_arch, _ = _load_arch(config["arch_path"], name_nbits)
        # Translate the selected candidate indices into actual bit-widths.
        archas = [abits[a] for a in best_arch['alpha_activ']]
        model = mymodels.__dict__[model_name](config["arch_path"])
        return model, archas

    raise ValueError(f"Unsupported model_name: {config['model_name']}")

In [None]:
def get_natural_bottlenecks(model, input_size, act_bits, compressive_only=True):
    """Find the feature blocks whose input is smaller (in bits) than the raw image.

    A random ``1 x 3 x input_size x input_size`` tensor is pushed through
    ``model.features`` block by block. Before each block (except
    ``BasicCNNBlock`` instances) the number of input values times the
    activation bit-width of the block's first conv layer is compared with the
    size of the 8-bit input image.

    Args:
        model: Network exposing an iterable ``features`` attribute.
        input_size: Height and width of the square input image.
        act_bits: Activation bit-widths, indexed by the running conv-layer count.
        compressive_only: When True, keep a candidate only if its compression
            is strictly better than the last one kept.

    Returns:
        List of dicts with keys ``'layer_name'`` (``"blocks_<block_number>"``),
        ``'compression'`` (transmitted bits relative to the raw input),
        ``'cnn_layer_number'`` and ``'block_number'`` (features index minus one).

    NOTE(review): the model's train/eval mode is left untouched, so layers that
    behave differently in training mode still do so during this probe.
    """
    natural_bottlenecks = []
    best_compression = 1.0
    cnn_count = 0  # running count of conv layers; indexes act_bits
    input_bit = 8  # bit-width of the raw input
    input_values = input_size * input_size * 3
    bit_compression = [act_bit / input_bit for act_bit in act_bits]

    device = next(model.parameters()).device
    mock_input = torch.randn(1, 3, input_size, input_size).to(device)

    # Only tensor shapes matter here, so skip the autograd bookkeeping.
    with torch.no_grad():
        for i, module in enumerate(model.features):
            # Index 0 is the first BasicCNNBlock of features, so block
            # numbering effectively starts at features[1].
            block_number = i - 1
            if isinstance(module, BasicCNNBlock):
                print(f"Encountered BasicBlock at features.{i}")
                mock_input = module(mock_input).detach()
                continue

            # Number of values per sample entering this block.
            input_size_layer = mock_input[0].numel()
            if input_size_layer * act_bits[cnn_count] < input_values * input_bit:
                compression = float(input_size_layer) / input_values
                compression *= bit_compression[cnn_count]
                print(i, block_number)
                if not compressive_only or compression < best_compression:
                    natural_bottlenecks.append({
                        'layer_name': "blocks_{}".format(block_number),
                        'compression': compression,
                        'cnn_layer_number': cnn_count,  # conv-layer index at this block's input
                        'block_number': block_number,
                    })
                    best_compression = compression
            mock_input = module(mock_input).detach()

            cnn_count += count_conv2d_layers(module)
    return natural_bottlenecks

def count_conv2d_layers(model):
    """Return the number of ``nn.Conv2d`` layers (subclasses included) in ``model``.

    ``Module.modules()`` already walks the whole module tree, nested
    ``nn.Sequential`` containers included. The former extra pass over
    ``nn.Sequential`` children therefore counted a ``Conv2d`` placed directly
    inside a ``Sequential`` twice.
    """
    return sum(1 for module in model.modules() if isinstance(module, nn.Conv2d))

In [None]:
def get_inference_time(model, batch_size, device, repetitions=300, input_shape=None, intermediate=None,):
    """Measure the average forward-pass time of ``model`` on ``device``.

    Args:
        model: Module to time. It is switched to eval mode and moved to ``device``.
        batch_size: Batch dimension of the generated input.
        device: Target device (anything accepted by ``torch.device``).
        repetitions: Number of timed forward passes; must be positive.
        input_shape: Optional shape whose first dimension is replaced by
            ``batch_size``. When omitted, ``(batch_size, 3, S, S)`` is used with
            ``S = config['image_size']`` taken from the notebook-level ``config``.
        intermediate: Optional ready-made input tensor. When given it is used
            as is (it must already live on ``device``) and neither
            ``input_shape`` nor ``config`` is consulted.

    Returns:
        Average time of one forward pass in milliseconds.

    Raises:
        ValueError: If ``repetitions`` is not positive.
    """
    if repetitions <= 0:
        raise ValueError(f"repetitions must be positive, got {repetitions}")

    # Evaluation mode, on the requested device.
    model = model.eval().to(device)

    if intermediate is not None:
        input_data = intermediate
    else:
        if input_shape is None:
            input_shape = (batch_size, 3, config['image_size'], config['image_size'])
        else:
            input_shape = (batch_size,) + tuple(input_shape[1:])
        # The input is an all-ones tensor in the default dtype, not real data.
        input_data = torch.ones(input_shape, device=device)

    # CUDA kernels run asynchronously and must be awaited before reading the
    # clock; other device types have nothing to synchronize.
    needs_cuda_sync = torch.device(device).type == 'cuda'
    warmup_iterations = 10  # about 10 passes are enough to warm up

    with torch.no_grad():
        # Warm-up phase.
        for _ in range(warmup_iterations):
            model(input_data)
        if needs_cuda_sync:
            torch.cuda.synchronize(device)

        # Timed phase.
        start = time.perf_counter()
        for _ in range(repetitions):
            model(input_data)
        if needs_cuda_sync:
            torch.cuda.synchronize(device)
        end = time.perf_counter()

    # Average time per forward pass, in milliseconds.
    inference_time = (end - start) / repetitions * 1000
    return inference_time

In [None]:
def get_batch_table_path(config):
    """Return the JSON path under BATCH_TABLE_DIR used for this config's batch table.

    The file name contains the model name and VERSION. When the weak
    processor's name contains 'CPU', that name (with '/' removed) is included
    as well; otherwise the shorter legacy name is used.
    """
    weak_processor = config['processors']['weak']
    if 'CPU' not in weak_processor:
        # legacy name
        file_name = '%s_v%d.json' % (config['model_name'], VERSION)
    else:
        file_name = '%s_%s_v%d.json' % (
            config['model_name'],
            weak_processor.replace('/', ''),
            VERSION,
        )
    return os.path.join(BATCH_TABLE_DIR, file_name)

def save_batch_table(batch_table, config):
    """Write ``batch_table`` as JSON to the path given by ``get_batch_table_path(config)``."""
    with open(get_batch_table_path(config), mode='w') as handle:
        json.dump(batch_table, fp=handle)

def load_batch_table(config):
    """Read back the JSON batch table stored at ``get_batch_table_path(config)``."""
    with open(get_batch_table_path(config), mode='r') as handle:
        contents = handle.read()
    return json.loads(contents)
    
    
def create_batch_table(config):
    """Measure inference times for every batch size and every natural bottleneck.

    For each batch size in ``config['batch_sizes']`` the table records:

    * ``'whole_device'``: the full model on the weak processor,
    * ``'whole_edge'``: the full model on the strong processor (full offloading),
    * one entry per natural bottleneck, with its ``'compression'`` and the
      ``'head'`` (weak processor) and ``'tail'`` (strong processor) times.

    All times are in milliseconds, as returned by ``get_inference_time``.
    The table is saved after every measured bottleneck, so partial results
    survive an interruption.

    Returns:
        The batch table, keyed by batch size.

    TODO: it should be possible to take all measurements in a single pass.
    """
    model, act_bits = get_model(config)
    natural_bottlenecks = get_natural_bottlenecks(model, config["image_size"], act_bits)

    weak_device = config['processors']['weak']
    strong_device = config['processors']['strong']
    total_steps = len(natural_bottlenecks) + 2

    def report_progress(step):
        # '\r' rewrites the same console line on every step.
        sys.stdout.write('\r%d/%d' % (step, total_steps))
        sys.stdout.flush()

    batch_table = {}

    for batch_size in config['batch_sizes']:
        print('Batch Size:', batch_size)
        batch_table[batch_size] = {}

        report_progress(1)
        batch_table[batch_size]['whole_device'] = get_inference_time(model, batch_size, device=weak_device)

        report_progress(2)
        # Full offloading.
        batch_table[batch_size]['whole_edge'] = get_inference_time(model, batch_size, device=strong_device)

        for i, bottleneck in enumerate(natural_bottlenecks):
            report_progress(i + 3)

            layer_name = bottleneck['layer_name']
            # Number of leading feature blocks that stay on the weak device.
            split_index = bottleneck['block_number'] + 1
            head_model = nn.Sequential(*list(model.features[:split_index]))

            batch_table[batch_size][layer_name] = {
                'compression': bottleneck['compression'],
                'head': get_inference_time(head_model, batch_size, device=weak_device),
            }

            input_shape = (batch_size, 3, config['image_size'], config['image_size'])
            # The input is an all-ones tensor rather than image data.
            input_data = torch.ones(input_shape, device=weak_device)
            # Only the value of the intermediate tensor is needed, so do not
            # build an autograd graph for it.
            with torch.no_grad():
                edge_output = head_model(input_data)
            edge_output = edge_output.to(strong_device)

            # The tail is rebuilt from the remaining feature blocks plus the
            # pooling, flatten and classifier stages.
            server_model_layers = list(model.features[split_index:]) + [model.pool] + [nn.Flatten()]
            server_model_layers += list(model.classifier)

            tail_model = nn.Sequential(*server_model_layers)
            batch_table[batch_size][layer_name]['tail'] = get_inference_time(
                tail_model,
                batch_size, device=strong_device, intermediate=edge_output
            )

            save_batch_table(batch_table, config)
        print()  # finish the '\r' progress line
    return batch_table

def split_efficientnet_model(model, split_layer, edge_device, server_device):
    """Split ``model`` into an edge part and a server part at ``split_layer``.

    The edge part holds ``model.features[:split_layer]``. The server part
    holds the remaining feature blocks followed by ``model.pool``, a
    ``Flatten`` layer and the layers of ``model.classifier``. Each part is
    moved to its own device.

    NOTE(review): not called anywhere in the visible part of this notebook.

    Returns:
        ``(edge_model, server_model)`` as ``nn.Sequential`` modules.
    """
    edge_model = nn.Sequential(*model.features[:split_layer]).to(edge_device)

    tail_layers = [
        *model.features[split_layer:],
        model.pool,
        nn.Flatten(),
        *model.classifier,
    ]
    server_model = nn.Sequential(*tail_layers).to(server_device)
    return edge_model, server_model


In [None]:
# Quantization does not change the bit-width used when sending, so the
# communication time is unaffected by it.
def get_load(compression, batch_size, config, full_offloading):
    """Return the amount of data to transmit for one batch.

    The load is the number of raw input values
    (``batch_size * image_size**2 * 3`` channels) scaled by ``compression``.
    Split outputs are counted at one unit per value (int8 is the default; the
    earlier float32 factor of 4 is disabled), so ``full_offloading`` does not
    change the result at present.
    """
    raw_input_values = batch_size * (config['image_size'] ** 2) * 3
    load = raw_input_values * compression
    if full_offloading:
        return load
    # int8 is the default (a float32 payload would use a factor of 4).
    return load * 1

def fix_legend_name(name):
    """Map the two baseline keys to readable legend labels; other names pass through."""
    baseline_labels = (
        ('whole_device', 'No Offloading'),
        ('whole_edge', 'Full Offloading'),
    )
    for key, label in baseline_labels:
        if name == key:
            return label
    return name

def create_inference_plots(batch_table, config, create_individual=True):
    """Plot inference time against data rate and the best strategy per setting.

    For every batch size and every data rate in
    ``arange(min_bandwidth, max_bandwidth, bandwidth_step)`` the end-to-end
    time of each strategy (no offloading, full offloading, each split point)
    is computed as compute time plus transmission time.

    Files written:
      * (``create_individual`` only) one PNG per batch size with all curves,
      * ``inference_time/<model_name>_<graph_title>.npy`` with the fastest
        time per data rate, one row per batch size,
      * an SVG map of the fastest strategy per batch size and data rate,
      * one SVG gain heat map per strategy.

    Uses the notebook-level names FIGURE_SIZE, PLOT_LINESTYLES, PLOT_COLORS,
    INFERENCE_PLOT_DIR and VERSION.

    Args:
        batch_table: Batch table keyed by batch size (int or numeric string).
        config: Experiment configuration dict.
        create_individual: Whether to draw the per-batch-size line plots.
    """
    # The strategies are read from the first batch-size entry; every entry is
    # expected to contain the same keys.
    split_points = list(batch_table[list(batch_table.keys())[0]].keys())
    bandwidths = np.arange(config['min_bandwidth'], config['max_bandwidth'], config['bandwidth_step'])
    weak_processor_tag = config['processors']['weak'].replace('/', '')
    best_splits = {}
    gains = {}
    # Fastest time per data rate for each batch size; saved for the gain graph.
    inference_times__all = []
    for batch_size in batch_table.keys():
        if create_individual:
            plt.figure(figsize=FIGURE_SIZE)
        entry = batch_table[batch_size]
        inference_times_list = []
        for i, split_point in enumerate(split_points):
            if split_point == 'whole_device':
                # Nothing is transmitted, so the time is flat over data rate.
                inference_times = np.repeat(entry[split_point], bandwidths.shape[0])
                if create_individual:
                    plt.ylim(0, entry[split_point] * 2)
            elif split_point == 'whole_edge':
                load = get_load(1, int(batch_size), config, True)
                inference_times = entry[split_point] + load / bandwidths * 1000  # in milliseconds
            else:
                load = get_load(entry[split_point]['compression'], int(batch_size), config, False)
                inference_times = entry[split_point]['head'] + entry[split_point]['tail'] + load / bandwidths * 1000  # in milliseconds
            if create_individual:
                plt.plot(
                    bandwidths / 10 ** 6,
                    inference_times,
                    label=fix_legend_name(split_point),
                    linestyle=PLOT_LINESTYLES[i % len(PLOT_LINESTYLES)]
                )
            inference_times_list.append(inference_times)
        if create_individual:
            plt.xlabel('Data Rate (MBps)')
            plt.ylabel('Inference Time (ms)')
            plt.legend(bbox_to_anchor=(1.01, 1), loc='upper left')
            save_path = os.path.join(INFERENCE_PLOT_DIR, '%s_%s_%s_v%d.png' % (
                config['model_name'],
                batch_size,
                weak_processor_tag,
                VERSION
            ))
            plt.savefig(save_path, bbox_inches='tight')
            plt.close()

        # Rows: strategies, columns: data rates. Computed once per batch size.
        stacked_times = np.array(inference_times_list)
        fastest_times = np.min(stacked_times, axis=0)
        best_splits[batch_size] = np.argmin(stacked_times, axis=0)
        inference_times__all.append(fastest_times)
        gains[batch_size] = {}
        for split_point_index, split_point in enumerate(split_points):
            absolute_diff = inference_times_list[split_point_index] - fastest_times
            relative_diff = absolute_diff / inference_times_list[split_point_index]
            diff_percent = relative_diff * 100
            # The gain is capped from above at its own mean value.
            gains[batch_size][split_point] = np.clip(
                diff_percent,
                None,
                np.mean(diff_percent)
            )
    os.makedirs("inference_time", exist_ok=True)
    save_path = os.path.join("inference_time", '%s_%s.npy' % (
        config['model_name'],
        config['graph_title'],
    ))
    np.save(save_path, inference_times__all)

    total_points = 0
    useful_split_points = 0
    color_mapped_values = []
    plt.figure(figsize=FIGURE_SIZE)
    plt.gca().yaxis.set_major_locator(ticker.MaxNLocator(integer=True))

    for batch_size, best_split in sorted(best_splits.items(), key=lambda x: int(x[0])):
        # best_split already holds, per data rate, the index of the fastest
        # strategy in split_points.
        row = [int(split_point_index) for split_point_index in best_split]
        color_mapped_values.append(row)
        total_points += len(row)
        useful_split_points += sum(
            1 for split_point_index in row
            if split_points[split_point_index] not in ('whole_edge', 'whole_device')
        )
    print(color_mapped_values)

    # Replace the strategy indices that actually occur by consecutive integers
    # so that the colour map has exactly one colour per occurring strategy.
    strategy_grid = np.array(color_mapped_values)
    unique_indices, inverse = np.unique(strategy_grid, return_inverse=True)
    used_split_indices = unique_indices.tolist()
    print(used_split_indices)
    used_colors = [PLOT_COLORS[used_split_index] for used_split_index in used_split_indices]
    print(used_colors)
    color_map = ListedColormap(used_colors)

    color_mapped_values = np.reshape(inverse, strategy_grid.shape)
    colormesh = plt.pcolormesh(color_mapped_values, cmap=color_map, edgecolors='w', linewidth=0.5)
    # Legend: the colour bar ticks are replaced by strategy names.
    cbar = plt.colorbar(colormesh)
    cbar.ax.get_yaxis().set_ticks([])
    for j, used_split_index in enumerate(used_split_indices):
        cbar.ax.text(
            1.2,  # position relative to the colour bar
            j / len(used_split_indices) + 0.5 / len(used_split_indices),
            fix_legend_name(split_points[used_split_index]),
            ha='left',
            va='center',
            transform=cbar.ax.transAxes
        )
    plt.xlabel('Data Rate (MBps)')
    plt.ylabel('Batch Size')
    plt.title(config["graph_title"], fontsize=25, weight="bold")
    save_path = os.path.join(INFERENCE_PLOT_DIR, '%s_all_%s_v%d.svg' % (
        config['model_name'],
        weak_processor_tag,
        VERSION
    ))
    plt.savefig(save_path, bbox_inches='tight')
    plt.close()
    # Share of (batch size, data rate) settings where a real split is fastest.
    print('Percent of scenarios where split computing is useful: %.2f%% (%d/%d)' % (
        useful_split_points / total_points * 100,
        useful_split_points,
        total_points
    ))

    # One gain heat map per strategy.
    for split_point in split_points:
        heatmap_data = [
            gains[batch_size][split_point]
            for batch_size in sorted(gains.keys(), key=lambda x: int(x))
        ]
        fig, main_ax = plt.subplots()
        fig.set_size_inches(FIGURE_SIZE[0], FIGURE_SIZE[1])
        ax = sns.heatmap(np.array(heatmap_data), cbar_kws={'label': 'Gain %'}, ax=main_ax, linewidths=0.1, linecolor='w')
        ax.set_xlabel('Data Rate (MBps)')
        ax.set_ylabel('Batch Size')
        ax.invert_yaxis()
        save_path = os.path.join(INFERENCE_PLOT_DIR, '%s_gain_over_%s_%s_v%d.svg' % (
            config['model_name'],
            split_point.replace('/', ''),  # ViT has / in block names
            weak_processor_tag,
            VERSION
        ))
        ax.figure.savefig(save_path, bbox_inches='tight')
        plt.close(fig)
        
#batch_tableがすでに作られてる場合動かないので注意 
# def run_experiment(config, recreate=False):
#     if recreate or not os.path.exists(get_batch_table_path(config)):
#         print("run expereiment")
#         batch_table = create_batch_table(config)

# def run_experimentじゃダメなの？
def create_baseline_table(config):
    """Create the inference-time table for a baseline model.

    Args:
        config: Experiment configuration; ``config["model_name"]`` must
            contain ``'baseline'``.

    Returns:
        The batch table produced by ``create_batch_table`` (previously the
        table was built and then discarded).

    Raises:
        ValueError: If the configured model is not a baseline model.
    """
    if 'baseline' not in config["model_name"]:
        raise ValueError("モデルにbaselineが指定されていません")
    return create_batch_table(config)

In [None]:
def create_inference_time_only(batch_table,config):
    """Plot, per batch size, the baselines and the best split over data rate.

    For each batch size in ``config["batch_sizes"]`` one PNG is written to
    ``./<INFERENCE_PLOT_DIR_MODIFIED>/<model_name>/inference_time_<batch>.png``
    with up to three curves: no offloading, full offloading, and the
    per-data-rate minimum over all split points. A curve is drawn only when
    its data is present in ``batch_table``; previously a missing
    ``'whole_edge'`` or ``'whole_device'`` entry caused a crash on an
    unassigned variable.

    Args:
        batch_table: Batch table keyed by the batch size as a string.
        config: Experiment configuration dict.

    NOTE(review): the figure title ('EfficientNet_b0') and the curve label
    ('λ=0.01') are hard-coded and do not follow ``config``.
    """
    split_points = list(batch_table[list(batch_table.keys())[0]].keys())
    bandwidths = np.arange(config['min_bandwidth'], config['max_bandwidth'], config['bandwidth_step'])
    data_rates_mbps = bandwidths / 10 ** 6

    for batch_size in config["batch_sizes"]:
        # Reset per batch size so that nothing leaks from the previous one.
        whole_device_inference = None
        whole_edge_inference = None
        inference_times_list = []
        plt.figure(figsize=FIGURE_SIZE)
        entry = batch_table[str(batch_size)]
        for split_point in split_points:
            # No offloading (int8)
            if split_point == 'whole_device':
                whole_device_inference = np.repeat(entry['whole_device'], bandwidths.shape[0])
                plt.ylim(0, entry[split_point] * 2)
            # Full offloading (int8)
            elif split_point == 'whole_edge':
                load = get_load(1, int(batch_size), config, True)
                whole_edge_inference = entry[split_point] + load / bandwidths * 1000
            # Split computing (ours)
            else:
                load = get_load(entry[split_point]['compression'], int(batch_size), config, False)
                inference_times_list.append(entry[split_point]['head'] + entry[split_point]['tail'] + load / bandwidths * 1000)  # in milliseconds

        if whole_device_inference is not None:
            plt.plot(
                data_rates_mbps,
                whole_device_inference,
                label="whole_device(int8)",
            )
        if whole_edge_inference is not None:
            plt.plot(
                data_rates_mbps,
                whole_edge_inference,
                label="whole_server(int8)",
                linestyle='-.'
            )
        if inference_times_list:
            # Fastest split point at each data rate.
            dsc_inference = np.min(np.array(inference_times_list), axis=0)
            plt.plot(
                data_rates_mbps,
                dsc_inference,
                label="Ours(λ=0.01)",
                linestyle='-'
            )
        plt.title(f'EfficientNet_b0 batch_size:{batch_size}')
        plt.xlabel('Data Rate (MBps)')
        plt.ylabel('Inference Time (ms)')
        plt.legend(bbox_to_anchor=(1.01, 1), loc='upper left')
        plt.grid()

        save_dir = f'./{INFERENCE_PLOT_DIR_MODIFIED}/{config["model_name"]}'
        os.makedirs(save_dir, exist_ok=True)
        save_path = os.path.join(save_dir, 'inference_time_%s.png' % (
                batch_size
            ))
        plt.savefig(save_path, bbox_inches='tight')
        plt.close()
        

In [None]:
# Show the VERSION tag that is embedded in the generated file names.
print(VERSION)

0


In [None]:
# Experiment configuration shared by the cells below.
config = {
    'processors': {
        # 'weak' runs the whole-device and head measurements,
        # 'strong' runs the full-offloading and tail measurements.
        'weak': 'cuda:1',
        'strong': 'cuda:0',
    },
    'model_name': 'quanteffnet_cfg_2468',
    'arch_path' : "/EdMIPS/arch_output/mixeffnet_b0_w2468a2468_100_csd0.01_modified/arch_model_best.pth.tar",
    'image_size': 224,
    'batch_sizes': list(range(1, 31)),
    'max_bandwidth': 128 * 10 ** 6,  # Bytes per second
    'min_bandwidth': 1 * 10 ** 6,  # Bytes per second
    'bandwidth_step': 1 * 10 ** 6,  # Bytes per second
    # Raw string: "\l" is not a valid escape sequence in a normal literal.
    # The resulting value is unchanged (it is also used in output paths).
    'graph_title': r'effcientnet_b0_w2468a2468_forlossynet ($\lambda = 0.01$)',
}


# config = {
#     'processors': {
#         'weak': 'cuda:1',
#         'strong': 'cuda:0',
#     },
#     'model_name': 'quanteffnet_cfg_2468_b3',
#     'arch_path' : "/EdMIPS/arch_output/mixeffnet_b3_w2468a2468_100_csd0.01_modified_v2/arch_model_best.pth.tar",
#     'image_size': 300,
#     'batch_sizes': list(range(1, 16)),
#     'max_bandwidth': 128 * 10 ** 6,  # Bytes per second
#     'min_bandwidth': 1 * 10 ** 6,  # Bytes per second
#     'bandwidth_step': 1 * 10 ** 6,  # Bytes per second
#     'graph_title': 'Ours ($\lambda = 0.01$)',
# }




In [36]:
# Prepare the output directory for this model, rebuild the model, locate its
# natural bottlenecks and choose one plot colour per candidate.
MODEL_NAME = config['model_name']  # model name from the config
ARCH_NAME = config['graph_title']  # graph title, used as a directory name
INFERENCE_PLOT_DIR = os.path.join('inference_plot', MODEL_NAME, ARCH_NAME, str(VERSION))
# exist_ok keeps the cell re-runnable.
os.makedirs(INFERENCE_PLOT_DIR, exist_ok=True)
model, act_bits = get_model(config)
natural_bottlenecks = get_natural_bottlenecks(model, config["image_size"], act_bits=act_bits, compressive_only=True)

print([bottleneck['block_number'] for bottleneck in natural_bottlenecks])
# block_number is the index in model.features minus one (see
# get_natural_bottlenecks), so +1 restores the features index. The leading 0
# adds one extra colour in front of the bottleneck colours.
block_numbers = [0] + [(bottleneck['block_number']+1) for bottleneck in natural_bottlenecks]
print(block_numbers)

colors_list = [
    'blue', 'green', 'red', 'teal', 'magenta',
    'yellow', 'black', 'orange', 'purple', 'brown',
    'pink', 'gray', 'olive', 'lime', 'indigo',
    'gold', 'darkblue', 'darkgreen', 'coral', 'skyblue',
    'lavender', 'beige', 'turquoise', 'plum', 'salmon','cyan'
]
# Wrap around so that models with more feature blocks than available colours
# do not fail with an IndexError.
PLOT_COLORS = [colors_list[i % len(colors_list)] for i in block_numbers]
# print(PLOT_COLORS)

archas: [6, 2, 4, 8, 6, 4, 6, 8, 6, 6, 4, 8, 4, 6, 2, 2, 4, 6, 8, 2, 4, 6, 4, 6, 4, 8, 6, 4, 6, 2, 2, 2, 2, 6, 2, 2, 6, 2, 6, 4, 4, 6, 4, 4, 2, 2, 2, 2, 6, 2, 2, 4, 2, 6, 2, 4, 4, 2, 6, 2, 4, 2, 2, 4, 4, 6, 6, 4, 6, 2, 4, 4, 2, 6, 2, 4, 4, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 6, 6, 4, 4, 4, 4, 4, 6, 6, 4, 2, 2, 6, 6, 4, 2, 4, 6, 6, 4, 2, 4, 6, 2, 2, 2, 2, 6, 2, 4, 4, 4, 4, 6, 2, 4, 4, 2, 8]
archws: [8, 8, 2, 8, 4, 8, 8, 4, 8, 8, 2, 2, 8, 2, 2, 2, 4, 2, 2, 2, 8, 8, 2, 8, 8, 8, 2, 8, 4, 2, 6, 2, 6, 4, 2, 8, 8, 2, 8, 8, 8, 2, 8, 2, 2, 6, 8, 2, 4, 2, 6, 8, 2, 2, 2, 6, 4, 2, 2, 2, 4, 8, 2, 8, 8, 8, 8, 8, 2, 2, 6, 6, 2, 2, 2, 8, 2, 2, 2, 2, 4, 2, 2, 2, 2, 6, 6, 2, 8, 2, 8, 8, 8, 8, 2, 4, 6, 8, 8, 2, 4, 2, 8, 8, 2, 4, 2, 8, 8, 2, 2, 2, 8, 8, 2, 2, 2, 8, 8, 4, 8, 4, 8, 2, 2, 4, 4, 2, 8]
Encountered BasicBlock at features.0
[3, 6, 9, 17, 19, 23]
[0, 4, 7, 10, 18, 20, 24]


In [37]:
# Print every top-level entry of model.features together with its index.
for i, module in enumerate(model.features):
    print(i, module)

0 BasicCNNBlock(
  (conv): Conv2d(3, 38, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn): BatchNorm2d(38, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (silu): SiLU()
)
1 InvertedResidualBlock(
  (conv): Sequential(
    (0): CNNBlock(
      (cnn): QuantActivConv2d(
        (activ): HWGQ()
        (conv): QuantConv2d(38, 38, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=38, bias=False)
      )
      (bn): BatchNorm2d(38, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (silu): SiLU()
    )
    (1): SqueezeExcitation(
      (se): Sequential(
        (0): AdaptiveAvgPool2d(output_size=1)
        (1): QuantActivConv2d(
          (activ): HWGQ()
          (conv): QuantConv2d(38, 9, kernel_size=(1, 1), stride=(1, 1), bias=False)
        )
        (2): SiLU()
        (3): QuantActivConv2d(
          (activ): HWGQ()
          (conv): QuantConv2d(9, 38, kernel_size=(1, 1), stride=(1, 1), bias=False)
        )
        (4)

In [44]:
def load_batch_table1(batch_table_path='batch_table/efficient_baseline_v5.json'):
    """Load a previously measured baseline batch table from JSON.

    Args:
        batch_table_path: File to read. The default is the B0 baseline table;
            the B3 counterpart is 'batch_table/efficientb3_baseline_v10.json'.

    Returns:
        The decoded JSON content.
    """
    with open(batch_table_path, 'r') as batch_table_file:
        return json.load(batch_table_file)

# Load the baseline measurements and show them.
batch_table1 = load_batch_table1()
print(batch_table1)

# Names of the natural bottlenecks found for the current model.
layer_names = [bottleneck['layer_name'] for bottleneck in natural_bottlenecks]
print(layer_names)
# Write the names back onto the bottleneck records (the values are unchanged).
for bottleneck, layer_name in zip(natural_bottlenecks, layer_names):
    bottleneck["layer_name"] = layer_name

print(natural_bottlenecks)


def get_items_for_values(dictionary, whole_device, whole_edge, target_key, bottlenecks=None):
    """Extract selected entries from a baseline batch table.

    From each batch-size entry of ``dictionary`` this keeps the
    ``'whole_device'`` / ``'whole_edge'`` baselines (when requested) and the
    blocks listed in ``target_key``. For a kept block, ``'compression'`` is
    replaced by the value that ``get_natural_bottlenecks()`` computed for
    another model, matched by layer name.

    Args:
        dictionary: Batch table keyed by batch size (numeric strings).
        whole_device: Keep the ``'whole_device'`` entry.
        whole_edge: Keep the ``'whole_edge'`` entry.
        target_key: Block names to keep.
        bottlenecks: Records with ``'layer_name'`` and ``'compression'``.
            Defaults to the notebook-level ``natural_bottlenecks``.

    Returns:
        A new table keyed by batch size (as strings, in numeric order). Block
        records are copies, so ``dictionary`` is left unmodified.
    """
    compression_by_layer = None  # built on first use
    new_dict = {}
    for batch_key in sorted(dictionary, key=int):
        selected = {}
        for key, value in dictionary[batch_key].items():
            # Exact comparison: the former `key in "whole_device"` was a
            # substring test and also matched keys such as "whole".
            if key == "whole_device" and whole_device:
                selected[key] = value
            if key == "whole_edge" and whole_edge:
                selected[key] = value
            if key in target_key:
                if compression_by_layer is None:
                    source = natural_bottlenecks if bottlenecks is None else bottlenecks
                    compression_by_layer = {
                        bottleneck['layer_name']: bottleneck['compression']
                        for bottleneck in source
                    }
                # Copy before overwriting so the caller's table is untouched.
                record = dict(value)
                if key in compression_by_layer:
                    record['compression'] = compression_by_layer[key]
                selected[key] = record
        new_dict[str(batch_key)] = selected
    return new_dict
# Preview the filtered table with both baselines kept.
print(get_items_for_values(batch_table1, True, True, layer_names))

{'1': {'whole_device': 115.05890115666641, 'whole_edge': 33.403415089999655, 'blocks_0': {'compression': 0.6666666666666666, 'head': 0.4038302233334434, 'tail': 34.097693313333366}, 'blocks_1': {'compression': 0.3333333333333333, 'head': 3.374997586666571, 'tail': 30.076414529999813}, 'blocks_2': {'compression': 0.125, 'head': 10.962852356666795, 'tail': 28.343934909999764}, 'blocks_3': {'compression': 0.125, 'head': 16.692602493333577, 'tail': 29.61196232666642}, 'blocks_4': {'compression': 0.052083333333333336, 'head': 21.212122013333403, 'tail': 25.124492406666832}, 'blocks_5': {'compression': 0.052083333333333336, 'head': 25.445428483333593, 'tail': 22.210260236666574}, 'blocks_6': {'compression': 0.026041666666666668, 'head': 29.815285576666685, 'tail': 20.545710316666828}, 'blocks_7': {'compression': 0.026041666666666668, 'head': 33.80711141666666, 'tail': 17.86688488999971}, 'blocks_8': {'compression': 0.026041666666666668, 'head': 38.88537258666664, 'tail': 15.531997696666622},

In [None]:
# Keep the no-offloading baseline and the natural-bottleneck blocks;
# whole_edge=False leaves the 'whole_edge' entry out of the table.
batch_table = get_items_for_values(batch_table1, True, False, layer_names)
# batch_table = get_items_for_values(batch_table1, True, True, layer_names)
# Root directory for the figures written by create_inference_time_only().
INFERENCE_PLOT_DIR_MODIFIED = 'inference_plot_modified'
save_dir = f'./{INFERENCE_PLOT_DIR_MODIFIED}'
os.makedirs(save_dir, exist_ok=True)
create_inference_time_only(batch_table, config)

# The next cell creates the gain graph.

In [12]:
# Gain heat map: how much faster the proposed configuration is than an
# existing one, for every batch size (rows) and data rate (columns).
# Earlier input noted here: quanteffnet_w8a8_v8
existing = "int8_b3_v1.npy"
# Raw string so that "\l" is not parsed as an (invalid) escape sequence;
# the file name itself is unchanged.
new = r"quanteffnet_cfg_2468_b3_Ours ($\lambda = 0.025$).npy"

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
# only load files produced by this notebook.
y = np.load(os.path.join('inference_time', existing), allow_pickle=True)  # existing method
z = np.load(os.path.join('inference_time', new), allow_pickle=True)  # proposed method
# Earlier input noted here: quanteffnet_cfg_2468_ours4_v9
absolute_diff = y - z
relative_diff = absolute_diff / y
diff_percent = relative_diff * 100
# Negative gains are kept on purpose: the former clip to >= 0 was overwritten
# on the very next line and never took effect.
heatmap_data = diff_percent


fig, main_ax = plt.subplots()
fig.set_size_inches(FIGURE_SIZE[0], FIGURE_SIZE[1])
ax = sns.heatmap(np.array(heatmap_data), cbar_kws={'label': 'Gain %'}, ax=main_ax, linewidths=0.3, linecolor='w')
ax.set_xlabel('Data Rate (MBps)')
ax.set_ylabel('Batch Size')
ax.invert_yaxis()

# Adjust y-ticks
n = 2  # Display every 2nd tick
yticks = ax.yaxis.get_major_ticks()
for i, tick in enumerate(yticks):
    tick.label1.set_visible(i % n == 0)


GAIN_DIR = os.path.join('gain_graph')
# exist_ok keeps the cell re-runnable.
os.makedirs(GAIN_DIR, exist_ok=True)
save_path = os.path.join(GAIN_DIR, '%s_gain_over_%s.svg' % (
existing,
new,))
ax.figure.savefig(save_path, bbox_inches='tight')
plt.close(fig)


# Summary statistics over all batch sizes and data rates.
max_value = max([max(sublist) for sublist in heatmap_data])
print("max gain:",max_value)

# Mean of the per-batch-size means.
sublist_sums = [sum(sublist)/len(sublist) for sublist in heatmap_data]
average = sum(sublist_sums) / len(heatmap_data)
print("average gain:", average)


max gain: 38.59692667482385
average gain: 29.38170668540817
