# テーマD：量子化-多層分解同時実行時における深さとビット幅の最適比
[Open In Colab](https://colab.research.google.com/github/ArtIC-TITECH/b3-proj-2025/blob/main/theme_D/theme_D.ipynb)

## モジュールの読み込み

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.nn.init as init
import numpy as np
import matplotlib.pyplot as plt
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader
from torch.autograd import Function
import math


## MNISTのデータセット/精度評価関数の作成

In [2]:
# 実行デバイスの設定
device = 'cuda:2'

# 普通のtransform
transform_normal = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# テストデータには普通のtransformを使ってください
transform_for_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform_normal) # モデルの学習に使うデータセット
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform_for_test) # モデルの評価に使うデータセット
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

def compute_accuracy(model, test_loader, device='cuda:0'):
    model.eval()  # 評価モード
    model.to(device)
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images.to(device))
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels.to(device)).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')
    return accuracy

def train(model, lr=0.05, epochs=5, device='cuda:0'):
    # 損失関数と最適化手法の定義
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)
    model.to(device)
    for epoch in range(epochs):
        loss_sum = 0
        for images, labels in train_loader:
            # モデルの予測
            outputs = model(images.to(device))

            # 損失の計算
            loss = criterion(outputs, labels.to(device))
            loss_sum += loss.item()

            # 勾配の初期化
            optimizer.zero_grad()

            # バックプロパゲーション
            loss.backward()

            # オプティマイザの更新
            optimizer.step()

        # 損失を表示
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss_sum/len(train_loader):.4f}')
    return model



100%|██████████| 9.91M/9.91M [00:01<00:00, 5.38MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 176kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 1.65MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 3.04MB/s]



## 通常モデルの学習

In [None]:
class SimpleModel(nn.Module):
    def __init__(self): # モデルのセットアップ
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 24)
        self.fc2 = nn.Linear(24, 10)

    def forward(self, x): # モデルが行う処理
        x = x.view(-1, 28 * 28)  # 28x28の画像を１次元に変換
        x = self.fc1(x) 
        x = nn.ReLU()(x) # 活性化関数
        x = self.fc2(x) 
        return x


精度の確認

In [None]:

# モデルのインスタンスを作成
model = SimpleModel().to(device)
model = train(model, lr=0.1, epochs=10, device=device)
accuracy = compute_accuracy(model, test_loader, device=device)

Accuracy: 91.56%


## スカラー量子化（一様対称量子化）の実行

###  プロセス：量子化層に変換-->量子化認識学習

ここでは簡便に量子化パラメータをmin-maxスケーリングで決定する
対称量子化なので、行列Xの最大値と最小値の差の２分の1を$p$-bitの数値範囲の最大値$q_{max}$でわる

$q_{max} = 2^{(p-1)} - 1$

$s = \frac{max(X) - min(X)}{2q_{max}}$

$X_{q} = s * \text{clip}(\text{round}(\frac{X}{s}), -q_{max}, q_{max})$

In [4]:

class SymQuantSTE(Function):
    @staticmethod
    def forward(ctx, input: torch.Tensor, scale: torch.Tensor, num_bits: int):
        if num_bits == 1:
            s = scale.abs()
            output = s * torch.sgn(input)
        else:
            s = scale.abs().clamp_min(1e-8)
            qmax = 2 ** (num_bits - 1) - 1
            q = torch.clamp(torch.round(input / s), -qmax, qmax)
            output = q * s

        # backward用に保存
        ctx.save_for_backward(input, s)
        ctx.num_bits = num_bits
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, s = ctx.saved_tensors   # forwardでsaveしたものを正しく取り出す
        num_bits = ctx.num_bits
        if num_bits == 1:
            mask = (input.abs() <= s).to(grad_output.dtype)
            grad_input = grad_output * mask
        else:
            qmax = 2 ** (num_bits - 1) - 1
            mask = (input.abs() <= qmax * s).to(grad_output.dtype)
            grad_input = grad_output * mask

        return grad_input, None, None




class SymQuantLinear(nn.Linear):
    def __init__(self, in_features, out_features, bias=True, weight_bits=8, act_bits=None):
        super().__init__(in_features, out_features, bias)
        self.weight_bits = weight_bits
        self.act_bits = act_bits

    def forward(self, input):
        # weight のスケール
        if self.weight_bits == 1:
            weight_scale = self.weight.abs().sum() / self.weight.numel()
        else:
            qmax_w = 2 ** (self.weight_bits - 1) - 1
            weight_scale = (self.weight.max() - self.weight.min()) / (2 * qmax_w)

        # activation のスケール
        if self.act_bits is not None:
            if self.act_bits == 1:
                act_scale = input.abs().sum() / input.numel()
            else:
                qmax_a = 2 ** (self.act_bits - 1) - 1
                act_scale = (input.max() - input.min()) / (2 * qmax_a)
            input = SymQuantSTE.apply(input, act_scale, self.act_bits)

        # quantized weight
        w_q = SymQuantSTE.apply(self.weight, weight_scale, self.weight_bits)

        return F.linear(input, w_q, self.bias)



def replace_linear_with_quantizedlinear(module, weight_bits=8, act_bits=None):
    for name, child in module.named_children():
        # すでに QuantizedLinear ならスキップ
        if isinstance(child, SymQuantLinear):
            qlinear = SymQuantLinear(
                child.in_features,
                child.out_features,
                bias=(child.bias is not None),
                weight_bits=weight_bits,
                act_bits=act_bits
            )
            # 重みとバイアスをコピー
            qlinear.weight.data.copy_(child.weight.data)
            if child.bias is not None:
                qlinear.bias.data.copy_(child.bias.data)
            setattr(module, name, qlinear)
        if isinstance(child, nn.Linear):
            qlinear = SymQuantLinear(
                child.in_features,
                child.out_features,
                bias=(child.bias is not None),
                weight_bits=weight_bits,
                act_bits=act_bits
            )
            # 重みとバイアスをコピー
            qlinear.weight.data.copy_(child.weight.data)
            if child.bias is not None:
                qlinear.bias.data.copy_(child.bias.data)
            setattr(module, name, qlinear)
        else:
            replace_linear_with_quantizedlinear(child, weight_bits, act_bits)
    return module



In [None]:
# モデルのインスタンスを作成
model = SimpleModel().to(device)
# 通常学習
print('warming up by no-quantized training...')
model = train(model, lr=0.1, epochs=5, device=device)
# Linear層をQuantizedLinearに置換
model_q = replace_linear_with_quantizedlinear(model, weight_bits=1, act_bits=None)
print('quantization aware training...')
model_q = train(model_q, lr=1e-2, epochs=10, device=device)
accuracy = compute_accuracy(model_q, test_loader)

In [5]:
class DecomposedLinear(nn.Module):
    def __init__(self, in_dim, out_dim, depth=3, bias=True):
        super().__init__()
        assert depth >= 1, "depth must be >= 2"

        # 中間次元 l を計算
        if depth == 2:
            l = int(round(in_dim * out_dim / (in_dim + out_dim)))
            dims = [in_dim, l, out_dim]
        elif depth == 1:
            l = min(in_dim, out_dim)
            dims = [in_dim, out_dim]
        else:
            a = depth - 2
            b = in_dim + out_dim
            c = - in_dim * out_dim
            l = int(round((-b + math.sqrt(b*b - 4*a*c)) / (2*a)))
            dims = [in_dim] + [l]*(depth-1) + [out_dim]

        self.l = l
        self.depth = depth

        # nn.Linear を順につなげる
        self.layers = nn.ModuleList([
            nn.Linear(dims[i], dims[i+1], bias=(i==depth-1 and bias))
            for i in range(depth)
        ])

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x
    

# --- nn.Linear をすべて DecomposedLinear に置換 ---
def replace_linear_with_decomposedlinear(module, depth=3):
    for name, child in module.named_children():
        if isinstance(child, DecomposedLinear):
            continue
        if isinstance(child, nn.Linear):
            m, n = child.in_features, child.out_features
            dlinear = DecomposedLinear(m, n, depth=depth)
            # バイアスは最後の層にコピー
            if child.bias is not None:
                dlinear.layers[-1].bias.data.copy_(child.bias.data)
            setattr(module, name, dlinear)
        else:
            replace_linear_with_decomposedlinear(child, depth=depth)
    return module



In [6]:
# 行列分解モデルのインスタンスを作成
model = SimpleModel().to(device)
model = replace_linear_with_decomposedlinear(model, depth=2)

# 通常学習
print('warming up by no-quantized training...')
model = train(model, lr=0.1, epochs=5, device=device)
# Linear層をDecomposedQuantizedLinearに置換
model_q = replace_linear_with_quantizedlinear(model, weight_bits=1, act_bits=None)

print('quantization aware training...')
model_q = train(model_q, lr=1e-2, epochs=10, device=device)
accuracy = compute_accuracy(model_q, test_loader)

warming up by no-quantized training...
Epoch [1/5], Loss: 0.5831
Epoch [2/5], Loss: 0.2633
Epoch [3/5], Loss: 0.2200
Epoch [4/5], Loss: 0.1907
Epoch [5/5], Loss: 0.1762
quantization aware training...
Epoch [1/10], Loss: 0.8134
Epoch [2/10], Loss: 0.5061
Epoch [3/10], Loss: 0.4639
Epoch [4/10], Loss: 0.4582
Epoch [5/10], Loss: 0.4344
Epoch [6/10], Loss: 0.4179
Epoch [7/10], Loss: 0.4139
Epoch [8/10], Loss: 0.4184
Epoch [9/10], Loss: 0.4200
Epoch [10/10], Loss: 0.4149
Accuracy: 86.96%


## モデルサイズの確認

In [8]:
def compute_model_size(model: nn.Module):
    total_size = 0
    for module in model.modules():
        if isinstance(module, nn.Linear):
            # 通常の Linear 層のサイズ (float32)
            total_size += module.weight.numel() * 4  # float32 として計算
            if module.bias is not None:
                total_size += module.bias.numel() * 4  # float32 として計算
        elif isinstance(module, SymQuantLinear):
            if module.weight_bits is None:
                module.weight_bits = 32  # float32として計算
            # weight のサイズ
            total_size += module.weight.numel() * module.weight_bits/8  
            # bias のサイズ
            if module.bias is not None:
                total_size += module.bias.numel() * 4  # float32 として計算
    return total_size / (1024 * 1024)  # MB単位で返す


model_size = compute_model_size(model_q)
print(f'Model size: {model_size:.2f} MB')

Model size: 0.07 MB


## 課題
### ・重みをパラメータ数を変えずに行列分解して多層化したものに対し量子化をした場合、低ビット幅の状況下において深さと精度はどのような関係になるか評価する
### ・行列分解に加えて、層間に活性化関数を挟みこむと、低ビット時の精度はどうなるかを評価する