In [1]:
#套件
import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F
from torch.optim import Adam
from torchvision import datasets,transforms
from torch.utils.data import DataLoader
import os
from torch.autograd import Variable
import time
from time import sleep
import matplotlib.pyplot as plt
import numpy as np
import torch.nn.utils.prune as prune
from typing import Tuple, Dict
import torchvision.transforms as transforms
from torch import Tensor
from torchvision.datasets import CIFAR10


In [None]:
aaasda

In [None]:
#sub a and b  



In [3]:
#模型架構

class Net(nn.Module):

    def __init__(self) -> None:
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x: Tensor) -> Tensor:
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


DATA_ROOT = "./data"

def load_data() -> Tuple[torch.utils.data.DataLoader, torch.utils.data.DataLoader, Dict]:
    """Load CIFAR-10 (training and test set)."""
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )
    trainset = CIFAR10(DATA_ROOT, train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)
    testset = CIFAR10(DATA_ROOT, train=False, download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False)
    num_examples = {"trainset" : len(trainset), "testset" : len(testset)}
    return trainloader, testloader, num_examples

def train(
    net: Net,
    trainloader: torch.utils.data.DataLoader,
    epochs: int,
    device: torch.device,
) -> None:
    """Train the network."""
    # Define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    print(f"Training {epochs} epoch(s) w/ {len(trainloader)} batches each")

    # Train the network
    for epoch in range(epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            images, labels = data[0].to(device), data[1].to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 100 == 99:  # print every 100 mini-batches
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0


def test(
    net: Net,
    testloader: torch.utils.data.DataLoader,
    device: torch.device,
) -> Tuple[float, float]:
    """Validate the network on the entire test set."""
    criterion = nn.CrossEntropyLoss()
    correct = 0
    total = 0
    loss = 0.0
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = net(images)
            loss += criterion(outputs, labels).item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = correct / total
    return loss, accuracy


def main():
    DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Centralized PyTorch training")
    print("Load data")
    trainloader, testloader, _ = load_data()
    print("Start training")
    net=Net().to(DEVICE)
    # train(net=net, trainloader=trainloader, epochs=2, device=DEVICE)
    print("Evaluate model")
    loss, accuracy = test(net=net, testloader=testloader, device=DEVICE)
    print("Loss: ", loss)
    print("Accuracy: ", accuracy)


if __name__ == "__main__":
    main()

Centralized PyTorch training
Load data
Files already downloaded and verified
Files already downloaded and verified
Start training
Evaluate model
Loss:  721.453800201416
Accuracy:  0.1016


In [6]:
#efficient模型架構

def swish(x):
    return x * x.sigmoid()


def drop_connect(x, drop_ratio):
    keep_ratio = 1.0 - drop_ratio
    mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
    mask.bernoulli_(keep_ratio)
    x.div_(keep_ratio)
    x.mul_(mask)
    return x


class SE(nn.Module):
    '''Squeeze-and-Excitation block with Swish.'''

    def __init__(self, in_channels, se_channels):
        super(SE, self).__init__()
        self.se1 = nn.Conv2d(in_channels, se_channels,
                             kernel_size=1, bias=True)
        self.se2 = nn.Conv2d(se_channels, in_channels,
                             kernel_size=1, bias=True)

    def forward(self, x):
        out = F.adaptive_avg_pool2d(x, (1, 1))
        out = swish(self.se1(out))
        out = self.se2(out).sigmoid()
        out = x * out
        return out


class Block(nn.Module):
    '''expansion + depthwise + pointwise + squeeze-excitation'''

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 expand_ratio=1,
                 se_ratio=0.,
                 drop_rate=0.):
        super(Block, self).__init__()
        self.stride = stride
        self.drop_rate = drop_rate
        self.expand_ratio = expand_ratio

        # Expansion
        channels = expand_ratio * in_channels
        self.conv1 = nn.Conv2d(in_channels,
                               channels,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(channels)

        # Depthwise conv
        self.conv2 = nn.Conv2d(channels,
                               channels,
                               kernel_size=kernel_size,
                               stride=stride,
                               padding=(1 if kernel_size == 3 else 2),
                               groups=channels,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(channels)

        # SE layers
        se_channels = int(in_channels * se_ratio)
        self.se = SE(channels, se_channels)

        # Output
        self.conv3 = nn.Conv2d(channels,
                               out_channels,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        # Skip connection if in and out shapes are the same (MV-V2 style)
        self.has_skip = (stride == 1) and (in_channels == out_channels)

    def forward(self, x):
        out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x)))
        out = swish(self.bn2(self.conv2(out)))
        out = self.se(out)
        out = self.bn3(self.conv3(out))
        if self.has_skip:
            if self.training and self.drop_rate > 0:
                out = drop_connect(out, self.drop_rate)
            out = out + x
        return out


class EfficientNet(nn.Module):
    def __init__(self, cfg, num_classes=10):
        super(EfficientNet, self).__init__()
        self.cfg = cfg
        self.conv1 = nn.Conv2d(3,
                               32,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_channels=32)
        self.linear = nn.Linear(cfg['out_channels'][-1], num_classes)

    def _make_layers(self, in_channels):
        layers = []
        cfg = [self.cfg[k] for k in ['expansion', 'out_channels', 'num_blocks', 'kernel_size',
                                     'stride']]
        b = 0
        blocks = sum(self.cfg['num_blocks'])
        for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg):
            strides = [stride] + [1] * (num_blocks - 1)
            for stride in strides:
                drop_rate = self.cfg['drop_connect_rate'] * b / blocks
                layers.append(
                    Block(in_channels,
                          out_channels,
                          kernel_size,
                          stride,
                          expansion,
                          se_ratio=0.25,
                          drop_rate=drop_rate))
                in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = swish(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        dropout_rate = self.cfg['dropout_rate']
        if self.training and dropout_rate > 0:
            out = F.dropout(out, p=dropout_rate)
        out = self.linear(out)
        return out


def EfficientNetB0():
    cfg = {
        'num_blocks': [1, 2, 2, 3, 3, 4, 1],
        'expansion': [1, 6, 6, 6, 6, 6, 6],
        'out_channels': [16, 24, 40, 80, 112, 192, 320],
        'kernel_size': [3, 3, 5, 3, 5, 5, 3],
        'stride': [1, 2, 2, 2, 1, 2, 1],
        'dropout_rate': 0.2,
        'drop_connect_rate': 0.2,
    }
    return EfficientNet(cfg)


def test():
    net = EfficientNetB0()
    x = torch.randn(2, 3, 32, 32)
    y = net(x)
    print(y.shape)


if __name__ == '__main__':
    test()

torch.Size([2, 10])


In [67]:
#模型剪枝

# Let's load the model we just created and test the accuracy per label
model = EfficientNetB0()
state_dict = torch.load('/Users/taka/Desktop/code/FL-Pruning/FL3/efficientnet-b0.pth',map_location='cpu')
model.load_state_dict(state_dict)
layer_name = []


# for name, param in model.named_parameters():
name_list = [n for n,p in model.named_parameters()]
#print(name_list)

for name in name_list:

    # layer_name.append(name)
    # print(name)
    if 'weight' in name:
        m = getattr(model, name.split('.')[0])
        # print(m)

        try:
            prune.ln_structured(m,name="weight", amount=0.2,n=1,dim=0)
            a = m.weight.data
            b = m.weight_mask.data

            m.weight.data =m.weight.data.mul(m.weight_mask.data)       
            c = m.weight_mask.data

            for name, module in model.named_modules():
                if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear):
                    print(module)
                    prune.remove(module, 'weight')
                    # prune.remove(module, 'bias')
        except:
            try:
                
                prune.remove(module, 'weigh_mask')
                print('a')
            except:
                pass



#原始code---------------------------------


        # a = m.weight.data
    
        # b = m.weight_mask.data
        # print(a[0][2])
        # print(b[0][2])


        #sub weight to mask 
        
        # m.weight.data =m.weight.data.mul(m.weight_mask.data)       
        # c = m.weight_mask.data
        # print(c[0][2])


        # get the weight of each layer
    # if 'bias' in name:
    #     try:
    #         m = getattr(model, name.split('.')[0])
    #         prune.random_unstructured(m,name="bias", amount=0.03)

    #         m.bias.data =m.bias.data.mul(m.bias_mask.data)

    #         #sub weight and mask        
    #         weight = m.bias.data
    #         mask = m.bias_mask.data
    #     except:
    #         pass

#prune model mask   
# for name, module in model.named_modules():
#     if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear):
#         print(module)
#         prune.remove(module, 'weight')
#         # prune.remove(module, 'bias')




# # save model
torch.save(model.state_dict(), '/Users/taka/Desktop/code/FL-Pruning/FL3/efficientnet-b0_new.pth')

Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)


In [68]:
# vaildation the model
model = EfficientNetB0()
state_dict = torch.load('/Users/taka/Desktop/code/FL-Pruning/FL3/efficientnet-b0_new.pth',map_location='cpu')

#新--------------------------------
from collections import OrderedDict 

new_state_dict = OrderedDict()
for key,value in torch.load('/Users/taka/Desktop/code/FL-Pruning/FL3/efficientnet-b0_new.pth').items():
    if 'weight_orig' in key:
        title = key.split('.')
        name = title[0]+'.weight'
        new_state_dict[name] = value
        value1 = value
        print(key)
    elif 'weight_mask' in key:
        title = key.split('.')
        name = title[0]+'.weight'
        new_state_dict[name] = value1
        print(key)
    else:
        new_state_dict[key] = value

model.load_state_dict(new_state_dict)
#原始--------------------------------
#model.load_state_dict(state_dict)
#--------------------------------

model.eval()


transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )



# generate the test data
testset = CIFAR10(DATA_ROOT, train=False, download=True, transform=transform)
# generate the test data
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False)


# test the model
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0], data[1]
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

linear.weight_orig
linear.weight_mask
Files already downloaded and verified
Accuracy of the network on the 10000 test images: 23 %


In [13]:
# vaildation the model
model = EfficientNetB0()
state_dict = torch.load('/Users/taka/Desktop/code/FL-Pruning/FL3/efficientnet-b0_74%_new.pth',map_location='cpu')

#新--------------------------------
from collections import OrderedDict 

new_state_dict = OrderedDict()
for key,value in torch.load('/Users/taka/Desktop/code/FL-Pruning/FL3/efficientnet-b0_74%_new\
    .pth',map_location='cpu').items():
    if 'weight_orig' in key:
        title = key.split('.')
        name = title[0]+'.weight'
        new_state_dict[name] = value
        value1 = value
        print(key)
    elif 'weight_mask' in key:
        title = key.split('.')
        name = title[0]+'.weight'
        new_state_dict[name] = value1
        print(key)
    else:
        new_state_dict[key] = value

model.load_state_dict(new_state_dict)

linear.weight_orig
linear.weight_mask


<All keys matched successfully>