In [1]:
%matplotlib inline
import torch
from torch import nn
from torch.nn import functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.optim as optim
from tqdm import tqdm
import os
print(torch.__version__)

1.10.2


# 模型结构图

t 为扩张稀疏比例，c 为输出通道数，n 为该层重复的次数，s为步长,输入图片大小224x224x3

![pic](mobilenetv2_stru.png)

In [2]:
class ConvBNReLu(nn.Sequential):
    def __init__(self,in_channel,out_channel,kernel_size=3,strides=1,groups=1):
        padding=(kernel_size-1)//2
        super().__init__(
            nn.Conv2d(in_channel,out_channel,kernel_size,strides,padding,groups=groups,bias=False),
            nn.BatchNorm2d(out_channel),
            nn.ReLU6(inplace=True),
        )
        self.out_channel=out_channel

class InvertedResidual(nn.Module):
    def __init__(self,in_channel,out_channel,strides,pw_ratio=6):
        '''
        pw_ratio: 第一个pw的1x1卷积channel数拓展比例
        '''
        super().__init__()
        # strides只能是1 或 2
        assert strides in [1, 2]
        self.use_shortcut= strides==2 and in_channel==out_channel
        hidden_channel=int(round(in_channel*pw_ratio))
        layer=[]
        if pw_ratio!=1:
            # add 1x1 升维
            layer.append(ConvBNReLu(in_channel,hidden_channel,kernel_size=1))
        layer.extend([
            ConvBNReLu(hidden_channel,hidden_channel,strides=strides,groups=hidden_channel),
            nn.Conv2d(hidden_channel,out_channel,kernel_size=1,stride=1,padding=0,bias=False),
            nn.BatchNorm2d(out_channel),
        ])
        self.InvertedResidual=nn.Sequential(*layer)
        self.out_channel=out_channel
    def forward(self,x):
        return x+self.InvertedResidual(x) if self.use_shortcut else self.InvertedResidual(x) 
        

        

In [3]:
class MobileNetV2(nn.Module):
    # 网络默认结构
    # 对应参数为 t,c,n,s
    net_stru=(
    (1, 16, 1, 1),
    (6, 24, 2, 2),
    (6, 32, 3, 2),
    (6, 64, 4, 2),
    (6, 96, 3, 1),
    (6, 160, 3, 2),
    (6, 320, 1, 1),
    )
    _defaults={
        "first_channel":32,
        "last_channel":1280,
        "round_nearest":8,
        "width_mult":1.0
    }
    def __init__(self,num_class=10,net_stru=None,full_conn=True):
        '''
        width_mult:输入输出的channel扩增比例
        num_class: 分类数目
        round_nearest:要求每个输入输出都要被round_nearest整除
        net_stru: 网络结构的tuple
        full_conn: 是否使用最后的全连接层分类层
        '''
        super().__init__()
        self.__dict__.update(self._defaults)
        # 一些可调参数
        if(net_stru):
            # 如果外部传入了网络结构,用外部的
            self.net_stru=net_stru
        if(len(self.net_stru)==0 or len(self.net_stru[0])!=4):
            raise ValueError("input net struct error %s",self.net_stru)
        self.use_classfy=full_conn
        self.num_class=num_class
        
        # 确保每个输入和输出的通道数都为8的倍数
        self.first_channel=MobileNetV2._make_divisible(self.first_channel,self.round_nearest)
        # 也就是说输出至少是1280,可以被调大
        self.last_channel=MobileNetV2._make_divisible(self.last_channel*max(self.width_mult,1.0),self.round_nearest)
        features=[ConvBNReLu(3,self.first_channel,strides=2)]
        # 记录第一个输入通道数
        in_channel=self.first_channel
        for t,c,n,s in self.net_stru:
            out_channel=MobileNetV2._make_divisible(c*self.width_mult,self.round_nearest)
            for cnt in range(n):
                # 几个连续的倒残差块,仅第一个卷积的跨距大小会有不同
                if cnt==0:
                    strides=s
                else:
                    strides=1
                features.append(InvertedResidual(in_channel,out_channel,strides,pw_ratio=t))
                in_channel=out_channel
        # 加上最后的转变成conv2d 1x1的卷积
        features.append(ConvBNReLu(in_channel,self.last_channel,kernel_size=1))
        self.bone=nn.Sequential(*features)
        
        if(self.use_classfy):
            self.classify=nn.Sequential(
                nn.Dropout(0.2),
                nn.Linear(self.last_channel,self.num_class),
            )
        self.weight_init()
        
    def weight_init(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # 一种正态分布
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)
                
    def forward(self,x):
        out=self.bone(x)
        if(self.use_classfy):
            # 对2,3两个维度求平均,也就是把batchx1280x7x7的向量
            # 求解为batchx1280的维度
            out=out.mean([2,3])
            out=self.classify(out)
        return out
        
        
    @staticmethod
    def _make_divisible(v, divisor, min_value=None):
        '''
        全是整数运算,保证返回的new_v可以被divisor整除
        :param v:输入channel数量,例如32
        :param divisor:被整除的数字,一般为8,硬件要求对齐
        :param min_value:
        :return:
        '''
        if min_value is None:
            min_value = divisor
        # int(v + divisor / 2) // divisor * divisor 这句是求v被8整除的floor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v  

        

In [4]:
device='cuda' if torch.cuda.is_available() else "cpu"
net=MobileNetV2()
net.to(device)
rand_data=torch.randn(4,3,224,224).to(device)
out=net(rand_data)
out.shape

torch.Size([4, 10])

In [5]:
# 注意torch里是宽高顺序,和PIL不同
img_size=(224,224)
# mean和std均值需要按照数据集来修改
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
batch=64
lr=0.01

In [6]:
#prepare dataset and preprocessing
transform_train = transforms.Compose([
    transforms.Resize(img_size),
    transforms.RandomCrop(img_size, padding=4),
    transforms.RandomHorizontalFlip(),
    # 至少要加上下面这句ToTensor
    transforms.ToTensor(),
    # ciaf10固有均值标准差
    transforms.Normalize(mean,std)
])

transform_val = transforms.Compose([
    transforms.Resize(img_size),
    # 至少要加上下面这句ToTensor
    transforms.ToTensor(),
    transforms.Normalize(mean,std)
])


In [7]:
# minist数据集
# 正常来说,train_set这个类是需要自己定义的,但是在官方数据集中已经给定义好了
data_dir="D:/data/image/"
if not os.path.exists(data_dir):
    # 尝试mac的文件夹
    data_dir="~/data"
    if not os.path.exists(data_dir):
            raise FileExistsError("data source not exist!")
train_set=datasets.ImageFolder(root=data_dir+'animals10/train',
                        transform=transform_train)

val_set=datasets.ImageFolder(root=data_dir+'animals10/val',
                        transform=transform_val)
train_set_len=len(train_set)
val_set_len=len(val_set)
print('train data',train_set_len)
print('val data',val_set_len)
print('train label',train_set.class_to_idx)
print('val label',val_set.class_to_idx)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch,
                                         shuffle=True, num_workers=6)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch,
                                         shuffle=False, num_workers=6)


train data 23556
val data 2623
train label {'cane': 0, 'cavallo': 1, 'elefante': 2, 'farfalla': 3, 'gallina': 4, 'gatto': 5, 'mucca': 6, 'pecora': 7, 'ragno': 8, 'scoiattolo': 9}
val label {'cane': 0, 'cavallo': 1, 'elefante': 2, 'farfalla': 3, 'gallina': 4, 'gatto': 5, 'mucca': 6, 'pecora': 7, 'ragno': 8, 'scoiattolo': 9}


In [8]:
class CalcLoss(nn.Module):
    def __init__(self,num_classes=10):
        super().__init__()
        self.criterion=nn.CrossEntropyLoss()
    def forward(self,y_true,y_pred):
        return self.criterion(y_true,y_pred)

In [9]:
class TrainModel(object):
    _defaults={
        "eopch":2,
    }
    def __init__(self,net,loss,train_dataloder,optimizer,**kwargs):
        '''
        还支持传入字典参数
        '''
        self.__dict__.update(self._defaults)
        self.net=net
        self.loss=loss
        self.dataloder=train_dataloder
        self.optimizer=optimizer
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.device='cuda' if torch.cuda.is_available() else "cpu"
    def __call__(self,train_total_len,batch_size):
        self.net.train()
        # 需要注意这里写的是train_set的长度,如果写错成train_loader,返回的是数据集一共有多少个batch
        with tqdm(total=train_total_len,desc=f'Train:') as pbar:
            for idx,data in enumerate(self.dataloder):
                data,label=data
                data,label=data.to(self.device),label.to(self.device)
                self.optimizer.zero_grad()
                # forward
                outputs=self.net(data)
                loss=self.loss(outputs,label)
                loss.backward()
                self.optimizer.step()
                # 更新进度条
                pbar.update(batch_size)

In [10]:
class TestModel(object):
    _defaults={
    "eopch":1,
    }
    def __init__(self,net,loss,val_dataloder,**kwargs):
        '''
        还支持传入字典参数
        '''
        self.__dict__.update(self._defaults)
        self.net=net
        self.loss=loss
        self.dataloder=val_dataloder
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.device='cuda' if torch.cuda.is_available() else "cpu"
    def __call__(self,total_num,batch_size):
        # eval 模式下,dropout失效,bn层参数采用之前训练的,不更新
        self.net.eval()
        val_loss=0
        correct=0
        with tqdm(total=total_num,desc=f'Validation:') as pbar:
            # 不计算损失,这样速度更快
            with torch.no_grad():
                for idx,data in enumerate(self.dataloder):
                    x,y=data
                    x,y=x.to(self.device),y.to(self.device)
                    y_pre=self.net(x)
                    val_loss+=self.loss(y_pre,y).item()
                    # max 第一个返回的是元素值,第二个为索引值
                    # 求第一个维度的max,因此结果返回的是batch维度的max
                    # 返回是一个第一个元素为值,第二个元素为idx的tuple
                    pred=torch.max(y_pre,dim=1)[1]

                    # pred维度为batch,每个元素为索引
                    correct+=pred.eq(y).sum().item()
                    # 更新进度条
                    pbar.update(batch_size)
            # 格式化打印直接有% 带f%这种{:.2f%}是错的格式,format这种有点坑
            print("test loss {},accuracy {:.2%}".format(val_loss,correct/total_num))
        # 返回损失和准确率
        return (val_loss,correct/total_num)

In [11]:
optimizer=optim.SGD(net.parameters(),lr=lr,momentum=0.9)
loss=CalcLoss().to(device)
train=TrainModel(net,loss,train_loader,optimizer)
test=TestModel(net,loss,val_loader)

In [None]:
total_epoch=10
for i in range(total_epoch):
    train(train_set_len,batch)
    test(val_set_len,batch)

Train:: 23616it [01:15, 311.42it/s]                                                                                                  
Validation:: 2624it [00:10, 260.48it/s]                                                                                              


test loss 73.228730738163,accuracy 40.26%


Train:: 23616it [01:10, 335.69it/s]                                                                                                  
Validation:: 2624it [00:10, 261.02it/s]                                                                                              


test loss 58.42719841003418,accuracy 51.16%


Train:: 23616it [01:10, 335.93it/s]                                                                                                  
Validation:: 2624it [00:10, 255.17it/s]                                                                                              


test loss 49.77328488230705,accuracy 58.71%


Train:: 23616it [01:10, 334.95it/s]                                                                                                  
Validation:: 2624it [00:10, 259.54it/s]                                                                                              


test loss 48.3777861893177,accuracy 59.28%


Train:: 23616it [01:10, 336.04it/s]                                                                                                  
Validation:: 2624it [00:10, 258.34it/s]                                                                                              


test loss 53.80790504813194,accuracy 57.11%


Train:: 23616it [01:10, 335.85it/s]                                                                                                  
Validation:: 2624it [00:10, 258.00it/s]                                                                                              


test loss 41.11389338970184,accuracy 66.30%


Train:: 23616it [01:10, 335.81it/s]                                                                                                  
Validation:: 2624it [00:10, 260.52it/s]                                                                                              


test loss 39.15046951174736,accuracy 67.90%


Train:: 23616it [01:10, 334.60it/s]                                                                                                  
Validation:: 2624it [00:10, 258.11it/s]                                                                                              


test loss 39.20372584462166,accuracy 68.66%


Train:: 23616it [01:10, 335.11it/s]                                                                                                  
Validation:: 2624it [00:10, 257.66it/s]                                                                                              


test loss 34.93609642982483,accuracy 72.40%


Train::  41%|█████████████████████████████████▊                                                | 9728/23556 [00:32<00:37, 369.78it/s]

In [None]:
torch.save(net.state_dict(),"mobilenetV2_loss100_accuracy78.pth")