# 模型安装

In [None]:
!pip install -U openmim
!mim install mmcv
!git clone https://github.com/open-mmlab/mmclassification.git
%cd mmclassification
!pip install -e . 

In [None]:
import copy
import os.path as osp
import numpy as np
import torch, torchvision
import mmcv
from mmcv import Config
from mmcls.datasets.builder import DATASETS
from mmcls.datasets import BaseDataset
from torchvision.datasets import ImageFolder
from mmcls.apis import train_model,inference_model,show_result_pyplot,set_random_seed
from mmcls.datasets import build_dataset,build_dataloader
from mmcls.models import build_classifier

# 模型训练

In [None]:
# Dataset locations: (training split, validation split).
training_data_path, valid_data_path = (
    '/kaggle/input/10-monkey-species/training/training/',
    '/kaggle/input/10-monkey-species/validation/validation/',
)

In [None]:
# Per-channel normalisation statistics shared by the train and test pipelines.
# `to_rgb=True` asks Normalize to convert the loaded image to RGB first.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)

# `_base_.rand_increasing_policies` can only be resolved inside an mmcv config
# file; in a notebook `_base_` is an undefined name. Load the policy list
# explicitly from the base config shipped with the cloned repository
# (path is relative to the repository root, the current working directory).
rand_increasing_policies = Config.fromfile(
    './configs/_base_/datasets/pipelines/rand_aug.py'
).rand_increasing_policies

# Training-time preprocessing and augmentation, applied in list order.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='RandomResizedCrop',
        size=224,
        backend='pillow',
        interpolation='bicubic'),
    dict(type='RandomFlip', flip_prob=0.5, direction='horizontal'),
    dict(
        type='RandAugment',
        policies=rand_increasing_policies,
        num_policies=2,
        total_level=10,
        magnitude_level=9,
        magnitude_std=0.5,
        hparams=dict(
            # Mean is reversed here - presumably because the image is still
            # in its loaded (pre-`to_rgb`) channel order at this stage.
            pad_val=[round(x) for x in img_norm_cfg['mean'][::-1]],
            interpolation='bicubic')),
    dict(
        type='RandomErasing',
        erase_prob=0.25,
        mode='rand',
        min_area_ratio=0.02,
        max_area_ratio=1 / 3,
        fill_color=img_norm_cfg['mean'][::-1],
        fill_std=img_norm_cfg['std'][::-1]),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='ImageToTensor', keys=['img']),
    dict(type='ToTensor', keys=['gt_label']),
    # Training needs both the image and its label.
    dict(type='Collect', keys=['img', 'gt_label'])
]

# Deterministic preprocessing for evaluation/inference, applied in list order.
# NOTE(review): size=(256, -1) presumably resizes one edge to 256 while
# keeping the aspect ratio - confirm against the mmcls `Resize` transform.
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'Resize',
        'size': (256, -1),
        'backend': 'pillow',
        'interpolation': 'bicubic',
    },
    {'type': 'CenterCrop', 'crop_size': 224},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'ImageToTensor', 'keys': ['img']},
    # Only the image is collected; no label is required here.
    {'type': 'Collect', 'keys': ['img']},
]

In [None]:
# Read every setting from a config file; the path is relative to the current
# working directory (the cloned repository).
config_path = './configs/resnext/resnext50_32x4d_b32x8_imagenet.py'
cfg = Config.fromfile(config_path)
# Print the fully resolved configuration.
print(f'Config:\n{cfg.pretty_text}')

In [None]:
# The classification head must output one score per class (10 here).
cfg.model.head.num_classes = 10

# Training split: CustomDataset reading from the training folder with the
# augmentation pipeline defined earlier in the notebook.
cfg.dataset_type = 'CustomDataset'
cfg.data.train.type = 'CustomDataset'
cfg.data.train.data_prefix = training_data_path
cfg.data.train.pipeline = train_pipeline

# Validation and test splits both read the validation folder and use no
# annotation file.
for split in ('val', 'test'):
    cfg.data[split].type = 'CustomDataset'
    cfg.data[split].data_prefix = valid_data_path
    cfg.data[split].ann_file = None

# The val split keeps the base config's pipeline; only its final Collect step
# is replaced so that it gathers the image alone.
cfg.data.val.pipeline[-1] = dict(type='Collect', keys=['img'])

# The test split uses the custom test pipeline, but its final Collect step
# also gathers the label (this entry is later built as a dataset for the
# loss-computing 'val' workflow stage).
# NOTE(review): if the config stores the list by reference, this item
# assignment also changes the module-level `test_pipeline` - confirm.
cfg.data.test.pipeline = test_pipeline
cfg.data.test.pipeline[-1] = dict(type='Collect', keys=['img', 'gt_label'])

# Evaluation metric options: report top-1 only.
cfg.evaluation['metric_options'] = {'topk': (1, )}

# To start from pretrained weights, set `cfg.load_from` to a checkpoint path
# (left unset here).
# NOTE(review): this sets `init_cfg` on the top-level config rather than on
# `cfg.model` or one of its components - confirm that it is actually consumed.
cfg.init_cfg = dict(type='TruncNormal', layer='Linear', mean=0.2)

# Replace the optimizer settings from the base config with AdamW.
cfg.optimizer = dict(type='AdamW', lr=0.004, weight_decay=0.05)

# Learning-rate schedule. The keyword arguments must match the chosen policy.
# Alternatives previously tried in this cell:
#   dict(policy='step', step=[30, 60, 90])
#   dict(policy='exp', gamma=0.1)
#   dict(policy='fixed')
#   dict(policy='poly', power=1.25, min_lr=0.001)
#   dict(policy='CosineRestart', periods=[1, 3])
# Cosine annealing is used, preceded by a linear warmup.
cfg.lr_config = dict(
    policy='CosineAnnealing',
    min_lr=0.0001,
    warmup='linear',
    warmup_iters=20,
    warmup_ratio=0.001)

# Epoch-based training loop; max_epochs is the number of training epochs.
cfg.runner = dict(type='EpochBasedRunner', max_epochs=100)

# Logging interval.
cfg.log_config.interval = 10

# Evaluate and save a checkpoint at the same interval.
cfg.evaluation.interval = cfg.checkpoint_config.interval = 12

# Directory that receives logs and checkpoints.
cfg.work_dir = './tutorial_exps'

# Fix the seed so results are more reproducible.
cfg.seed = 0
set_random_seed(cfg.seed, deterministic=False)

# Device selection: a single GPU.
cfg.gpu_ids = range(1)
cfg.device = 'cuda'

# The 'val' stage of the workflow is the runner's val_step: like train_step it
# computes the loss; it is not the accuracy evaluation.
cfg.workflow = [('train', 2), ('val', 1)]
# To inspect the final configuration, print `cfg.pretty_text` here.



In [None]:

# Build one dataset per workflow stage (the list length has to match
# len(cfg.workflow)). The dataset for the 'val' stage is used to compute a
# loss, so its pipeline must provide the label; that is why `cfg.data.test`
# is built here rather than `cfg.data.val`.
train_set = build_dataset(cfg.data.train)
val_loss_set = build_dataset(cfg.data.test)
datasets = [train_set, val_loss_set]

# Build the classifier from the model section of the config.
model = build_classifier(cfg.model)

# Attach the class names to the model for visualization convenience.
model.CLASSES = train_set.CLASSES

# Create the output directory. To evaluate on the val data, pass
# validate=True to train_model instead of relying on the workflow.
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

In [None]:
# Single-process (non-distributed) training. validate=True runs evaluation on
# the val dataset, whose pipeline must not collect a label.
train_options = dict(distributed=False, validate=True)
train_model(model, datasets, cfg, **train_options)

# FP16训练

In [None]:
# Mixed-precision (FP16) training with dynamic loss scaling and no gradient
# clipping.
cfg.optimizer_config = dict(grad_clip=None)
cfg.fp16 = dict(loss_scale='dynamic')

# Run training. A meta dict must be supplied here. validate=True would add the
# evaluation hook; it is turned off for this run.
train_model(
    model, datasets, cfg, distributed=False, validate=False, meta=dict())


# 使用模型

In [None]:
from mmcls.apis import inference_model

# Switch to evaluation mode and attach the config: inference_model reads the
# test pipeline from `model.cfg`.
model.eval()
model.cfg = cfg
# The test pipeline was configured to collect 'gt_label' for the
# loss-computing 'val' workflow stage. A bare image path carries no label, so
# leaving that key in place would fail inside Collect with a KeyError.
# Collect the image only for inference.
model.cfg.data.test.pipeline[-1] = dict(type='Collect', keys=['img'])
result = inference_model(model, '/kaggle/input/10-monkey-species/training/training/n0/n0018.jpg')

# 单机多卡命令行实现训练


In [None]:
#只能用 tools/train.py进行分布式训练，缺少init_函数
                         #1 config                             #GPU 
!bash dist_train.sh ../configs/resnet/resnet18_8xb16_cifar10.py 2 