In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
from torchvision import datasets
from PIL import Image
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torch.autograd import Variable
from scipy.special import binom
import math
import os
import importlib
import time
import jpeg4py as jpeg

# Load Data

## Load Labels

In [65]:
data = pd.read_csv(r"C:\Users\Tang\Desktop\5329\multi-label-classification-competition-2024\COMP5329S1A2Dataset\train.csv", usecols = [0, 1])
train_idx, test_idx = train_test_split(data.index, test_size=0.2, random_state=540446740)
train = data.loc[train_idx, :].reset_index()
test = data.loc[test_idx, :].reset_index()

In [66]:
train_label = np.zeros((train.shape[0], 19))
for i in range(len(train['Labels'])):
  cur_label = [int(i) for i in train['Labels'][i].split(' ')]
  for j in cur_label:
    train_label[i, j-1] = 1

test_label = np.zeros((test.shape[0], 19))
for i in range(len(test['Labels'])):
  cur_label = [int(i) for i in test['Labels'][i].split(' ')]
  for j in cur_label:
    test_label[i, j-1] = 1

In [67]:
np.sum(test_label, axis = 0)

array([4548.,  222.,  843.,  244.,  253.,  293.,  236.,  436.,  215.,
        284.,  137.,    0.,  110.,   45.,  409.,  221.,  283.,  308.,
        209.])

## Download image

In [20]:
#Load img
!pip install kaggle
!kaggle datasets list

401 - Unauthorized - Unauthenticated


In [21]:
!cd /content/drive/MyDrive
!pwd

系统找不到指定的路径。
'pwd' 不是内部或外部命令，也不是可运行的程序
或批处理文件。


In [7]:
!mkdir -p kaggle

In [8]:
!pip install -q kaggle #如果有bug，把这里改成!pip install --upgrade --force-reinstall --no-deps kaggle==1.5.12
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

ERROR: Invalid requirement: '#如果有bug，把这里改成!pip'
命令语法不正确。
'cp' 不是内部或外部命令，也不是可运行的程序
或批处理文件。
'chmod' 不是内部或外部命令，也不是可运行的程序
或批处理文件。


In [9]:
!kaggle datasets list

401 - Unauthorized - Unauthenticated


## Load image

In [6]:
def Myloader(path):
  return Image.open(path).convert('RGB')

In [7]:
path = r"C:\Users\Tang\Desktop\5329\multi-label-classification-competition-2024\COMP5329S1A2Dataset\data"

In [8]:
class MyDataset(Dataset):
    def __init__(self, data, transform, loader):
        self.data = data
        self.transform = transform
        self.loader = loader
    def __getitem__(self, item):
        img, label = self.data[item]
        img = self.loader(img)
        img = self.transform(img)
        return (img, label)

    def __len__(self):
        return len(self.data)

    def load_sample(self, index):
        # 手动加载指定索引处的数据
        img, label = self.data[index]
        img = self.loader(img)
        img = self.transform(img)
        return img, label

transform = transforms.Compose([
    transforms.Resize((255, 255)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))  # 归一化
])


In [9]:
train_imgs = [path + "\\{}.jpg".format(i) for i in train_idx]
train_data = []
for i in range(len(train_imgs)):
  train_data.append([train_imgs[i], train_label[i, ]])
train_input = MyDataset(train_data, transform=transform, loader=Myloader)

In [10]:
# 创建数据加载器
batch_size = 32
train_loader = DataLoader(train_input, batch_size=batch_size, shuffle=True, num_workers = 0, pin_memory = False, drop_last = True)

In [11]:
test_imgs = [path + "\\{}.jpg".format(i) for i in test_idx]
test_data = []
for i in range(len(test_imgs)):
  test_data.append([test_imgs[i], test_label[i, ]])
test_input = MyDataset(test_data, transform=transform, loader=Myloader)

In [12]:
# 创建数据加载器
batch_size = 32
test_loader = DataLoader(test_input, batch_size=batch_size, shuffle=True, num_workers = 0, pin_memory = False, drop_last = True)

# Model

In [13]:
model_urls = {
    "resnet50" : "https://download.pytorch.org/models/resnet50-19c8e357.pth"
}

In [14]:
class FixedBatchNorm(nn.BatchNorm2d):
  def forward(self, input):
    return F.batch_norm(input, self.running_mean, self.running_var, self.weight, self.bias,
                        training = False, eps = self.eps)

In [15]:
class Bottleneck(nn.Module):

  #Expansion param --> modify planes
  expansion = 4

  #definiation of params
  def __init__(self, inplanes, planes, stride = 1, downsample = None, dilation = 1):
    super(Bottleneck, self).__init__()

    self.conv1 = nn.Conv2d(inplanes, planes, kernel_size = 1, bias = False)
    self.bn1 = FixedBatchNorm(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size = 3, stride = stride,
                           padding = dilation, bias = False,
                           dilation = dilation)
    self.bn2 = FixedBatchNorm(planes)
    self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size = 1, bias = False)
    self.bn3 = FixedBatchNorm(planes * 4)
    self.relu = nn.ReLU(inplace = True)
    self.downsample = downsample
    self.stride = stride
    self.dilation = dilation

  #forward propagation
  def forward(self, x):
    residual = x

    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)

    out = self.conv2(out)
    out = self.bn2(out)
    out = self.relu(out)

    out = self.conv3(out)
    out = self.bn3(out)

    if self.downsample is not None:
      residual = self.downsample(x)

    out += residual
    out = self.relu(out)

    return out

In [16]:
#标准ResNet模型
class ResNet(nn.Module):

  def __init__(self, block, layers, strides = (2, 2, 2, 2), dilations = (1, 1, 1, 1)):
    self.inplanes = 64
    super(ResNet, self).__init__()

    # resnet里的第一个部分
    self.conv1 = nn.Conv2d(3, 64, kernel_size = 7, stride = strides[0], padding = 3,
                           bias = False)
    self.bn1 = FixedBatchNorm(64)
    self.relu = nn.ReLU(inplace = True)
    self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)

    # 四个bottleneck
    self.layer1 = self._make_layer(block, 64, layers[0], stride = strides[0], dilation = dilations[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride = strides[1], dilation = dilations[1])
    self.layer3 = self._make_layer(block, 256, layers[2], stride = strides[2], dilation = dilations[2])
    self.layer4 = self._make_layer(block, 512, layers[2], stride = strides[3], dilation = dilations[3])

    self.inplanes = 1024

  def _make_layer(self, block, planes, blocks, stride = 1, dilation = 1):
    downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion:
      downsample = nn.Sequential(
          nn.Conv2d(self.inplanes, planes * block.expansion,
                    kernel_size = 1, stride = stride, bias = False),
          FixedBatchNorm(planes * block.expansion),
      )

    layers = [block(self.inplanes, planes, stride, downsample, dilation = 1)]
    self.inplanes = planes * block.expansion
    for i in range(1, blocks):
      layers.append(block(self.inplanes, planes, dilation = dilation))

    return nn.Sequential(*layers)

  #forward propagation
  def forward(self, x):
    #1
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)
    #2-5
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)
    #fully connected
    x = self.avgpool(x)
    x = x.view(x.size(0), -1)
    x = self.fc(x)

    return x


In [17]:
#function to generate model
def resnet50(pretrained = True, **kwargs):

  model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) #若使用其他resnet系列，只要修改中括号内的参数
  if pretrained:
    state_dict = model_zoo.load_url(model_urls['resnet50']) #load pretrained model from pytorch
    model.load_state_dict(state_dict, strict = False)
    print("model pretrained initialized")

  return model

In [18]:
#工具类
class tools:

  @staticmethod
  def gap2d(x, keepdims = False):
    out = torch.mean(x.view(x.size(0), x.size(1), -1), -1)
    if keepdims:
      out = out.view(out.size(0), out.size(1), 1, 1)
    return out

In [19]:
#优化器
class PolyOptimizer(torch.optim.SGD):

  def __init__(self, params, lr, weight_decay, max_step, momentum = 0.9):
    super().__init__(params, lr, weight_decay)

    self.global_step = 0
    self.max_step = max_step
    self.momentum = momentum

    self.__initial_lr = [group['lr'] for group in self.param_groups]

    def step(self, closure=None):

      if self.global_step < self.max_step:
        lr_mult = (1 - self.global_step / self.max_step) ** self.momentum

        for i in range(len(self.param_groups)):
          self.param_groups[i]['lr'] = self.__initial_lr[i] * lr_mult

      super().step(closure)

      self.global_step += 1


In [20]:
class AverageMeter:
  def __init__(self, *keys):
    self.__data = dict()
    for key in keys:
      self.__data[key] = [0.0, 0]

  def add(self, dict):
    for k, v in dict.items():
      if k not in self.__data:
        self.__data[k] = [0.0, 0]
      self.__data[k][0] += v
      self.__data[k][1] += 1

  def get(self, *keys):
    if len(keys) == 1:
      return self.__data[keys[0]][0] / self.__data[keys[0]][1]
    else:
      v_list = [self.__data[k][0] / self.__data[k][1] for k in keys]
      return tuple(v_list)

  def pop(self, key=None):
    if key is None:
      for k in self.__data.keys():
        self.__data[k] = [0.0, 0]
    else:
      v = self.get(key)
      self.__data[key] = [0.0, 0]
      return v

In [21]:
#用于多标签分类的Resnet模型
class Net(nn.Module):
  ## definition of params
  def __init__(self, stride = 16, n_classes = 19):
    super(Net, self).__init__()
    if stride == 16:
      self.resnet50 = resnet50(pretrained = True, strides = (2, 2, 2, 1))
      self.stage1 = nn.Sequential(self.resnet50.conv1, self.resnet50.bn1, self.resnet50.relu, self.resnet50.maxpool, self.resnet50.layer1)
    else:
      self.resnet50 = resnet50(pretrained = True, stides = (2, 2, 1, 1), dilations = (1, 1, 2, 2))
      self.stage1 = nn.Sequential(self.resnet50.conv1, self.resnet50.bn1, self.resnet50.relu, self.resnet50.maxpool, self.resnet50.layer1)
    self.stage2 = nn.Sequential(self.resnet50.layer2)
    self.stage3 = nn.Sequential(self.resnet50.layer3)
    self.stage4 = nn.Sequential(self.resnet50.layer4)
    self.n_classes = n_classes
    #改进 使用卷积层作为一个标准分类器 而不是全连接层
    self.classifier = nn.Conv2d(2048, n_classes, 1, bias = False)
    #嵌套 吧各个层封装起来 方便定位到具体层， 以提取某些参数信息
    self.backbone = nn.ModuleList([self.stage1, self.stage2, self.stage3, self.stage4])
    self.newly_added = nn.ModuleList([self.classifier])

  #forward propagation
  def forward(self, x):

    x = self.stage1(x)
    x = self.stage2(x)

    x = self.stage3(x)
    x = self.stage4(x)

    #用gap处理， 用卷积层完成分类
    x = tools.gap2d(x, keepdims = True)
    x = self.classifier(x)
    x = x.view(-1, self.n_classes)

    return x

  def train(self, mode = True):
    super(Net, self).train(mode)
    for p in self.resnet50.conv1.parameters():
      p.requires_grad = False
    for p in self.resnet50.bn1.parameters():
      p.requires_grad = False

  def trainable_parameters(self):

    return (list(self.backbone.parameters()), list(self.newly_added.parameters()))


# Model Train and Validation

In [23]:
class AverageMeter:
  def __init__(self, *keys):
    self.__data = dict()
    for k in keys:
      self.__data[k] = [0.0, 0]

  def add(self, dict):
    for k, v in dict.items():
      if k not in self.__data:
        self.__data[k] = [0.0, 0]
      self.__data[k][0] += v
      self.__data[k][1] += 1

  def get(self, *keys):
    if len(keys) == 1:
      return self.__data[keys[0]][0] / self.__data[keys[0]][1]
    else:
      v_list = [self.__data[k][0] / self.__data[k][1] for k in keys]
      return tuple(v_list)

  def pop(self, key = None):
    if key is None:
      for k in self.__data.keys():
        self.__data[k] = [0.0, 0]
    else:
      v = self.get(key)
      self.__data[key] = [0.0, 0]
      return v

In [24]:
class Timer:
  def __init__(self, starting_msg = None):
    self.start = time.time()
    self.stage_start = self.start

    if starting_msg is not None:
      print(starting_msg, time.ctime(time.time()))

  def __enter__(self):
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    return

  def update_progress(self, progress):
    self.elapsed = time.time() - self.start
    self.est_total = self.elapsed / progress
    self.est_remaining = self.est_total - self.elapsed
    self.est_finish = int(self.start + self.est_total)

  def str_estimated_complete(self):
    return str(time.ctime(self.est_finish))

  def get_stage_elapsed(self):
    return time.time() - self.stage_start

  def reset_stage(self):
    self.stage_start = time.time()

  def lapse(self):
    out = time.time() - self.stage_start
    self.stage_start = time.time()
    return out

In [25]:
def validate(model, data_loader):
  print("validating ...", flush = True, end = '')
  #构建计算器
  val_loss_meter = AverageMeter('loss1', 'loss2')
  #转测试模式
  model.eval()
  #不更新梯度做计算
  with torch.no_grad():
    count = 1
    for pack in data_loader:
      img = pack[0]

      label = pack[1].cuda(non_blocking = True)

      x = model(img)
      loss = F.multilabel_soft_margin_loss(x, label)

      val_loss_meter.add({'loss': loss.item()})
      count += 1

  model.train()
  #打印结果
  print("loss:{0}".format(val_loss_meter.pop('loss')))

  return

In [31]:
#训练过程
def run(args):
  print('train_recam')
  #获取模型
  model = args.model

  #总的迭代次数
  max_step = (len(train_input) // args.cam_batch_size) * args.cam_num_epoches

  #读取训练数据
  train_loader = args.train_loader

  #读取验证数据
  val_loader = args.val_loader

  #获取模型参数
  param_groups = model.trainable_parameters()

  #构建优化器
  optimizer = PolyOptimizer([{'params':param_groups[0], 'lr':args.cam_learning_rate, 'weight_decay':args.cam_weight_decay},
                             {'params':param_groups[1], 'lr':10*args.cam_learning_rate, 'weight_decay':args.cam_weight_decay},],
                            lr = args.cam_learning_rate, weight_decay = args.cam_weight_decay, max_step = max_step)

  #并行训练
  model = torch.nn.DataParallel(model).cuda()

  #转训练模式
  model.train()

  #评价指标的计算器
  avg_meter = AverageMeter()

  #时间记录
  timer = Timer()

  #训练
  for ep in range(args.cam_num_epoches):
    #输出当前epoch
    print("EPpoch %d/%d" % (ep+1, args.cam_num_epoches))
    #单个epoch内循环
    for step, pack in enumerate(train_loader):
      #获取图像
      img = pack[0]
      img = img.cuda()
      #获取label
      label = pack[1].cuda(non_blocking = True)
      #前向传播
      print("start Training")
      x = model(img)
      #优化器清零
      optimizer.zero_grad()
      #求损失
      loss = F.multilabel_soft_margin_loss(x, label)
      #反向传播
      loss.backward()
      #添加指标值计算器
      avg_meter.add({'loss':loss.item()})
      #更新梯度
      optimizer.step()
      #打印结果
      if (optimizer.global_step-1)%100 == 0:
        timer.update_progress(optimizer.global_step/max_step)
        print('step:%5d/%5d' % (optimizer.global_step - 1, max_step),
              'loss:%.4f' % (avg_meter.pop('loss')),
              'imps:%.1f' % ((step+1) * args.cam_batch_size / timer.get_stage_elapsed()),
              'lr: %.4f' % (optimizer.param_groups[0]['lr']),
              'etc:%s' % (timer.str_estimated_complete()), flush = True)

      #验证
      validate(model, val_loader)

      #完成一次训练和验证后，时间重置
      timer.reset_stage()

    #保存模型
    torch.save(model.state_dict(), args.cam_weights_name)

    #清空内存
    torch.cuda.empty_cache()


In [29]:
class Args:
  def __init__(self, model = None, cam_batch_size = 32, cam_num_epoches = 5, train_loader = None, val_loader = None, cam_learning_rate = 0.1, 
               cam_weight_decay = 1e-4, cam_weights_name = 'res50_cam.pth', recam_num_epoches = 4, recam_learning_rate = 0.005,
              recam_loss_weight = 1.0, recam_weights_name = 'res50_recam.pth'):
    self.cam_batch_size = cam_batch_size
    self.cam_num_epoches = cam_num_epoches
    self.train_loader = train_loader
    self.val_loader = val_loader
    self.cam_learning_rate = cam_learning_rate
    self.cam_weight_decay = cam_weight_decay
    self.cam_weights_name = cam_weights_name
    self.recam_num_epoches = recam_num_epoches
    self.recam_learning_rate = recam_learning_rate
    self.recam_loss_weight = recam_loss_weight
    self.recam_weights_name = recam_weights_name
    self.model = model

In [37]:
model = Net() #Net_CAM_Feature()
args = Args(model = model, train_loader = train_loader, val_loader = test_loader)

model pretrained initialized


In [38]:
run(args)

train_recam
EPpoch 1/5
start Training
validating ...loss:0.4450549038784107
start Training
validating ...loss:0.30692647217034436
start Training
validating ...loss:0.743657273262637
start Training
validating ...loss:0.39837452351356456
start Training
validating ...loss:0.4478188693208131
start Training
validating ...loss:0.8029841672145512
start Training
validating ...loss:0.33083175248457836
start Training
validating ...loss:0.273934426542088
start Training
validating ...loss:0.20961197424799613
start Training
validating ...loss:0.24558925466620962
start Training
validating ...loss:0.33398831612230717
start Training
validating ...loss:0.4131757351587056
start Training
validating ...loss:0.27848407739472114
start Training
validating ...loss:0.2538427874450644
start Training
validating ...loss:0.2646624039960036
start Training
validating ...loss:0.23516859399185308
start Training
validating ...loss:0.24581260886622716
start Training
validating ...loss:0.30986607968985835
start Training


KeyboardInterrupt: 

In [78]:
run_recam(args)

train_recam
model pretrained initialized
Epoch 1/4
step:    0/  108 loss_cls:0.7559 loss_ce:2.9595 acc:0.2453 imps:28.8 lr: 0.0050 etc:Fri May  3 14:25:39 2024
validating ...loss:0.7400047689968257
validating ...loss:0.7363772525993394
validating ...loss:0.733020637022945
validating ...loss:0.7293031136856949
validating ...loss:0.7259202779375136
validating ...loss:0.7221603720998752
validating ...loss:0.7189266983653021
validating ...loss:0.7151190768985944
validating ...loss:0.7116154654492842
validating ...loss:0.708352490481164
validating ...loss:0.7052666461798502
validating ...loss:0.7017235893652789
validating ...loss:0.6981354524036207
validating ...loss:0.694371112959046
validating ...loss:0.6911399859329792
validating ...loss:0.6878012940772432
validating ...loss:0.6841048654627047
validating ...loss:0.6808094411488469
validating ...loss:0.6773237209437837
validating ...loss:0.673968361534532
validating ...loss:0.6707473721901844
validating ...loss:0.666931203444482
validatin

KeyboardInterrupt: 

# Predict

In [79]:
img =  pd.read_csv(r"C:\Users\Tang\Desktop\5329\multi-label-classification-competition-2024\COMP5329S1A2Dataset\test.csv", usecols = [0, 1])
img['ImageID']

0       30000.jpg
1       30001.jpg
2       30002.jpg
3       30003.jpg
4       30004.jpg
          ...    
9995    39995.jpg
9996    39996.jpg
9997    39997.jpg
9998    39998.jpg
9999    39999.jpg
Name: ImageID, Length: 10000, dtype: object

In [80]:
final_imgs = [path + "\\{0}".format(i) for i in img['ImageID']]
final_data = []
for i in range(len(final_imgs)):
  final_data.append([final_imgs[i], train_label[i, ]])
final_input = MyDataset(final_data , transform=transform, loader=Myloader)

In [81]:
final_imgs

['C:\\Users\\Tang\\Desktop\\5329\\multi-label-classification-competition-2024\\COMP5329S1A2Dataset\\data\\30000.jpg',
 'C:\\Users\\Tang\\Desktop\\5329\\multi-label-classification-competition-2024\\COMP5329S1A2Dataset\\data\\30001.jpg',
 'C:\\Users\\Tang\\Desktop\\5329\\multi-label-classification-competition-2024\\COMP5329S1A2Dataset\\data\\30002.jpg',
 'C:\\Users\\Tang\\Desktop\\5329\\multi-label-classification-competition-2024\\COMP5329S1A2Dataset\\data\\30003.jpg',
 'C:\\Users\\Tang\\Desktop\\5329\\multi-label-classification-competition-2024\\COMP5329S1A2Dataset\\data\\30004.jpg',
 'C:\\Users\\Tang\\Desktop\\5329\\multi-label-classification-competition-2024\\COMP5329S1A2Dataset\\data\\30005.jpg',
 'C:\\Users\\Tang\\Desktop\\5329\\multi-label-classification-competition-2024\\COMP5329S1A2Dataset\\data\\30006.jpg',
 'C:\\Users\\Tang\\Desktop\\5329\\multi-label-classification-competition-2024\\COMP5329S1A2Dataset\\data\\30007.jpg',
 'C:\\Users\\Tang\\Desktop\\5329\\multi-label-classifica

In [84]:
batch_size = 32
final_loader = DataLoader(final_input, batch_size=batch_size, shuffle=True, num_workers = 0, pin_memory = False, drop_last = True)

In [57]:
#验证
print("validating ...", flush = True, end = '')
#构建计算器
val_loss_meter = AverageMeter('loss1', 'loss2')
#转测试模式
model.eval()
#不更新梯度做计算
with torch.no_grad():
    for pack in test_loader:
      img = pack[0].cuda()
    
      label = pack[1].cuda(non_blocking = True)
    
      x = model(img)
      print(x[10])
      print([1 if i > 0 else 0 for i in x[10]])
      print(label[10])
      loss = F.multilabel_soft_margin_loss(x, label)
      print(loss)
    
      
      

validating ...tensor([ 0.6346, -2.3980, -1.7309, -2.7262, -3.0924, -2.5741, -2.3848, -2.2096,
        -2.7226, -3.0521, -2.8801, -4.9646, -3.8064, -4.2725, -2.8715, -2.5284,
        -2.6697, -2.2541, -3.0401], device='cuda:0')
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
tensor([0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0.,
        0.], device='cuda:0', dtype=torch.float64)
tensor(0.2476, device='cuda:0', dtype=torch.float64)
tensor([ 0.8325, -2.5739, -1.9012, -2.8389, -3.2696, -2.6912, -2.6106, -2.3787,
        -2.9921, -3.2275, -3.1088, -5.2031, -3.9678, -4.4607, -3.1756, -2.8049,
        -2.7472, -2.3873, -3.3563], device='cuda:0')
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.], device='cuda:0', dtype=torch.float64)
tensor(0.2194, device='cuda:0', dtype=torch.float64)
tensor([ 0.6858, -2.3543, -1.7661, -2.6461, -3.0238, -2.5376, -2.3712, -2.15

0       30000.jpg
1       30001.jpg
2       30002.jpg
3       30003.jpg
4       30004.jpg
          ...    
9995    39995.jpg
9996    39996.jpg
9997    39997.jpg
9998    39998.jpg
9999    39999.jpg
Name: ImageID, Length: 10000, dtype: object

In [None]:
x = model(img)

In [68]:
#保存模型
torch.save(model.state_dict(), args.cam_weights_name)

# Advanced Model

In [53]:
class Net_CAM_Feature(Net):

    def __init__(self,stride=16,n_classes=19):
        super(Net_CAM_Feature, self).__init__(stride=stride,n_classes=n_classes)
    
    # 前向传播
    def forward(self, x):

        x = self.stage1(x)
        x = self.stage2(x)

        x = self.stage3(x)
        feature = self.stage4(x) # bs*2048*32*32

        x = tools.gap2d(feature, keepdims=True)
        x = self.classifier(x)
        x = x.view(-1, self.n_classes)

        # 计算cams（类激活图） --> 对每一类别的响应值
        cams = F.conv2d(feature, self.classifier.weight)
        cams = F.relu(cams)
        cams = cams/(F.adaptive_max_pool2d(cams, (1, 1)) + 1e-5) # 归一化 bs,20,1,1
        # 将cam乘到原始的特征图中
        cams_feature = cams.unsqueeze(2)*feature.unsqueeze(1) # bs*20*2048*32*32
        cams_feature = cams_feature.view(cams_feature.size(0),cams_feature.size(1),cams_feature.size(2),-1) # 在尺寸空间上做了一个拉伸
        cams_feature = torch.mean(cams_feature,-1) # 在尺寸空间上做平均 bs,20,2048
        
        return x,cams_feature,cams

In [32]:
def run_recam(args):
    print('train_recam')
    model = args.model
    param_groups = model.trainable_parameters()
   # model.load_state_dict(torch.load(args.cam_weights_name), strict=True)
    model = torch.nn.DataParallel(model).cuda()

    recam_predictor = Class_Predictor(19, 2048)
    recam_predictor = torch.nn.DataParallel(recam_predictor).cuda()
    recam_predictor.train()

    
    #读取训练数据
    train_loader = args.train_loader
    
    #读取验证数据
    val_loader = args.val_loader
    
    max_step = (len(train_loader) // args.cam_batch_size) * args.recam_num_epoches

    optimizer = PolyOptimizer([
        {'params': param_groups[0], 'lr': 0.1*args.recam_learning_rate, 'weight_decay': args.cam_weight_decay},
        {'params': param_groups[1], 'lr': 0.1*args.recam_learning_rate, 'weight_decay': args.cam_weight_decay},
        {'params': recam_predictor.parameters(), 'lr': args.recam_learning_rate, 'weight_decay': args.cam_weight_decay},
    ], lr=args.recam_learning_rate, weight_decay=args.cam_weight_decay, max_step=max_step)

    avg_meter = AverageMeter()

    timer = Timer()
    global_step = 0
    for ep in range(args.recam_num_epoches):

        print('Epoch %d/%d' % (ep+1, args.recam_num_epoches))
        model.train()

        for step, pack in enumerate(train_loader):

            img = pack[0].cuda()
            label = pack[1].cuda(non_blocking=True)
            x,cam,_ = model(img) # 输出结果和第一个模型有区别

            
            loss_cls = F.multilabel_soft_margin_loss(x, label)
            loss_ce,acc = recam_predictor(cam,label)
            loss = loss_cls + args.recam_loss_weight*loss_ce

            avg_meter.add({'loss_cls': loss_cls.item()})
            avg_meter.add({'loss_ce': loss_ce.item()})
            avg_meter.add({'acc': acc.item()})
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            global_step += 1

            if (global_step-1)%100 == 0:
                timer.update_progress(global_step / max_step)

                print('step:%5d/%5d' % (global_step - 1, max_step),
                      'loss_cls:%.4f' % (avg_meter.pop('loss_cls')),
                      'loss_ce:%.4f' % (avg_meter.pop('loss_ce')),
                      'acc:%.4f' % (avg_meter.pop('acc')),
                      'imps:%.1f' % ((step + 1) * args.cam_batch_size / timer.get_stage_elapsed()),
                      'lr: %.4f' % (optimizer.param_groups[2]['lr']),
                      'etc:%s' % (timer.str_estimated_complete()), flush=True)
        
            validate_recam(model, val_loader)
        timer.reset_stage()
        torch.save(model.state_dict(), args.recam_weights_name)
        torch.save(recam_predictor.module.state_dict(), osp.join(args.recam_weight_name,'recam_predictor_'+str(ep+1) + '.pth'))
    torch.cuda.empty_cache()

In [34]:
class Class_Predictor(nn.Module):
    def __init__(self, num_classes, representation_size):
        super(Class_Predictor, self).__init__()
        self.num_classes = num_classes
        self.classifier = nn.Conv2d(representation_size, num_classes, 1, bias=False)
        
    # 前向传播
    def forward(self, x, label):
        batch_size = x.shape[0]
        x = x.reshape(batch_size,self.num_classes,-1) # bs*20*2048
        # 初始label是multi-hot 判别后的label是一个bool矩阵  （1，0，0，0，1） --> (T,F,F,F,T)
        mask = label>0 # bs*20 
        # mask[i] 1*20  x[i] 1*20*2048 --> 是true的保留，是false的剔除 --> out[i] 1*n*2048
        feature_list = [x[i][mask[i]] for i in range(batch_size)] # bs*n*2048 每个张量都有唯一的类别标签
        prediction = [self.classifier(y.unsqueeze(-1).unsqueeze(-1)).squeeze(-1).squeeze(-1) for y in feature_list] # 分类预测
        labels = [torch.nonzero(label[i]).squeeze(1) for i in range(label.shape[0])] # nonzero （1，0，0，0，1）--> (0,4) 转换成了一个单标签问题

        loss = 0
        acc = 0
        num = 0
        for logit,label in zip(prediction, labels):
            if label.shape[0] == 0:
                continue
            loss_ce= F.cross_entropy(logit, label)
            loss += loss_ce
            acc += (logit.argmax(dim=1)==label.view(-1)).sum().float()
            num += label.size(0)
            
        return loss/batch_size, acc/num

In [77]:
def validate_recam(model, data_loader):
  print("validating ...", flush = True, end = '')
  #构建计算器
  val_loss_meter = AverageMeter('loss1', 'loss2')
  #转测试模式
  model.eval()
  #不更新梯度做计算
  with torch.no_grad():
    count = 1
    for pack in data_loader:
      img = pack[0]

      label = pack[1].cuda(non_blocking = True)

      x, _, _ = model(img)
      loss = F.multilabel_soft_margin_loss(x, label)

      val_loss_meter.add({'loss': loss.item()})
      count += 1

  model.train()
  #打印结果
  print("loss:{0}".format(val_loss_meter.pop('loss')))

  return

In [None]:
### 由于标签极不平衡，因此需要转化成（1，4）这种单标签来做训练