<a href="https://colab.research.google.com/github/DanielDLX/emotionRL/blob/master/g10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import math
import matplotlib.pyplot as plt
from PIL import Image
import os
import sys
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
from torchvision.transforms import transforms
from torchvision.utils import make_grid
import gc
import zipfile
import shutil

In [2]:
# Mirror the project folder from the mounted Drive into the local filesystem.
source_path = os.path.abspath(r'/content/drive/My Drive/eRL')
target_path = os.path.abspath(r'/eRL')

# Fail early with a clear message instead of creating and deleting the target
# for a source that is not there.
if not os.path.isdir(source_path):
    raise FileNotFoundError(source_path + ' does not exist!')

# copytree is given a fresh destination, so remove any stale copy first.
if os.path.exists(target_path):
    shutil.rmtree(target_path)

shutil.copytree(source_path, target_path)

'/eRL'

In [3]:
# Unpack the RAFDB.zip archive copied above into /dataset.
dataset_path = os.path.join('/eRL/RAFDB.zip')
with zipfile.ZipFile(dataset_path) as archive:
    archive.extractall(path='/dataset')

In [4]:
# Mini-batch size passed to every DataLoader in this notebook.
BATCH_SIZE=8
# Number of data folds; one network is trained per fold.
GROUP_NUM=3
CLASSNUM_PRE = 87020  # number of nodes in the last layer of the MS-Celeb-1M pre-trained model
# Output size of the fc_tune classification layers.
CLASSNUM = 7
# Width of the `feature` embedding layer in the ResNet models.
feature_size = 256

In [5]:
# Locations of the extracted dataset: list files and aligned face images.
path_list = os.path.join('/dataset/')
path_image = '/dataset/aligned/'
train_list_filename = os.path.join(path_list, 'list_train.txt')
test_list_filename = os.path.join(path_list, 'list_test.txt')

# Number of training samples; every per-sample buffer below has this length.
G_size = 12271
G_image_name = []
G_o_label = np.zeros((G_size), dtype=int)      # labels as read from the list file
G_label = np.zeros((G_size), dtype=int)        # working copy of the labels
G_weights = np.ones((G_size))                  # per-sample loss weights
G_reweights = np.ones((G_size))                # candidate weights read by the training dataset
G_valresult = np.ones((G_size))                # 1 / 0 validation correctness per sample
G_valresult_b = np.ones((G_size))
G_valresult_new = np.ones((G_size))
# Per-network feature vectors and probability gaps: current, baseline ("_b")
# and post-action ("_new") snapshots.
G_feature = np.zeros((GROUP_NUM, G_size, feature_size))
G_prob_diff = np.zeros((GROUP_NUM, G_size))
G_feature_b = np.zeros((GROUP_NUM, G_size, feature_size))
G_prob_diff_b = np.zeros((GROUP_NUM, G_size))
G_feature_new = np.zeros((GROUP_NUM, G_size, feature_size))
G_prob_diff_new = np.zeros((GROUP_NUM, G_size))

# Each line is "<image file> <1-based label>"; the context manager guarantees
# the handle is closed even if a line fails to parse.
with open(train_list_filename) as list_file:
  for i, line in enumerate(list_file):
    fields = line.split(' ')
    G_image_name.append(fields[0].split('.')[0] + '_aligned.jpg')
    label = int(fields[1]) - 1  # shift to 0-based class ids
    G_o_label[i] = label
    G_label[i] = label

# A shorter list would leave zero-filled labels and cause IndexErrors later on.
if len(G_image_name) != G_size:
  raise ValueError('expected %d training samples, found %d'
                   % (G_size, len(G_image_name)))

In [6]:
# Split the training samples into GROUP_NUM folds: fold i is the validation set
# of network i, the remaining samples are its training set.
# NOTE(review): no random seed is set, so the split differs between runs.
ids = np.arange(G_size)
pre_train_ids = np.arange(G_size)  # unshuffled ids covering the whole training set
np.random.shuffle(ids)
G_train_ids = []
G_val_ids = []
step = G_size // GROUP_NUM
for i in range(GROUP_NUM):
  start = i * step
  # The last fold absorbs the remainder when G_size is not divisible by GROUP_NUM.
  stop = (i + 1) * step if i < GROUP_NUM - 1 else G_size
  val_ids = ids[start:stop]
  train_ids = np.concatenate([ids[:start], ids[stop:]])
  G_train_ids.append(train_ids)
  G_val_ids.append(val_ids)
# for i in range(GROUP_NUM):
#   print(G_train_ids[i])
#   print(G_val_ids[i])
#   print(G_train_ids[i].shape)
#   print(G_val_ids[i].shape)

In [7]:
class Dataset_generator_train(Dataset):
  """Training/validation dataset over a subset of the global sample tables.

  Images are located through the module-level ``G_image_name`` list; labels and
  weights are read from ``G_label``, ``G_o_label``, ``G_weights`` and
  ``G_reweights`` at access time, so later updates to those globals are seen.

  Args:
    root_dir: directory prefix prepended to each image file name.
    names_file: path of the list file (stored, not read by this class).
    ids: NumPy array of global sample indices belonging to this subset.
    transform: when truthy, random horizontal flip and random erasing are applied.
  """

  def __init__(self, root_dir, names_file, ids, transform=False):
    self.root_dir = root_dir
    self.names_file = names_file
    self.transform = transform
    self.names_list = []
    self.ids = ids
    self.size = self.ids.shape[0]

  def __len__(self):
    return self.size

  def __getitem__(self, idx):
    """Return a dict with the image tensor and per-sample metadata.

    Raises:
      FileNotFoundError: if the image file is missing. Returning ``None`` would
        only fail later, inside the DataLoader's collate step.
    """
    sample_id = self.ids[idx]
    image_path = self.root_dir + G_image_name[sample_id]
    if not os.path.isfile(image_path):
      raise FileNotFoundError(image_path + ' does not exist!')
    # Close the file handle promptly and force three channels, which is what
    # the first convolution of the networks expects.
    with Image.open(image_path) as raw_image:
      image = raw_image.convert('RGB')
    image = transforms.Resize((224, 224))(image)
    if self.transform:
      image = transforms.RandomHorizontalFlip(p=0.5)(image)
      image = transforms.ToTensor()(image)
      image = transforms.RandomErasing(p=0.03)(image)
    else:
      image = transforms.ToTensor()(image)
    label = int(G_label[sample_id])
    o_label = int(G_o_label[sample_id])
    weights = float(G_weights[sample_id])
    reweights = float(G_reweights[sample_id])

    sample = {'image': image, 'label': label, 'o_label': o_label, 'idx': sample_id, 'weights': weights, 'reweights': reweights}

    return sample

In [8]:
class Dataset_generator_test(Dataset):
  """Test dataset driven directly by a list file.

  Each non-blank line of ``names_file`` is "<image file> <1-based label>".

  Args:
    root_dir: directory prefix prepended to each image file name.
    names_file: path of the list file to read.
    transform: stored but not used by this class.

  Raises:
    FileNotFoundError: if ``names_file`` does not exist.
  """

  def __init__(self, root_dir, names_file, transform=None):
    self.root_dir = root_dir
    self.names_file = names_file
    self.transform = transform
    self.size = 0
    self.names_list = []

    if not os.path.isfile(self.names_file):
      raise FileNotFoundError(self.names_file + ' does not exist!')
    # Read once and close the handle; blank lines cannot be parsed later, so
    # they are not counted as samples.
    with open(self.names_file) as list_file:
      self.names_list = [line for line in list_file if line.strip()]
    self.size = len(self.names_list)

  def __len__(self):
    return self.size

  def __getitem__(self, idx):
    """Return ``{'image': tensor, 'label': 0-based int}`` for entry ``idx``.

    Raises:
      FileNotFoundError: if the image file is missing.
    """
    fields = self.names_list[idx].split(' ')
    image_path = self.root_dir + (fields[0].split('.')[0] + '_aligned.jpg')
    if not os.path.isfile(image_path):
      raise FileNotFoundError(image_path + ' does not exist!')
    # Close the file handle promptly and force three channels.
    with Image.open(image_path) as raw_image:
      image = raw_image.convert('RGB')
    image = transforms.Resize((224, 224))(image)
    image = transforms.ToTensor()(image)
    label = int(fields[1]) - 1  # shift to 0-based class ids

    sample = {'image': image, 'label': label}

    return sample

In [9]:
# One train/validation dataset pair per fold, plus a dataset over all training
# samples and the held-out test set.
trainsets = [
    Dataset_generator_train(path_image, train_list_filename, G_train_ids[fold])
    for fold in range(GROUP_NUM)
]
valsets = [
    Dataset_generator_train(path_image, train_list_filename, G_val_ids[fold])
    for fold in range(GROUP_NUM)
]
pretrainset = Dataset_generator_train(path_image, train_list_filename, pre_train_ids)
testset = Dataset_generator_test(path_image, test_list_filename)

In [10]:
# Sanity check: print the tensor shape of the first sample's image.
print(pretrainset[0]['image'].size())

torch.Size([3, 224, 224])


In [11]:
# for i in range(GROUP_NUM):
#   print('*****',i,'**********************')
#   j = 0
#   for x in trainsets[i]:
#     if j % 100 == 0:
#       print(x['idx'])
#     j+=1
#   print('*****')
#   j = 0
#   for x in valsets[i]:
#     if j % 100 == 0:
#       print(x['idx'])
#     j+=1

In [12]:
# Loaders: training data is shuffled, validation and test data keep their order.
pretrainset_loader = DataLoader(dataset=pretrainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
testset_loader = DataLoader(dataset=testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
trainset_loaders = [
    DataLoader(dataset=trainsets[fold], batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
    for fold in range(GROUP_NUM)
]
valset_loaders = [
    DataLoader(dataset=valsets[fold], batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
    for fold in range(GROUP_NUM)
]
# print(trainset_loaders)
# print(valset_loaders)

In [13]:
def conv3x3(in_planes, out_planes, stride=1):
  """Build a bias-free 3x3 convolution with padding 1.

  Args:
    in_planes: number of input channels.
    out_planes: number of output channels.
    stride: convolution stride (default 1).

  Returns:
    The configured ``nn.Conv2d`` layer.
  """
  layer = nn.Conv2d(
      in_channels=in_planes,
      out_channels=out_planes,
      kernel_size=3,
      stride=stride,
      padding=1,
      bias=False,
  )
  return layer

class BasicBlock(nn.Module):
  """Residual block made of two 3x3 convolutions, each followed by batch norm.

  Args:
    inplanes: number of input channels.
    planes: number of output channels.
    stride: stride of the first convolution.
    downsample: optional module applied to the input before the addition.
  """
  expansion = 1

  def __init__(self, inplanes, planes, stride=1, downsample=None):
    super().__init__()
    self.conv1 = conv3x3(inplanes, planes, stride)
    self.bn1 = nn.BatchNorm2d(planes)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = conv3x3(planes, planes)
    self.bn2 = nn.BatchNorm2d(planes)
    self.downsample = downsample
    self.stride = stride

  def forward(self, x):
    """Return ``relu(main_path(x) + shortcut(x))``."""
    # Main path: conv -> bn -> relu -> conv -> bn.
    out = self.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))

    # Shortcut: identity unless a downsample module was supplied.
    shortcut = x
    if self.downsample is not None:
      shortcut = self.downsample(x)

    out += shortcut
    return self.relu(out)
  
class ResNet_WCE(nn.Module):
  """ResNet-style backbone with a classification head and a per-sample weight head.

  Args:
    block: residual block class; it must expose an ``expansion`` attribute and
      accept ``(inplanes, planes, stride, downsample)``.
    layers: sequence of four ints, the number of blocks in each stage.
    feature_size: width of the ``feature`` embedding layer.
  """

  def __init__(self, block, layers, feature_size=feature_size):
    # Running channel count consumed by _make_layer.
    self.inplanes = 64
    super(ResNet_WCE, self).__init__()
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                            bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
    # Global average pooling. A 224x224 input reaches layer4 as a 7x7 map, so
    # this matches the former AvgPool2d(7, stride=1) while no longer tying the
    # network to that single input resolution. The layer has no parameters, so
    # checkpoints stay compatible.
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.feature = nn.Linear(512 * block.expansion, feature_size)
    self.fc_tune = nn.Linear(feature_size, CLASSNUM, bias=False)
    self.fc_weight = nn.Linear(feature_size, 1, bias=False)

    # Re-initialise convolutions (normal, std = sqrt(2 / fan_out)) and batch
    # norms (weight 1, bias 0).
    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        nn.init.normal_(m.weight, mean=0.0, std=math.sqrt(2. / n))
      elif isinstance(m, nn.BatchNorm2d):
        nn.init.ones_(m.weight)
        nn.init.zeros_(m.bias)

  def _make_layer(self, block, planes, blocks, stride=1):
    """Stack ``blocks`` residual blocks; only the first one may change shape."""
    downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion:
      # 1x1 projection so the shortcut matches the main path's shape.
      downsample = nn.Sequential(
          nn.Conv2d(self.inplanes, planes * block.expansion,
                    kernel_size=1, stride=stride, bias=False),
          nn.BatchNorm2d(planes * block.expansion),
      )

    layers = [block(self.inplanes, planes, stride, downsample)]
    self.inplanes = planes * block.expansion
    for _ in range(1, blocks):
      layers.append(block(self.inplanes, planes))

    return nn.Sequential(*layers)

  def forward(self, x):
    """Return ``(out, weights)``.

    ``out`` holds the class scores from ``fc_tune``; ``weights`` is the sigmoid
    of ``fc_weight`` applied to the same embedding, one value per sample.
    """
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)

    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)

    x = self.avgpool(x)
    x = torch.flatten(x, 1)

    x = self.feature(x)
    x = self.relu(x)

    out = self.fc_tune(x)
    weights = torch.sigmoid(self.fc_weight(x))

    return out, weights

class ResNet(nn.Module):
  """ResNet-style classifier that also returns its embedding.

  Args:
    block: residual block class; it must expose an ``expansion`` attribute and
      accept ``(inplanes, planes, stride, downsample)``.
    layers: sequence of four ints, the number of blocks in each stage.
    feature_size: width of the ``feature`` embedding layer.
  """

  def __init__(self, block, layers, feature_size=feature_size):
    # Running channel count consumed by _make_layer.
    self.inplanes = 64
    super(ResNet, self).__init__()
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                            bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
    # Global average pooling. A 224x224 input reaches layer4 as a 7x7 map, so
    # this matches the former AvgPool2d(7, stride=1) while no longer tying the
    # network to that single input resolution. The layer has no parameters, so
    # checkpoints stay compatible.
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.feature = nn.Linear(512 * block.expansion, feature_size)
    self.fc_tune = nn.Linear(feature_size, CLASSNUM)

    # Re-initialise convolutions (normal, std = sqrt(2 / fan_out)) and batch
    # norms (weight 1, bias 0).
    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        nn.init.normal_(m.weight, mean=0.0, std=math.sqrt(2. / n))
      elif isinstance(m, nn.BatchNorm2d):
        nn.init.ones_(m.weight)
        nn.init.zeros_(m.bias)

  def _make_layer(self, block, planes, blocks, stride=1):
    """Stack ``blocks`` residual blocks; only the first one may change shape."""
    downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion:
      # 1x1 projection so the shortcut matches the main path's shape.
      downsample = nn.Sequential(
          nn.Conv2d(self.inplanes, planes * block.expansion,
                    kernel_size=1, stride=stride, bias=False),
          nn.BatchNorm2d(planes * block.expansion),
      )

    layers = [block(self.inplanes, planes, stride, downsample)]
    self.inplanes = planes * block.expansion
    for _ in range(1, blocks):
      layers.append(block(self.inplanes, planes))

    return nn.Sequential(*layers)

  def forward(self, x):
    """Return ``(out, x)``: class scores and the ReLU-activated embedding."""
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)

    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)

    x = self.avgpool(x)
    x = torch.flatten(x, 1)

    x = self.feature(x)
    x = self.relu(x)

    out = self.fc_tune(x)

    return out, x

In [14]:
def WCE_loss(out, weight, target):
  """Cross-entropy on logits scaled by a per-sample weight.

  Computes ``-mean_i log softmax(out_i * weight_i)[target_i]``.

  Args:
    out: class scores, one row per sample.
    weight: per-sample scale, broadcast against ``out`` (one row per sample).
    target: integer class index per sample.

  Returns:
    Scalar loss tensor, averaged over ``weight.size(0)`` samples.
  """
  # log_softmax is algebraically the same as log(exp(z) / sum(exp(z))) but does
  # not overflow to inf / NaN for large scores.
  log_prob = F.log_softmax(torch.mul(out, weight), dim=1)
  picked = torch.gather(log_prob, 1, target.view(-1, 1))
  return torch.neg(torch.div(torch.sum(picked), weight.size()[0]))

def CE_loss(out, weight, target):
  """Per-sample weighted cross-entropy.

  Computes ``-mean_i weight_i * log softmax(out_i)[target_i]``.

  Args:
    out: class scores, one row per sample.
    weight: 1-D tensor with one loss weight per sample.
    target: integer class index per sample.

  Returns:
    Scalar loss tensor, averaged over ``weight.size(0)`` samples.
  """
  # log_softmax is algebraically the same as log(exp(z) / sum(exp(z))) but does
  # not overflow to inf / NaN for large scores.
  log_prob = F.log_softmax(out, dim=1)
  picked = torch.gather(log_prob, 1, target.view(-1, 1))
  weighted = torch.mul(picked.view(-1), weight)
  return torch.neg(torch.div(torch.sum(weighted), weight.size()[0]))

In [15]:
# Weight-estimating network: four stages of two BasicBlocks each, placed on the GPU.
Net_WCE = ResNet_WCE(BasicBlock, [2, 2, 2, 2]).cuda()

In [16]:
PATH_PRE = '/eRL//ijba_res18_naive.pth.tar'
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
# Map everything to CPU: besides the weights the checkpoint carries optimizer
# state that is never used here, and load_state_dict copies parameters onto
# the model's own device anyway.
checkpoint = torch.load(PATH_PRE, map_location='cpu')
print(checkpoint.keys())
state_dict = checkpoint['state_dict']

dict_keys(['state_dict', 'epoch', 'arch', 'optimizer', 'best_prec1'])


In [17]:
# Drop the 'module.' fragment from the checkpoint keys so they line up with the
# attribute names of the local models, then load whatever matches.
state_dict_adapted = {}
for key, tensor in state_dict.items():
  state_dict_adapted[key.replace('module.', '')] = tensor
Net_WCE.load_state_dict(state_dict_adapted, strict=False)

_IncompatibleKeys(missing_keys=['fc_tune.weight', 'fc_weight.weight'], unexpected_keys=['fc.weight', 'fc.bias'])

In [18]:
# Train the weight-estimating network on the whole training set and, during the
# last epoch, record the per-sample weight it predicts into G_weights.
EPOCHS = 4
LEARNING_RATE = 0.0001
optimizer = optim.Adam(Net_WCE.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
step = 16  # number of batches per progress report
Net_WCE.train()  # make the cell safe to re-run after an evaluation cell
for epoch in range(EPOCHS):
  running_loss = 0.0
  i = 0
  c_sum = 0
  t_sum = 0
  for item in pretrainset_loader:
    i += 1
    image = item['image']
    label = item['label']
    idx = item['idx']

    image = image.cuda()
    label = label.cuda()

    optimizer.zero_grad()

    outputs, weights = Net_WCE(image)
    _, predicted = torch.max(outputs.data, 1)

    if epoch == EPOCHS-1:
      # Detach and move the whole batch at once instead of writing
      # grad-tracking CUDA tensors into the NumPy array one by one.
      G_weights[idx.numpy()] = weights.detach().cpu().numpy().reshape(-1)

    loss = WCE_loss(outputs, weights, label)

    loss.backward()
    optimizer.step()

    total = label.size(0)
    correct = (predicted == label).sum().item()

    c_sum += correct
    t_sum += total
    running_loss += loss.item()
    # Report after every full window of `step` batches so the average really
    # covers `step` losses; the accuracy shown is that of the latest batch.
    if i % step == 0:
      print('[%d, %5d] loss: %.4f accuracy: %.4f\r' %
            (epoch + 1, i, running_loss / step, correct / total))
      running_loss = 0.0
  scheduler.step()
  print('\nepoch:%d  accuracy: %.4f\n' % (epoch + 1, c_sum / t_sum))

print('\nFinished Training')

[1,    16] loss: 1.8092 accuracy: 0.1250
[1,    32] loss: 1.7452 accuracy: 0.3750
[1,    48] loss: 1.5203 accuracy: 0.5000
[1,    64] loss: 1.4396 accuracy: 0.5000
[1,    80] loss: 1.3297 accuracy: 0.3750
[1,    96] loss: 1.2580 accuracy: 0.6250
[1,   112] loss: 1.2118 accuracy: 0.5000
[1,   128] loss: 1.0933 accuracy: 0.6250
[1,   144] loss: 1.1698 accuracy: 0.5000
[1,   160] loss: 0.9683 accuracy: 0.6250
[1,   176] loss: 1.0735 accuracy: 0.7500
[1,   192] loss: 0.9208 accuracy: 0.5000
[1,   208] loss: 1.0772 accuracy: 0.7500
[1,   224] loss: 1.0502 accuracy: 0.5000
[1,   240] loss: 0.8936 accuracy: 0.6250
[1,   256] loss: 0.9026 accuracy: 0.7500
[1,   272] loss: 0.8357 accuracy: 0.6250
[1,   288] loss: 0.9865 accuracy: 0.6250
[1,   304] loss: 0.7319 accuracy: 0.7500
[1,   320] loss: 0.8857 accuracy: 0.6250
[1,   336] loss: 0.8119 accuracy: 0.8750
[1,   352] loss: 0.7628 accuracy: 0.7500
[1,   368] loss: 0.9007 accuracy: 0.6250
[1,   384] loss: 0.8537 accuracy: 0.5000
[1,   400] loss:

In [19]:
# Top-1 accuracy of the weight-estimating network on the test set.
Net_WCE.eval()

correct = 0
total = 0
with torch.no_grad():
  for batch in testset_loader:
    images = batch['image'].cuda()
    labels = batch['label'].cuda()
    logits, _ = Net_WCE(images)
    predicted = torch.max(logits.data, 1)[1]
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

test_accuracy = 100 * correct / total
print('Accuracy of the network on the test images: %.4f %%' % (
        test_accuracy))


Accuracy of the network on the test images: 87.4185 %


In [20]:
# Inspect the per-sample weights recorded during the last pre-training epoch.
print(G_weights)

[0.99240726 0.99898416 0.99983943 ... 0.99670035 0.99529207 0.99850416]


In [21]:
# The weight-estimating network is no longer needed; drop the reference and
# run the garbage collector.
del Net_WCE
gc.collect()

322

In [22]:
# Train one classifier per fold with the recorded per-sample weights, save it,
# and record for every validation sample whether it was classified correctly.
PATH = '/model{}.pth'
Nets = []
for g in range(GROUP_NUM):
  print('************',g,'*******************')
  Net = ResNet(BasicBlock, [2, 2, 2, 2]).cuda()
  Net.load_state_dict(state_dict_adapted, strict=False)
  Nets.append(Net)
  EPOCHS = 4
  LEARNING_RATE = 0.0001
  optimizer = optim.Adam(Net.parameters(), lr=LEARNING_RATE)
  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
  step = 16  # number of batches per progress report
  Net.train()
  for epoch in range(EPOCHS):
    running_loss = 0.0
    i = 0
    c_sum = 0
    t_sum = 0
    for item in trainset_loaders[g]:
      i += 1
      image = item['image']
      label = item['label']
      idx = item['idx']
      weights = item['weights']

      image = image.cuda()
      label = label.cuda()
      weights = weights.cuda()

      optimizer.zero_grad()

      outputs, _ = Net(image)
      _, predicted = torch.max(outputs.data, 1)

      loss = CE_loss(outputs, weights, label)

      loss.backward()
      optimizer.step()

      total = label.size(0)
      correct = (predicted == label).sum().item()

      c_sum += correct
      t_sum += total
      running_loss += loss.item()
      # Report after every full window of `step` batches so the average really
      # covers `step` losses; the accuracy shown is that of the latest batch.
      if i % step == 0:
        print('[%d, %5d] loss: %.4f accuracy: %.4f\r' %
              (epoch + 1, i, running_loss / step, correct / total))
        running_loss = 0.0
    scheduler.step()
    print('\nepoch:%d  accuracy: %.4f\n' % (epoch + 1, c_sum / t_sum))

  print('\nFinished Training')
  Net.eval()
  torch.save(Net.state_dict(), PATH.format(g))
  correct = 0
  total = 0
  with torch.no_grad():
    for item in valset_loaders[g]:
      images = item['image']
      labels = item['label']
      idx = item['idx']
      images = images.cuda()
      labels = labels.cuda()
      outputs, _ = Net(images)
      _, predicted = torch.max(outputs.data, 1)
      hits = (predicted == labels)
      # 1 for a correct prediction, 0 otherwise, written for the whole batch.
      G_valresult[idx.numpy()] = hits.cpu().numpy().astype(G_valresult.dtype)
      total += labels.size(0)
      correct += hits.sum().item()

  print('Accuracy of the network on the test images: %.4f %%' % (
          100 * correct / total))

************ 0 *******************
[1,    16] loss: 1.7867 accuracy: 0.3750
[1,    32] loss: 1.6721 accuracy: 0.8750
[1,    48] loss: 1.4726 accuracy: 0.3750
[1,    64] loss: 1.3755 accuracy: 0.2500
[1,    80] loss: 1.4953 accuracy: 0.5000
[1,    96] loss: 1.1371 accuracy: 0.5000
[1,   112] loss: 1.0922 accuracy: 0.7500
[1,   128] loss: 1.3017 accuracy: 0.5000
[1,   144] loss: 1.1056 accuracy: 0.5000
[1,   160] loss: 0.9144 accuracy: 0.6250
[1,   176] loss: 0.9024 accuracy: 0.7500
[1,   192] loss: 1.0077 accuracy: 0.7500
[1,   208] loss: 0.8989 accuracy: 0.5000
[1,   224] loss: 0.8140 accuracy: 0.7500
[1,   240] loss: 0.9553 accuracy: 0.6250
[1,   256] loss: 0.8840 accuracy: 1.0000
[1,   272] loss: 0.7755 accuracy: 0.7500
[1,   288] loss: 0.9171 accuracy: 0.5000
[1,   304] loss: 0.8255 accuracy: 0.6250
[1,   320] loss: 0.6896 accuracy: 1.0000
[1,   336] loss: 0.9790 accuracy: 0.6250
[1,   352] loss: 0.8571 accuracy: 0.6250
[1,   368] loss: 0.7310 accuracy: 0.6250
[1,   384] loss: 1.019

In [23]:
# Majority-vote accuracy of the fold networks on the test set.
correct = 0
total = 0
# Put EVERY ensemble member into evaluation mode (the previous loop called
# eval() on the last-created network only).
for i in range(GROUP_NUM):
  Nets[i].eval()
with torch.no_grad():
  for item in testset_loader:
    images = item['image']
    labels = item['label']
    images = images.cuda()
    labels = labels.cuda()
    predictions = []
    for i in range(GROUP_NUM):
      outputs, _ = Nets[i](images)
      _, predicted = torch.max(outputs.data, 1)
      predictions.append(predicted.view(1,-1))
    # Rows: networks, columns: samples of the batch.
    predicted = torch.cat(predictions).cpu().numpy()
    prediction = torch.zeros(predicted.shape[1]).cuda()
    for i in range(predicted.shape[1]):
      # Most frequent vote; argmax picks the smallest class id on a tie.
      count = np.bincount(predicted[:,i])
      p = np.argmax(count)
      prediction[i] = p
    total += labels.size(0)
    correct += (prediction == labels).sum().item()

print('Accuracy of the network on the test images: %.4f %%' % (
        100 * correct / total))

Accuracy of the network on the test images: 86.5059 %


In [24]:
from collections import deque
import random
import time


# Number of outputs of the final DQN_Net layer, one Q-value per action.
ACT_NUM = 3
# Length of the state vector fed to DQN_Net.
STATE_NUM = 2
# Adam learning rate for both Q-networks.
LEARNING_RATE_DQN = 0.0001
# Multiplier applied to the next state's best Q-value in the Q-target.
factor = 0.9
# Blend between the old Q-value and the new target in DDQN.train (1 = replace).
lr = 1
use_gpu = torch.cuda.is_available()


# torch.set_default_tensor_type(torch.DoubleTensor)


class DQN_Net(torch.nn.Module):
  """Fully connected Q-network: STATE_NUM inputs, three ReLU hidden layers, ACT_NUM outputs."""

  def __init__(self):
    self.inplanes = STATE_NUM
    super().__init__()
    widths = (32, 16, 8)
    self.layer1 = torch.nn.Linear(STATE_NUM, widths[0])
    self.layer2 = torch.nn.Linear(widths[0], widths[1])
    self.layer3 = torch.nn.Linear(widths[1], widths[2])
    self.layer4 = torch.nn.Linear(widths[2], ACT_NUM)
    self.relu = torch.nn.ReLU(inplace=True)

  def forward(self, x):
    """Return one raw Q-value per action for each input state."""
    for hidden in (self.layer1, self.layer2, self.layer3):
      x = self.relu(hidden(x))
    return self.layer4(x)


class DDQN(object):
  """Pair of Q-networks that alternate between the online and target roles.

  Args:
    LOAD: when true, both networks are restored with ``load_model()``.
  """

  def __init__(self, LOAD=False):
    self.step = 0
    self.update_freq = 32  # number of training steps between role swaps
    self.replay_size = 4096  # capacity of the replay buffer
    self.replay_queue = deque(maxlen=self.replay_size)
    self.model1 = DQN_Net()
    self.model2 = DQN_Net()
    if use_gpu:
      self.model1 = self.model1.cuda()
      self.model2 = self.model2.cuda()
    self.models = [self.model1, self.model2]
    # index1 selects the network being optimised, index2 the one that
    # provides the next-state Q-values.
    self.index1 = 0
    self.index2 = 1
    self.criterion = torch.nn.MSELoss()
    self.optimizers = [torch.optim.Adam(self.model1.parameters(), lr=LEARNING_RATE_DQN),
                        torch.optim.Adam(self.model2.parameters(), lr=LEARNING_RATE_DQN)]
    if LOAD:
      self.load_model()

  def act(self, s, epsilon=0.1):
    """Return the greedy action for state ``s``.

    The Q-values of both networks are averaged and the arg-max is returned.
    ``epsilon`` is accepted but not used: no random exploration is performed.
    """
    if use_gpu:
      s = torch.tensor(s.reshape((1, STATE_NUM)), dtype=torch.float).cuda()
    else:
      s = torch.tensor(s.reshape((1, STATE_NUM)), dtype=torch.float)
    y1 = self.model1(s)
    y2 = self.model2(s)
    y = (y1 + y2) / 2

    a = y.cpu()

    return np.argmax(a.detach().numpy()[0])

  def save_model(self, file_path1='./model_save/DDQN1.pth', file_path2='./model_save/DDQN2.pth'):
    """Save model1 to ``file_path1`` and model2 to ``file_path2``."""
    print('model saved')
    # torch.save does not create missing directories.
    for file_path in (file_path1, file_path2):
      directory = os.path.dirname(file_path)
      if directory:
        os.makedirs(directory, exist_ok=True)
    torch.save(self.model1.state_dict(), file_path1)
    # The second file must hold model2; writing model1 twice would make
    # load_model() restore two identical networks.
    torch.save(self.model2.state_dict(), file_path2)

  def load_model(self, file_path1='./model_save/DDQN1.pth', file_path2='./model_save/DDQN2.pth'):
    """Restore model1 from ``file_path1`` and model2 from ``file_path2``."""
    print('load model')
    self.model1.load_state_dict(torch.load(file_path1))
    self.model2.load_state_dict(torch.load(file_path2))

  def remember(self, s, a, next_s, reward, done):
    """Append one transition to the internal replay buffer."""
    self.replay_queue.append((s, a, next_s, reward, done))

  def train(self, replay_queue, epochs = 1024, batch_size=4, lr=lr, factor=factor):
    """Run ``epochs`` optimisation steps on mini-batches sampled from ``replay_queue``.

    Args:
      replay_queue: sequence of ``(s, a, next_s, reward, done)`` transitions.
        The buffer passed in is used, not ``self.replay_queue``.
      epochs: number of gradient steps.
      batch_size: transitions sampled per step; ``random.sample`` raises
        ``ValueError`` if the buffer holds fewer.
      lr: blend between the current Q-value and the new target.
      factor: multiplier applied to the next state's best Q-value.
    """
    for e in range(epochs):
      # Every update_freq steps the two networks swap roles.
      if self.step % self.update_freq == 0:
        self.index1, self.index2 = self.index2, self.index1
      self.step += 1

      replay_batch = random.sample(replay_queue, batch_size)
      # Stack into one ndarray first; building a tensor from a list of
      # ndarrays is slow.
      s_batch = torch.tensor(np.array([replay[0] for replay in replay_batch]), dtype=torch.float)
      next_s_batch = torch.tensor(np.array([replay[2] for replay in replay_batch]), dtype=torch.float)

      if use_gpu:
        s_batch = s_batch.cuda()
        next_s_batch = next_s_batch.cuda()

      Q_next = self.models[self.index2](next_s_batch)
      Q_next = Q_next.cpu().detach().numpy()

      self.optimizers[self.index1].zero_grad()

      Q = self.models[self.index1](s_batch)

      Q_target = Q.cpu().detach().numpy()

      # Overwrite only the Q-value of the action that was taken; the other
      # entries equal the prediction and so contribute no error.
      for i, replay in enumerate(replay_batch):
        _, a, _, reward, d = replay
        if d:
          Q_target[i][a] = lr * reward
        else:
          Q_target[i][a] = (1 - lr) * Q_target[i][a] + lr * (reward + factor * np.amax(Q_next[i]))

      if use_gpu:
        Q_target = torch.from_numpy(Q_target).cuda()
      else:
        Q_target = torch.from_numpy(Q_target)

      loss = self.criterion(Q, Q_target)
      loss.backward()
      self.optimizers[self.index1].step()
      loss_value = loss.cpu().detach().numpy()
      if np.isnan(loss_value):
        # Dump the offending batch to help trace where the NaN came from.
        print(s_batch.cpu().detach().numpy())
        print(next_s_batch.cpu().detach().numpy())
      print(loss_value, self.index1)



In [25]:
# Fresh agent plus a module-level replay buffer; the training cell below
# appends transitions to this buffer and passes it to agent.train().
agent = DDQN()
replay_queue = deque(maxlen=4096)

In [26]:
def collect_information(feature, prob_diff):
  """Snapshot every fold network's behaviour and return validation accuracies.

  For each network ``i`` and each sample of ``pretrainset_loader`` this writes
  in place:
    * ``feature[i, sample]``: the embedding returned by the network;
    * ``prob_diff[i, sample]``: softmax probability of the labelled class minus
      the largest softmax probability, i.e. 0 when the prediction matches the
      label and a negative value when it does not.

  Args:
    feature: array indexed as ``[network, sample, :]``, filled in place.
    prob_diff: array indexed as ``[network, sample]``, filled in place.

  Returns:
    List with one accuracy per network, measured on that network's own
    validation loader.
  """
  for i in range(GROUP_NUM):
    net = Nets[i]
    net.eval()
    with torch.no_grad():
      for item in pretrainset_loader:
        image = item['image'].cuda()
        label = item['label'].cuda()
        sample_ids = item['idx'].numpy()

        outputs_out, feature_out = net(image)
        prob = F.softmax(outputs_out, dim=1)
        prob_max, _ = torch.max(prob, 1)
        label_prob = prob.gather(1, label.view(-1, 1)).view(-1)

        # One batched transfer instead of a device-to-host copy per sample.
        # The difference is exactly 0 when the labelled class holds the
        # maximum, so no separate "correct" branch is needed.
        feature[i, sample_ids] = feature_out.cpu().numpy()
        prob_diff[i, sample_ids] = (label_prob - prob_max).cpu().numpy()

  val_result = []
  for i in range(GROUP_NUM):
    net = Nets[i]
    net.eval()
    c_sum = 0
    t_sum = 0
    with torch.no_grad():
      for item in valset_loaders[i]:
        image = item['image'].cuda()
        label = item['label'].cuda()

        outputs_out, _ = net(image)
        _, predicted = torch.max(F.softmax(outputs_out, dim=1), 1)

        c_sum += (predicted == label).sum().item()
        t_sum += label.size(0)
    val_result.append(c_sum / t_sum)
  return val_result


In [27]:
def group(sh=0.7):
  """Partition every validation fold into up to four sample groups.

  The samples of fold ``i`` are ranked by ``G_weights`` (largest first). The
  top ``int(n * sh)`` samples and the remainder are each split by the sign of
  ``G_prob_diff[i]`` (negative means network ``i`` misclassified the sample),
  giving in order: (top, misclassified), (top, correct),
  (rest, misclassified), (rest, correct). Empty groups are skipped.

  Args:
    sh: fraction of each fold that counts as the high-weight part.

  Returns:
    ``(groups, gflags)``: ``groups`` is a list of boolean torch masks of length
    ``G_size``; ``gflags[k]`` is the fold index that ``groups[k]`` came from.
  """
  groups = []
  gflags = []
  weights = torch.tensor(G_weights)
  for i in range(GROUP_NUM):
    # Ascending sample ids of this fold, the same order a boolean mask over
    # the whole data set would produce.
    fold_ids = np.sort(np.asarray(G_val_ids[i], dtype=np.int64))
    fold_weights = weights[torch.from_numpy(fold_ids)]
    n = fold_weights.size()[0]
    m = int(n * sh)
    _, order = torch.sort(fold_weights, descending=True)

    ranked_ids = fold_ids[order.numpy()]
    misclassified = G_prob_diff[i, ranked_ids] < 0
    in_top = np.arange(n) < m

    candidates = (
        ranked_ids[in_top & misclassified],
        ranked_ids[in_top & ~misclassified],
        ranked_ids[~in_top & misclassified],
        ranked_ids[~in_top & ~misclassified],
    )
    for member_ids in candidates:
      if member_ids.size > 0:
        # Start from an explicit all-False mask rather than relying on a
        # sentinel comparison such as `weights < -100`.
        mask = torch.zeros(G_size, dtype=torch.bool)
        mask[torch.from_numpy(member_ids)] = True
        groups.append(mask)
        gflags.append(i)
  return groups, gflags

In [28]:
def get_state(weights_in, prob_diff_in, feature, g, gflag):
  """Build the two-element state vector describing one sample group.

  Args:
    weights_in: per-sample weights.
    prob_diff_in: per-network, per-sample probability gaps.
    feature: accepted for call compatibility; not used.
    g: boolean mask selecting the samples of the group.
    gflag: index of the network whose probability gaps are read.

  Returns:
    ``np.array([mean weight of the group, mean probability gap of the group])``.
  """
  group_weight_mean = np.mean(weights_in[g])
  group_gap_mean = np.mean(prob_diff_in[gflag, g])
  return np.array([group_weight_mean, group_gap_mean])

In [29]:
def train(Net_in, trainset_loader_in, epochs, learning_rate):
  """Train ``Net_in`` with the per-sample ``reweights`` supplied by the loader.

  Args:
    Net_in: network returning ``(class scores, embedding)``; it is switched to
      training mode and updated in place.
    trainset_loader_in: loader yielding dicts with 'image', 'label' and
      'reweights' entries.
    epochs: number of passes over the loader.
    learning_rate: learning rate of the freshly created Adam optimizer.
  """
  Net_in.train()
  optimizer = optim.Adam(Net_in.parameters(), lr=learning_rate)
  step = 16  # number of batches per progress report
  for epoch in range(epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    i = 0
    c_sum = 0
    t_sum = 0
    for item in trainset_loader_in:
      i += 1
      image = item['image']
      label = item['label']
      reweights = item['reweights']

      image = image.cuda()
      label = label.cuda()
      reweights = reweights.cuda()

      optimizer.zero_grad()

      outputs, _ = Net_in(image)
      _, predicted = torch.max(outputs.data, 1)

      loss = CE_loss(outputs, reweights, label)
      loss.backward()
      optimizer.step()

      total = label.size(0)
      correct = (predicted == label).sum().item()

      c_sum += correct
      t_sum += total
      running_loss += loss.item()
      # Report after every full window of `step` batches so the average really
      # covers `step` losses; the accuracy shown is that of the latest batch.
      if i % step == 0:
        print('[%d, %5d] loss: %.4f accuracy: %.4f' %
              (epoch + 1, i, running_loss / step, correct / total))
        running_loss = 0.0
    print('\nepoch:%d  accuracy: %.4f\n' % (epoch + 1, c_sum / t_sum))


In [None]:
# Re-weighting rounds: probe how scaling each sample group's weight changes
# validation accuracy, train the agent on those transitions, apply the agent's
# choice per group, retrain the fold networks and test the ensemble.
TIMES = 4
for t in range(TIMES):
  # Snapshot of the networks as they are at the start of the round.
  acc_start = collect_information(G_feature, G_prob_diff)
  print('\nFinished collect information for grouping reward_base %.4f\n' % (acc_start[0]))

  # Baseline: one more epoch for every network, then measure again. The
  # rewards below are accuracy differences against this measurement.
  for i in range(GROUP_NUM):
    train(Nets[i], trainset_loaders[i], 1, LEARNING_RATE)
  reward_base = collect_information(G_feature_b, G_prob_diff_b)
  print('\nFinished collect information for grouping reward_base %.4f\n' % (reward_base[0]))

  epoch = 0
  groups, gflags = group()
  for i in range(len(groups)):
    print('#############',i,'################')
    g = groups[i]
    flag = gflags[i]
    print(G_prob_diff[flag, g].shape[0])
    # Try all three actions: 0 scales the group's weights up, 2 scales them
    # down, 1 leaves them unchanged.
    for a in range(3):
      print('!!!!!!!!!!!!!!!!!',a,'!!!!!!!!!!!!!!!!!')
      epoch += 1
      # The datasets read the module-level G_reweights at sampling time, so
      # rebinding it here changes the weights seen by train().
      G_reweights = G_weights.copy()
      if a == 0:
        G_reweights[g] = G_weights[g] * 1.2
      elif a == 2:
        G_reweights[g] = G_weights[g] / 1.2
      else:
        # "No change" needs no retraining: the baseline run is its outcome
        # and the reward is 0 by construction.
        s = get_state(G_weights, G_prob_diff, G_feature, g, flag)
        next_s = get_state(G_weights, G_prob_diff_b, G_feature_b, g, flag)
        replay_queue.append((s, a, next_s, 0, False))
        continue

      s = get_state(G_weights, G_prob_diff, G_feature, g, flag)
      # Retrain, from the saved checkpoints, every network except the one
      # whose validation fold the group was taken from.
      for x in range(GROUP_NUM):
        if x == flag:
          continue
        Net = Nets[x]
        Net.load_state_dict(torch.load(PATH.format(x)), strict=True)
        train(Net, trainset_loaders[x], 1, LEARNING_RATE)
      acc = collect_information(G_feature_new, G_prob_diff_new)
      next_s = get_state(G_reweights, G_prob_diff_new, G_feature_new, g, flag)
      reward = 0
      for x in range(GROUP_NUM):
        if x == flag:
          continue
        reward += acc[x] - reward_base[x]
      replay_queue.append((s, a, next_s, reward * 100, False))
      print(epoch, 'reward', reward)

  agent.train(replay_queue, epochs=2048, batch_size=4)

  # Apply the agent's preferred action to every group, then retrain all
  # networks from their checkpoints and store the result.
  G_reweights = G_weights.copy()
  for i in range(len(groups)):
    g = groups[i]
    flag = gflags[i]
    s = get_state(G_weights, G_prob_diff, G_feature, g, flag)
    a = agent.act(s)
    if a == 0:
      G_reweights[g] = G_weights[g] * 1.2
    elif a == 2:
      G_reweights[g] = G_weights[g] / 1.2
  for x in range(GROUP_NUM):
    Net = Nets[x]
    Net.load_state_dict(torch.load(PATH.format(x)), strict=True)
    train(Net, trainset_loaders[x], 1, LEARNING_RATE)
    torch.save(Net.state_dict(), PATH.format(x))
  G_weights = G_reweights

  # Majority-vote test accuracy of the ensemble.
  correct = 0
  total = 0
  # train() left every network in training mode, so each one must be switched
  # to evaluation mode here (the previous loop only did so for the last one).
  for i in range(GROUP_NUM):
    Nets[i].eval()
  with torch.no_grad():
    for item in testset_loader:
      images = item['image']
      labels = item['label']
      images = images.cuda()
      labels = labels.cuda()
      predictions = []
      for i in range(GROUP_NUM):
        outputs, _ = Nets[i](images)
        _, predicted = torch.max(outputs.data, 1)
        predictions.append(predicted.view(1,-1))
      # Rows: networks, columns: samples of the batch.
      predicted = torch.cat(predictions).cpu().numpy()
      prediction = torch.zeros(predicted.shape[1]).cuda()
      for i in range(predicted.shape[1]):
        # Most frequent vote; argmax picks the smallest class id on a tie.
        count = np.bincount(predicted[:,i])
        p = np.argmax(count)
        prediction[i] = p
      total += labels.size(0)
      correct += (prediction == labels).sum().item()

  print('*******************')
  print('*******************')
  print('Accuracy of the network on the test images: %.4f %%' % (
        100 * correct / total))


Finished collect information for grouping reward_base 0.8582

[1,    16] loss: 0.1169 accuracy: 1.0000
[1,    32] loss: 0.1153 accuracy: 1.0000
[1,    48] loss: 0.1318 accuracy: 1.0000
[1,    64] loss: 0.1453 accuracy: 1.0000
[1,    80] loss: 0.2495 accuracy: 1.0000
[1,    96] loss: 0.2084 accuracy: 0.6250
[1,   112] loss: 0.0946 accuracy: 1.0000
[1,   128] loss: 0.1179 accuracy: 1.0000
[1,   144] loss: 0.1773 accuracy: 0.8750
[1,   160] loss: 0.0708 accuracy: 1.0000
[1,   176] loss: 0.0911 accuracy: 0.8750
[1,   192] loss: 0.1147 accuracy: 1.0000
[1,   208] loss: 0.0969 accuracy: 1.0000
[1,   224] loss: 0.0975 accuracy: 1.0000
[1,   240] loss: 0.1202 accuracy: 1.0000
[1,   256] loss: 0.1112 accuracy: 1.0000
[1,   272] loss: 0.1167 accuracy: 1.0000
[1,   288] loss: 0.1842 accuracy: 1.0000
[1,   304] loss: 0.2537 accuracy: 0.8750
[1,   320] loss: 0.1197 accuracy: 1.0000
[1,   336] loss: 0.1174 accuracy: 0.8750
[1,   352] loss: 0.2016 accuracy: 1.0000
[1,   368] loss: 0.1069 accuracy: 0

In [None]:
# Final majority-vote accuracy of the fold networks on the test set.
correct = 0
total = 0
# Put EVERY ensemble member into evaluation mode (the previous loop called
# eval() on whichever network `Net` last referred to).
for i in range(GROUP_NUM):
  Nets[i].eval()
with torch.no_grad():
  for item in testset_loader:
    images = item['image']
    labels = item['label']
    images = images.cuda()
    labels = labels.cuda()
    predictions = []
    for i in range(GROUP_NUM):
      outputs, _ = Nets[i](images)
      _, predicted = torch.max(outputs.data, 1)
      predictions.append(predicted.view(1,-1))
    # Rows: networks, columns: samples of the batch.
    predicted = torch.cat(predictions).cpu().numpy()
    prediction = torch.zeros(predicted.shape[1]).cuda()
    for i in range(predicted.shape[1]):
      # Most frequent vote; argmax picks the smallest class id on a tie.
      count = np.bincount(predicted[:,i])
      p = np.argmax(count)
      prediction[i] = p
    total += labels.size(0)
    correct += (prediction == labels).sum().item()

print('Accuracy of the network on the test images: %.4f %%' % (
        100 * correct / total))