In [1]:
import os
import copy
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import datasets as ds
from torch.utils.data import DataLoader,Dataset,Subset,ConcatDataset,random_split
import matplotlib.pyplot as plt
import numpy as np
import random
import pandas as pd
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import tqdm

In [2]:
print(os.getcwd())

/home/zhixin/Project/FL_CelebA_10_27


In [3]:
# 把数据缩放到（-1，1）
class Oneone(torch.nn.Module):
    def __init__(self, inplace=False):
        super().__init__()
        self.inplace = inplace

    def forward(self, tensor):
        return tensor*2.0-1.0
        # return F.normalize(tensor, self.mean, self.std, self.inplace)

# transform = transforms.Compose是把一系列图片操作组合起来，比如减去像素均值等。
# DataLoader读入的数据类型是PIL.Image
# 这里对图片不做任何处理，仅仅是把PIL.Image转换为torch.FloatTensor，从而可以被pytorch计算
transform_train = transforms.Compose([transforms.CenterCrop(224),
                                       transforms.Resize((128, 128)),
                                       #transforms.Grayscale(),
                                       #transforms.Lambda(lambda x: x/255.),
                                       transforms.ToTensor()])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    Oneone(),
])

In [4]:
learning_rate = 0.001
batch_size = 128

In [5]:
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
intermediate_result = {}
net_name = "VGG16"

class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512*4*4, 2)
        global intermediate_result

    def forward(self, x):
        seq = self.features
        out = x
        for i,layer in enumerate(seq):
            out = layer(out)

            if type(layer) == torch.nn.modules.conv.Conv2d:
                intermediate_result[str(i)] = out
#         out = self.features(x)
        out = out.view(out.size(0), -1)
        intermediate_result["linear"] = out
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]

                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)


In [6]:
net = VGG(net_name)
print(net)
# 定义损失函数和优化器
criterion = torch.nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)
optimizer_2 = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)



# 如果有gpu就使用gpu，否则使用cpu
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = net.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

#  加载CelebA数据集

In [7]:
df_train=pd.read_csv("../celeba/celeba-train.csv", index_col=0)
df_test=pd.read_csv("../celeba/celeba-test.csv",index_col=0)
for index,column in enumerate(df_train.columns):
    print(str(index)+" "+column)
df_train.head()

0 5_o_Clock_Shadow
1 Arched_Eyebrows
2 Attractive
3 Bags_Under_Eyes
4 Bald
5 Bangs
6 Big_Lips
7 Big_Nose
8 Black_Hair
9 Blond_Hair
10 Blurry
11 Brown_Hair
12 Bushy_Eyebrows
13 Chubby
14 Double_Chin
15 Eyeglasses
16 Goatee
17 Gray_Hair
18 Heavy_Makeup
19 High_Cheekbones
20 Male
21 Mouth_Slightly_Open
22 Mustache
23 Narrow_Eyes
24 No_Beard
25 Oval_Face
26 Pale_Skin
27 Pointy_Nose
28 Receding_Hairline
29 Rosy_Cheeks
30 Sideburns
31 Smiling
32 Straight_Hair
33 Wavy_Hair
34 Wearing_Earrings
35 Wearing_Hat
36 Wearing_Lipstick
37 Wearing_Necklace
38 Wearing_Necktie
39 Young


Unnamed: 0,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,Blond_Hair,...,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
000001.jpg,0,1,1,0,0,0,0,0,0,0,...,0,1,1,0,1,0,1,0,0,1
000002.jpg,0,0,0,1,0,0,0,1,0,0,...,0,1,0,0,0,0,0,0,0,1
000003.jpg,0,0,0,0,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,1
000004.jpg,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,1,0,1,1,0,1
000005.jpg,0,1,1,0,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,1


In [8]:
class CelebADataset(Dataset):
    def __init__(self, df, root_path, transform=None):
        self.img_dir = root_path+"celeba/img_align_celeba/"
        self.img_names = df.index.values
        self.y =df.values
        self.transform = transform
        self.df_box=pd.read_csv('../celeba/list_bbox_celeba.txt', sep="\s+", skiprows=1,index_col=0)
    def __getitem__(self, index):
        img = Image.open(os.path.join(self.img_dir,
                                      self.img_names[index]))
        box=self.df_box.loc[self.img_names[index]]
        img = img.crop([int(box[0]),int(box[1]),int(box[0]+box[2]),int(box[1]+box[3])])
        if self.transform is not None:
            img = self.transform(img)

        label = self.y[index]
        return img, label

    def __len__(self):
        return self.y.shape[0]

In [9]:
#加载原始数据集
celebA_train_dataset=CelebADataset(df_train,"../",transform_train)
celebA_val_dataset=CelebADataset(df_test,"../",transform_train)

#取部分数据集
# celebA_train_dataset=random_split(celebA_train_dataset,
#                                   lengths=[int(len(celebA_train_dataset)*0.25),len(celebA_train_dataset)-int(len(celebA_train_dataset)*0.25)])[0]

celebA_train_loader=DataLoader(dataset = celebA_train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
celebA_val_loader=DataLoader(dataset = celebA_val_dataset,
                              batch_size=batch_size,
                             shuffle=True)

In [10]:
print("训练集长度："+str(len(celebA_train_dataset)) )
print("验证集长度："+str(len(celebA_val_dataset)) )

训练集长度：162770
验证集长度：19962


In [9]:
df_train_Young0Male0=df_train[(df_train['Smiling']==0) & (df_train['Young']==0)]
df_train_Young0Male1=df_train[(df_train['Smiling']==0) & (df_train['Young']==1)]
df_train_Young1Male0=df_train[(df_train['Smiling']==1) & (df_train['Young']==0)]
df_train_Young1Male1=df_train[(df_train['Smiling']==1) & (df_train['Young']==1)]
print(len(df_train_Young0Male0))
print(len(df_train_Young0Male1))
print(len(df_train_Young1Male0))
print(len(df_train_Young1Male1))

17667
67023
18315
59765


In [None]:
df_test_Male0=df_test[df_test['Male']==0]
df_test_Male1=df_test[df_test['Male']==1]
df_test_Young0=df_test[df_test['Young']==0]
df_test_Young1=df_test[df_test['Young']==1]
df_test_Bald0=df_test[df_test["Bald"]==0]
df_test_Bald1=df_test[df_test["Bald"]==1]
df_test_Eyeglasses0=df_test[df_test["Eyeglasses"]==0]
df_test_Eeyeglasses1=df_test[df_test["Eyeglasses"]==1]
df_test_Wearing_earrings0=df_test[df_test["Wearing_Earrings"]==0]
df_test_Wearing_earrings1=df_test[df_test["Wearing_Earrings"]==1]
df_test_Receding_hairing0=df_test[df_test["Receding_Hairline"]==0]
df_test_Receding_hairing1=df_test[df_test["Receding_Hairline"]==1]
Male0_test_DataLoader=DataLoader(dataset=CelebADataset(df_test_Male0,"../",transform_train),batch_size=batch_size)
Male1_test_DataLoader=DataLoader(dataset=CelebADataset(df_test_Male1,"../",transform_train),batch_size=batch_size)
Young0_test_DataLoader=DataLoader(dataset=CelebADataset(df_test_Young0,"../",transform_train),batch_size=batch_size)
Young1_test_DataLoader=DataLoader(dataset=CelebADataset(df_test_Young1,"../",transform_train),batch_size=batch_size)
Bald0_test_DataLoader=DataLoader(dataset=CelebADataset(df_test_Bald0,"../",transform_train),batch_size=batch_size)
Bald1_test_DataLoader=DataLoader(dataset=CelebADataset(df_test_Bald1,"../",transform_train),batch_size=batch_size)
Eyeglasses0_test_DataLoader=DataLoader(dataset=CelebADataset(df_test_Eyeglasses0,"../",transform_train),batch_size=batch_size)
Eyeglasses1_test_DataLoader=DataLoader(dataset=CelebADataset(df_test_Eeyeglasses1,"../",transform_train),batch_size=batch_size)
Wearing_earrings0_test_DataLoader=DataLoader(dataset=CelebADataset(df_test_Wearing_earrings0,"../",transform_train),batch_size=batch_size)
Wearing_earrings1_test_DataLoader=DataLoader(dataset=CelebADataset(df_test_Wearing_earrings1,"../",transform_train),batch_size=batch_size)
Receding_hairing0_test_DataLoader=DataLoader(dataset=CelebADataset(df_test_Receding_hairing0,"../",transform_train),batch_size=batch_size)
Receding_hairing1_test_DataLoader=DataLoader(dataset=CelebADataset(df_test_Receding_hairing1,"../",transform_train),batch_size=batch_size)

In [None]:
def shuffle_dataset(dataset,target_index):
    '''
    打乱数据集，把目标标签取反
    :param dataset:
    :param target_index: 标签下标
    :return:
    '''
    dataset=copy.deepcopy(dataset)
    for i,(data,target) in enumerate(dataset):
        dataset[i][1][target_index]=target[target_index]^1 #取反操作
    return dataset

In [None]:
#标签的下标
target_index=20
#身份标签的下标
group_index=20
#shuffle数据集的长度
shuffle_len=len(celebA_train_dataset)/10

In [None]:
indices=get_split_indices(celebA_train_dataset,target_index)
celebA_train_target1_dataset=Subset(celebA_train_dataset,indices[0])
celebA_train_target2_dataset=Subset(celebA_train_dataset,indices[1])
#构造一个标签平衡的训练集
length=15000
# celebA_train_dataset=ConcatDataset([random_split(celebA_train_target1_dataset,[length,len(celebA_train_target1_dataset)-length])[0],random_split(celebA_train_target2_dataset,[length,len(celebA_train_target2_dataset)-length])[0]])
# celebA_train_loader=DataLoader(dataset = celebA_train_dataset,
#                               batch_size=batch_size,
#                               shuffle=True)
print(len(celebA_train_target1_dataset))
print(len(celebA_train_target2_dataset))

In [None]:
# 训练模型的方法定义
def test(loader, net,target_index):
    net.eval()
    acc = 0.0
    sum = 0.0
    loss_sum = 0
    for batch, (data, target) in enumerate(loader):
        data, target = data.to(device), target[:,target_index].to(device)
        output = net(data)
        loss = criterion(output, target)
        loss_sum += loss.item()
        _, predicted = output.max(1)
        sum += target.size(0)
        acc += predicted.eq(target).sum().item()
        # acc += torch.sum(torch.argmax(output, dim=1) == target).item()
        # sum += len(target)
        # loss_sum += loss.item()
    print('test  acc: %.2f%%, loss: %.4f' % (100 * acc / sum, loss_sum / (batch + 1)))
    return 100 * acc / sum, loss_sum / (batch + 1)

def train(loader, model, target_index, training_type):
    '''
    :param loader:
    :param model:
    :param target_index: 标签下标
    :param training_type: 模型名称
    :return:
    '''
    model.train()
    acc = 0.0
    sum = 0.0
    loss_sum = 0

    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)
    for batch, (data, target) in tqdm.tqdm( enumerate(loader),desc="模型训练中：", total=len(loader)):
        data, target = data.to(device), target[:,target_index].type(torch.LongTensor).to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        loss_sum += loss.item()
        _, predicted = output.max(1)
        sum += target.size(0)
        acc += predicted.eq(target).sum().item()

    print('train acc: %.2f%%, loss: %.4f' % (100 * acc / sum, loss_sum / (batch + 1)))
    torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, "../models/" + str(training_type) + "_checkpoint.pth")

def load_model(model_path):
    #加载模型
    net = VGG('VGG16').to(device)
    checkpoint = torch.load(model_path)
    net.load_state_dict(checkpoint['model_state_dict'])
    return net
def CelebA_test(model,target_index):
    print("全部测试集：")
    acc,loss=test(celebA_val_loader,model,target_index=target_index)
    print("性别1测试集：")
    test(Male0_test_DataLoader,model,target_index=target_index)
    print("性别2测试集：")
    test(Male1_test_DataLoader,model,target_index=target_index)
    print("Young0测试集：")
    test(Young0_test_DataLoader,model,target_index=target_index)
    print("Young1测试集：")
    test(Young1_test_DataLoader,model,target_index=target_index)
    print("Bald0测试集：")
    test(Bald0_test_DataLoader,model,target_index=target_index)
    print("Bald1测试集：")
    test(Bald1_test_DataLoader,model,target_index=target_index)
    print("Wearing_earrings0测试集：")
    test(Wearing_earrings0_test_DataLoader,model,target_index=target_index)
    print("Wearing_earrings1测试集：")
    test(Wearing_earrings1_test_DataLoader,model,target_index=target_index)
    print("Receding_hairing0测试集：")
    test(Receding_hairing0_test_DataLoader,model,target_index=target_index)
    print("Receding_hairing1测试集：")
    test(Receding_hairing1_test_DataLoader,model,target_index=target_index)
    return acc
def load_new_model():
    net = VGG(net_name)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = net.to(device)
    if torch.cuda.device_count() > 1:
        print("Using", torch.cuda.device_count(), "GPUs")
        net = nn.DataParallel(net)
    return net

In [None]:
net=load_model("../models/VGG16_origin_gender_checkpoint.pth")
CelebA_test(net,30)
net=load_model("../models/VGG16_origin_smiling_checkpoint.pth")
CelebA_test(net,31)
net=load_model("../models/VGG16_origin_biglips_checkpoint.pth")
CelebA_test(net,6)
net=load_model("../models/VGG16_origin_bignose_checkpoint.pth")
CelebA_test(net,7)
net=load_model("../models/VGG16_origin_wavyhair_checkpoint.pth")
CelebA_test(net,33)

In [15]:
%%time
#原始训练
net=load_new_model()
for epoch in range(200):
        print('epoch: %d' % epoch)
        train(celebA_train_loader,net,target_index=30,training_type="VGG16_origin_gender")
        acc=CelebA_test(net,target_index=30)
        if acc>90:
            print("epoch:"+str(epoch))
            break

Using 2 GPUs
epoch: 0


模型训练中：: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1272/1272 [07:02<00:00,  3.01it/s]


train acc: 94.33%, loss: 0.2161
全部测试集：
test  acc: 95.38%, loss: 0.1876
epoch:0
CPU times: user 1h 46min 13s, sys: 41.6 s, total: 1h 46min 55s
Wall time: 7min 42s


In [None]:
net=load_new_model()
for epoch in range(10):
        print('epoch: %d' % epoch)
        train(celebA_train_loader,net,target_index=31,training_type="VGG16_origin_smiling")
        acc=CelebA_test(net,target_index=31)
        if acc>90:
            print("epoch:"+str(epoch))
            break
net=load_new_model()
for epoch in range(10):
        print('epoch: %d' % epoch)
        train(celebA_train_loader,net,target_index=6,training_type="VGG16_origin_biglips")
        acc=CelebA_test(net,target_index=6)
        if acc>90:
            print("epoch:"+str(epoch))
            break

net=load_new_model()
for epoch in range(10):
        print('epoch: %d' % epoch)
        train(celebA_train_loader,net,target_index=7,training_type="VGG16_origin_bignose")
        acc=CelebA_test(net,target_index=7)
        if acc>90:
            print("epoch:"+str(epoch))
            break
net=load_new_model()
for epoch in range(10):
        print('epoch: %d' % epoch)
        train(celebA_train_loader,net,target_index=33,training_type="VGG16_origin_wavyhair")
        acc=CelebA_test(net,target_index=33)
        if acc>90:
            print("epoch:"+str(epoch))
            break

Using 2 GPUs
epoch: 0


模型训练中：:  15%|████████████████████▍                                                                                                                | 195/1272 [01:04<05:52,  3.06it/s]

In [56]:
#混淆训练,celebA数据集target为biglips
for epoch in range(2):
        print('epoch: %d' % epoch)
        train(celebA_train_shuffle_biglips_dataloader,net,target_index=target_index,training_type="VGG16(224*224)_shuffle_smiling")

epoch: 0


模型训练中：: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:27<00:00,  2.91it/s]


train acc: 64.74%, loss: 0.6459
epoch: 1


模型训练中：: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:28<00:00,  2.90it/s]


train acc: 71.83%, loss: 0.5081


In [57]:
net=load_model('../models/VGG16(224*224)_shuffle_smiling_checkpoint.pth')

In [58]:
CelebA_test(net,target_index=target_index)

全部测试集：
test  acc: 33.74%, loss: 2.0693
性别1测试集：
test  acc: 33.47%, loss: 2.1568
性别2测试集：
test  acc: 34.91%, loss: 1.9318
标签1测试集：
test  acc: 38.15%, loss: 0.7709
标签2测试集：
test  acc: 29.84%, loss: 3.3945


In [63]:
#恢复训练,celebA数据集target为biglips
for epoch in range(10):
        print('epoch: %d' % epoch)
        train(celebA_train_balance_dataloader,net,target_index=target_index,training_type="VGG16(224*224)_balance_smiling")

epoch: 0


模型训练中：: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:29<00:00,  2.86it/s]


train acc: 88.19%, loss: 0.2132
epoch: 1


模型训练中：: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:28<00:00,  2.87it/s]


train acc: 88.63%, loss: 0.2047
epoch: 2


模型训练中：: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:28<00:00,  2.87it/s]


train acc: 88.84%, loss: 0.1975
epoch: 3


模型训练中：: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:28<00:00,  2.87it/s]


train acc: 88.89%, loss: 0.1970
epoch: 4


模型训练中：: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:29<00:00,  2.86it/s]


train acc: 89.01%, loss: 0.1938
epoch: 5


模型训练中：: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:29<00:00,  2.85it/s]


train acc: 89.12%, loss: 0.1897
epoch: 6


模型训练中：: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:29<00:00,  2.85it/s]


train acc: 89.10%, loss: 0.1882
epoch: 7


模型训练中：: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:28<00:00,  2.87it/s]


train acc: 89.12%, loss: 0.1880
epoch: 8


模型训练中：: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:29<00:00,  2.86it/s]


train acc: 89.14%, loss: 0.1881
epoch: 9


模型训练中：: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 255/255 [01:28<00:00,  2.87it/s]


train acc: 89.17%, loss: 0.1874


In [61]:
net=load_model('../models/VGG16(224*224)_balance_smiling_checkpoint.pth')

In [64]:
CelebA_test(net,target_index=target_index)

全部测试集：
test  acc: 72.26%, loss: 0.9061
性别1测试集：
test  acc: 70.07%, loss: 0.8734
性别2测试集：
test  acc: 75.74%, loss: 0.9761
标签1测试集：
test  acc: 82.18%, loss: 0.9227
标签2测试集：
test  acc: 61.94%, loss: 0.8948
