In [12]:
# Sequential execution: run the full train/evaluate procedure for each dataset in turn

%%time # recording time

import sys
import time
import os

import numpy as np
from tqdm import tqdm

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F

import matplotlib.pyplot as plt
%matplotlib inline

# 忽略烦人的红色提示/Ignore red alerts
import warnings
warnings.filterwarnings("ignore")

def writePrint():
    """Redirect ``sys.stdout`` so subsequent ``print`` output is appended to ``OOO.txt``.

    The log file is opened at most once. The handle is cached on the function
    object and reused by later calls, so calling this repeatedly (for example
    once per trial) does not leak a new file object each time. If the cached
    handle has been closed in the meantime, a fresh one is opened.

    The redirection stays in effect after the function returns; the caller is
    responsible for restoring the previous ``sys.stdout`` when console output
    is wanted again.
    """
    log_file = getattr(writePrint, "_log_file", None)
    if log_file is None or log_file.closed:
        # Append mode keeps the results written by earlier runs.
        log_file = open(r"OOO.txt", "a")
        writePrint._log_file = log_file
    sys.stdout = log_file
    
# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('device', device)


from torchvision import transforms

# Training-set preprocessing: random resized crop to 224, random horizontal
# flip (augmentation), conversion to Tensor, per-channel normalization
train_transform = transforms.Compose(
    [
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Test-set preprocessing: resize to 256, center crop to 224, conversion to
# Tensor, per-channel normalization with the same mean/std as training
test_transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


# Fine-tuning experiment driver.
#
# For every dataset folder under `path` (each expected to hold a `tra`
# training split and a `val` evaluation split in ImageFolder layout), the
# final fully-connected layer of a pretrained ResNet-18 is trained three
# independent times, and the accuracy of each run on the `val` split is
# appended to OOO.txt.

# Imported once here instead of on every loop iteration.
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import models

path = 'data/'

# Names of the training / evaluation sub-folders inside each dataset folder
train = 'tra'
val = 'val'

BATCH_SIZE = 32
EPOCHS = 20  # training epochs per trial

# Sorted so that datasets are processed (and logged) in a reproducible order;
# os.listdir() makes no ordering guarantee.
for i in sorted(os.listdir(path)):

    # Dataset folder path and its two splits
    dataset_dir = os.path.join(path, i)
    train_path = os.path.join(dataset_dir, train)
    test_path = os.path.join(dataset_dir, val)

    # Stray files or folders without both splits would make ImageFolder
    # raise and abort the whole run, so they are skipped instead.
    if not (os.path.isdir(train_path) and os.path.isdir(test_path)):
        print('skipping', dataset_dir)
        continue

    # Load the training and test sets (the same for all trials of this dataset)
    train_dataset = datasets.ImageFolder(train_path, train_transform)
    test_dataset = datasets.ImageFolder(test_path, test_transform)

    # Name of each category
    class_names = train_dataset.classes
    n_class = len(class_names)

    # Mapping: index -> class name (inverse of class_to_idx)
    idx_to_labels = {y: x for x, y in train_dataset.class_to_idx.items()}

    # Save both mappings as local npy files. The file names are fixed, so
    # each dataset overwrites the files written for the previous one.
    np.save('R_idx_to_labels.npy', idx_to_labels)
    np.save('R_labels_to_idx.npy', train_dataset.class_to_idx)

    # Data loader for the training set (reshuffled every epoch)
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=4
                             )

    # Data loader for the test set (fixed order)
    test_loader = DataLoader(test_dataset,
                             batch_size=BATCH_SIZE,
                             shuffle=False,
                             num_workers=4
                            )

    # Three independent trials per dataset, each starting from fresh
    # pretrained weights.
    for j in range(3):

        model = models.resnet18(pretrained=True)  # load the pre-trained model

        # Freeze the pretrained backbone. Only the new head is handed to the
        # optimizer below, so without this the backward pass would compute
        # gradients for every layer only to discard them.
        for param in model.parameters():
            param.requires_grad = False

        # Replace the fully-connected layer so its output size matches the
        # number of classes; a newly created layer has requires_grad=True.
        model.fc = nn.Linear(model.fc.in_features, n_class)

        # Optimize only the parameters of the final fully-connected layer
        optimizer = optim.Adam(model.fc.parameters())

        model = model.to(device)

        # Cross-entropy loss, averaged over the samples of a batch
        criterion = nn.CrossEntropyLoss()

        # Training
        for epoch in tqdm(range(EPOCHS)):

            model.train()

            for images, labels in train_loader:  # one batch of data and labels
                images = images.to(device)
                labels = labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # Evaluation on the held-out split
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in tqdm(test_loader):
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                preds = outputs.argmax(dim=1)
                total += labels.size(0)
                # .item() turns the 0-dim device tensor into a Python int, so
                # the accuracy below is ordinary float arithmetic.
                correct += (preds == labels).sum().item()

        accuracy = 100 * correct / total

        # Append the result to the log file. Writing through a scoped handle
        # leaves sys.stdout untouched and closes (and flushes) the file after
        # every trial.
        with open(r"OOO.txt", "a") as log_file:
            print(i, file=log_file)
            print('测试集上的准确率为 {:.3f} %'.format(accuracy), file=log_file)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [08:29<00:00, 25.48s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:05<00:00, 17.40it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [08:32<00:00, 25.62s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:05<00:00, 17.29it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [08:34<00:00, 25.73s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:05<00:00, 16.70it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [08:53<00:00, 26.65s/it]
100%|████████

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [08:34<00:00, 25.73s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:05<00:00, 17.32it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [08:40<00:00, 26.02s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:05<00:00, 17.30it/s]
