In [1]:
!nvidia-smi # 查看GPU信息

Sat Jun  6 14:19:46 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   74C    P0    86W / 149W |      0MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# 进入需要训练项目的文件夹
%cd /content/drive/My Drive/Colab Notebooks/ML2020_spring_crack_detection/CrackU-Net-cross-entropy

/content/drive/My Drive/Colab Notebooks/ML2020_spring_crack_detection/CrackU-Net-cross-entropy


In [4]:
!pip install xlutils



In [0]:
from models_crack_unet import SegmentNet, weights_init_normal
from dataset_crack_unet import CFDDataset

import torch.nn as nn
import torch

from torchvision import datasets
from torchvision.utils import save_image
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch.utils.data import DataLoader

import os
import sys
import argparse
import time
import PIL.Image as Image

import numpy as np

import xlwt
import xlrd
from xlutils.copy import copy

In [18]:
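# In a notebook, build the argument parser inside a function and parse an
# explicit (empty) argument list. Calling argparse.ArgumentParser().parse_args()
# directly would try to parse the Jupyter kernel's own command line and fail.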

def get_arguments(args=None):
    """Build the training configuration.

    Args:
        args: Optional list of command-line style strings, e.g.
            ``["--lr", "0.001", "--cuda", "false"]``. When omitted, an empty
            list is parsed so every option takes its default and the notebook
            kernel's own ``sys.argv`` is never consulted.

    Returns:
        argparse.Namespace with one attribute per option defined below.
    """

    def str2bool(value):
        # ``type=bool`` would turn every non-empty string (including "False")
        # into True, so boolean flags are parsed explicitly instead.
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ("1", "true", "t", "yes", "y", "on"):
            return True
        if text in ("0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))

    parser = argparse.ArgumentParser()

    # Hardware / data loading
    parser.add_argument("--cuda", type=str2bool, default=True, help="run on the GPU with CUDA")
    parser.add_argument("--gpu_num", type=int, default=1, help="number of gpu")
    # worker_num=0 loads batches in the main process
    parser.add_argument("--worker_num", type=int, default=0, help="number of input workers")
    parser.add_argument("--batch_size", type=int, default=4, help="batch size of input")

    # Adam optimizer
    parser.add_argument("--lr", type=float, default=0.0005, help="adam: learning rate")
    parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient")
    parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of second order momentum of gradient")

    # Epoch range: training runs over range(begin_epoch, end_epoch)
    parser.add_argument("--begin_epoch", type=int, default=0, help="begin_epoch")
    parser.add_argument("--end_epoch", type=int, default=101, help="end_epoch")

    # Periodic evaluation and checkpointing
    parser.add_argument("--need_test", type=str2bool, default=True, help="need to test")
    parser.add_argument("--test_interval", type=int, default=2, help="interval of test")
    parser.add_argument("--need_save", type=str2bool, default=True, help="need to save")
    parser.add_argument("--save_interval", type=int, default=2, help="interval of save weights")

    # Size every image and mask is resized to
    parser.add_argument("--img_width", type=int, default=480, help="size of image width")
    parser.add_argument("--img_height", type=int, default=320, help="size of image height")

    return parser.parse_args(args=[] if args is None else list(args))

opt = get_arguments()

print(opt)

Namespace(b1=0.5, b2=0.999, batch_size=4, begin_epoch=0, cuda=True, end_epoch=101, gpu_num=1, img_height=320, img_width=480, lr=0.0005, need_save=True, need_test=True, save_interval=2, test_interval=2, worker_num=0)


In [0]:
# Root directory of the datasets; passed to CFDDataset as its first argument
# when the data loaders are built. The path is relative to the current working directory.
dataSetRoot = "../Data" 

In [0]:
# Build the segmentation network
segment_net = SegmentNet(init_weights=True)

# Use binary cross-entropy as the segmentation loss.
# NOTE(review): BCELoss expects probabilities in [0, 1], so the network's "seg"
# output is presumably already passed through a sigmoid — confirm in models_crack_unet.
criterion_segment  = torch.nn.BCELoss()

In [0]:
# Select the training device and set the initial weights
if opt.cuda:
    segment_net = segment_net.cuda()
    criterion_segment.cuda()

if opt.gpu_num > 1:
    segment_net = torch.nn.DataParallel(segment_net, device_ids=list(range(opt.gpu_num)))

if opt.begin_epoch != 0:
    # Resume from the checkpoint saved for epoch `begin_epoch`.
    # When running without CUDA the tensors are remapped to the CPU, so a
    # checkpoint written on a GPU machine can still be loaded; with CUDA the
    # default behaviour (restore to the device the tensors were saved from) is kept.
    segment_net.load_state_dict(
        torch.load(
            "./saved_models/segment_net_%d.pth" % (opt.begin_epoch),
            map_location=None if opt.cuda else "cpu",
        )
    )
else:
    # Fresh run: initialise the weights
    segment_net.apply(weights_init_normal)

In [0]:
# Adam optimizer over all network parameters, using the learning rate and betas from `opt`
optimizer_seg = torch.optim.Adam(segment_net.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

In [0]:
# Preprocessing applied to the input images before training/testing:
# resize to (img_height, img_width) with bicubic interpolation, then convert to a tensor.
transforms_ = transforms.Compose([
    transforms.Resize((opt.img_height, opt.img_width), Image.BICUBIC),
    transforms.ToTensor()
])

# Preprocessing applied to the ground-truth masks.
# Masks are resized with nearest-neighbour interpolation so that resizing never
# invents new label values: the default (bilinear) resize blends crack and
# background pixels along crack edges, and those in-between values can never
# equal a thresholded 0/1 prediction in the exact-equality accuracy /
# precision / recall computations.
transforms_mask = transforms.Compose([
    transforms.Resize((opt.img_height, opt.img_width), Image.NEAREST),
    transforms.ToTensor()
])

In [0]:
# Build the training and test data loaders.
# Training batches are shuffled and use the batch size from `opt`.
trainCFDloader = DataLoader(
    CFDDataset(dataSetRoot, transforms_=transforms_, transforms_mask= transforms_mask, subFold="CFD", isTrain=True),
    batch_size=opt.batch_size,
    shuffle=True,
    num_workers=opt.worker_num,
)

# The test loader yields one image at a time, in a fixed order.
# NOTE(review): the test set is also built with isTrain=True — presumably so the
# dataset returns masks as well as images (the test loop reads testBatch["mask"]);
# confirm against CFDDataset that this flag does not enable training-only behaviour.
# NOTE(review): "CFD/cfd_TEST" sits inside the "CFD" training sub-folder — verify
# that CFDDataset does not pick the test images up as training data.
testloader = DataLoader(
    CFDDataset(dataSetRoot, transforms_=transforms_, transforms_mask= transforms_mask,  subFold="CFD/cfd_TEST", isTrain=True),
    batch_size=1,
    shuffle=False,
    num_workers=opt.worker_num
)

In [0]:
# Binarize network outputs: values below the threshold become 0 (black pixel),
# values at or above it become 1 (white pixel).
def data_threshold(data, threshold):
    """Return a 0/1 float tensor marking where ``data >= threshold``.

    The comparison is done in one vectorized operation on whatever device
    ``data`` already lives on, so it works on both CPU and GPU and does not
    iterate over pixels in Python. The result is detached from the autograd
    graph.

    Args:
        data: Tensor (or array-like) of scores, e.g. flattened probabilities.
        threshold: Scalar cut-off.

    Returns:
        torch.float32 tensor with the same shape and device as ``data``,
        containing only 0.0 and 1.0.
    """
    scores = torch.as_tensor(data).detach()
    # A comparison (rather than floor division by the threshold) guarantees a
    # strictly binary result even when a score is >= 2 * threshold or negative.
    return (scores >= threshold).float()

In [0]:
# Compute accuracy, precision, recall and F1 for a binary prediction.
# `mask` is the ground truth, `data` is the prediction.
def evaluate_metric(mask, data):
    """Compute pixel-wise binary classification metrics.

    An element counts as positive when it is exactly equal to 1; every other
    value is treated as negative. The confusion-matrix counts are obtained with
    vectorized tensor operations instead of a Python loop over every pixel.

    Args:
        mask: Ground-truth labels (tensor or array-like); flattened internally.
        data: Predicted labels (tensor or array-like) with the same number of
            elements as ``mask``.

    Returns:
        Tuple ``(accuracy, precision, recall, F1)`` of Python floats. A metric
        whose denominator is zero (e.g. precision when nothing was predicted
        positive, or any metric on empty input) is reported as 0.0 instead of
        raising ZeroDivisionError.

    Raises:
        ValueError: If ``mask`` and ``data`` have different numbers of elements.
    """
    truth = torch.as_tensor(mask).detach().flatten()
    pred = torch.as_tensor(data, device=truth.device).detach().flatten()
    if truth.numel() != pred.numel():
        raise ValueError(
            "mask and data must have the same number of elements, got %d and %d"
            % (truth.numel(), pred.numel())
        )

    truth_pos = truth == 1
    pred_pos = pred == 1

    count_TP = int((truth_pos & pred_pos).sum())
    count_FN = int((truth_pos & ~pred_pos).sum())
    count_FP = int((~truth_pos & pred_pos).sum())
    count_TN = int((~truth_pos & ~pred_pos).sum())

    def safe_ratio(numerator, denominator):
        # Undefined ratios are reported as 0.0 so one degenerate image cannot abort a run.
        return numerator / denominator if denominator else 0.0

    count = count_TP + count_FN + count_FP + count_TN

    # Accuracy: share of correctly classified elements
    accuracy = safe_ratio(count_TP + count_TN, count)
    # Precision: share of predicted positives that are truly positive
    precision = safe_ratio(count_TP, count_TP + count_FP)
    # Recall: share of true positives that were found
    recall = safe_ratio(count_TP, count_TP + count_FN)
    # F1: harmonic mean of precision and recall
    F1 = safe_ratio(2 * count_TP, 2 * count_TP + count_FP + count_FN)

    return accuracy, precision, recall, F1

In [0]:
# Create the .xls log file for the run: one sheet per kind of statistic, each
# starting with a header row. Rows are appended to these sheets by index later on.
workbook = xlwt.Workbook(encoding = 'utf-8')

_sheet_layout = (
    # training: epoch, loss, accuracy
    ('sheet1', ('epoch', 'loss', 'accuracy')),
    # testing: epoch, loss, accuracy
    ('sheet2', ('epoch', 'loss', 'accuracy')),
    # testing: epoch, accuracy, precision, recall, F1
    ('sheet3', ('epoch', 'accuracy', 'precision', 'recall', 'F1')),
)

for _sheet_name, _headers in _sheet_layout:
    worksheet = workbook.add_sheet(_sheet_name)
    for _column, _title in enumerate(_headers):
        worksheet.write(0, _column, _title)

# Write the (header-only) workbook to disk
workbook.save('evaluate_data.xls')

def write_excel_xls_append(path, value, sheet_num):
  """Append one row to a sheet of an existing .xls workbook and save it in place.

  Args:
    path: Path of the .xls file to read and overwrite.
    value: Sequence of cell values; element ``i`` is written to column ``i``.
    sheet_num: Zero-based index of the sheet that receives the new row.
  """
  columns = range(len(value))

  # Read the current workbook to find the first free row of the target sheet
  source_book = xlrd.open_workbook(path)
  target_name = source_book.sheet_names()[sheet_num]
  next_row = source_book.sheet_by_name(target_name).nrows

  # xlrd workbooks are read-only: copy into a writable xlwt workbook
  writable_book = copy(source_book)
  target_sheet = writable_book.get_sheet(sheet_num)

  for column in columns:
    target_sheet.write(next_row, column, value[column])

  writable_book.save(path)
  print("xls格式表格[追加]写入数据成功！")

In [0]:
# Main loop: for every epoch, train on the whole training set, log the epoch
# averages, periodically save a checkpoint, and periodically evaluate on the
# test set (saving thresholded predictions and metrics).
for epoch in range(opt.begin_epoch, opt.end_epoch):

    # -----------------------------------------------------------------------------
    # Training phase
    # (re-)enter training mode; the evaluation phase below switches to eval mode
    segment_net.train()

    # Running sums used for the per-epoch average loss and accuracy
    train_loss_sum, train_acc_sum, batch_count = 0.0, 0.0, 0.0

    for batchData in trainCFDloader:

        img = batchData["img"]
        mask = batchData["mask"]
        if opt.cuda:
            img = img.cuda()
            mask = mask.cuda()

        optimizer_seg.zero_grad()

        rst = segment_net(img)
        seg = rst["seg"]

        # Training loss and parameter update
        loss_seg = criterion_segment(seg, mask)
        loss_seg.backward()
        optimizer_seg.step()

        train_loss_sum += loss_seg.item()

        # Pixel accuracy of the thresholded prediction; detached because the
        # metric must not take part in the autograd graph
        net_seg = data_threshold(seg.detach().flatten(), 0.6)    # prediction
        mask_seg = mask.flatten()                                # ground truth

        # Count the pixels where prediction and ground truth are equal
        right_seg = torch.eq(net_seg, mask_seg).sum().float().item()
        total_num = float(mask_seg.numel())

        batch_acc = right_seg/total_num
        train_acc_sum += batch_acc

        batch_count += 1

        # Per-batch progress
        print("[Epocn:{0}],[batch_count:{1}],[loss:{2:.6f}],[accuracy:{3:.6f}]".format(epoch, batch_count, loss_seg.item(), batch_acc))

    # Per-epoch average training loss and accuracy
    print("[Epoch {0}/{1}], [loss:{2:.6f}], accuracy:{3:.6f}]".format(epoch, opt.end_epoch, train_loss_sum/batch_count, train_acc_sum/batch_count))

    # Append epoch, loss, accuracy to the first sheet of the xls log
    print("------------------------------------------------------------------------------------------")
    print("开始写入训练过程的epoch, loss, accuracy")
    train_xls_value = [epoch, train_loss_sum/batch_count, train_acc_sum/batch_count]
    write_excel_xls_append("evaluate_data.xls", train_xls_value, 0)
    print("------------------------------------------------------------------------------------------")

    # -----------------------------------------------------------------------------
    # Periodically save the trained weights
    if opt.need_save and epoch % opt.save_interval == 0 and epoch >= opt.save_interval:

        save_path_str = "./saved_models"
        os.makedirs(save_path_str, exist_ok=True)

        torch.save(segment_net.state_dict(), "%s/segment_net_%d.pth" % (save_path_str, epoch))
        print("------------------------------------------------------------------------------------------")
        print("save weights ! epoch = %d" %epoch)
        print("------------------------------------------------------------------------------------------")

    # -----------------------------------------------------------------------------
    # Periodically evaluate on the test set and save the results
    if opt.need_test and epoch % opt.test_interval == 0 and epoch >= opt.test_interval:

        test_loss_sum, test_acc_sum, batch_count = 0.0, 0.0, 0.0
        result_evaluate_epoch = np.array([0.0, 0.0, 0.0, 0.0])

        # Output directory for this epoch's test images
        save_path_str = "./testResultSeg/epoch_%d"%(epoch)
        os.makedirs(save_path_str, exist_ok=True)

        # Evaluate in eval mode and without autograd: test batches must not
        # influence any train-mode-only layer behaviour the network may have
        # (e.g. normalisation statistics, dropout), and no gradients are needed.
        segment_net.eval()
        with torch.no_grad():
            for i, testBatch in enumerate(testloader):
                imgTest = testBatch["img"]
                mask = testBatch["mask"]
                if opt.cuda:
                    imgTest = imgTest.cuda()
                    mask = mask.cuda()

                # Time the forward pass only
                t1 = time.time()
                rstTest = segment_net(imgTest)
                t2 = time.time()

                segOut = rstTest["seg"]

                # Test loss
                loss_test = criterion_segment(segOut, mask)
                test_loss_sum += loss_test.item()

                # Pixel accuracy of the thresholded prediction
                net_seg = data_threshold(segOut.flatten(), 0.6)  # prediction
                mask_seg = mask.flatten()                        # ground truth

                right_seg = torch.eq(net_seg, mask_seg).sum().float().item()
                total_num = float(mask_seg.numel())

                batch_acc = right_seg/total_num
                test_acc_sum += batch_acc

                batch_count += 1

                # Accumulate accuracy / precision / recall / F1 for the epoch average
                result_evaluate_batch = np.array(list(evaluate_metric(mask_seg, net_seg)))
                result_evaluate_epoch += result_evaluate_batch

                # The saved image is the same thresholded prediction, restored
                # to the network output's shape (no second thresholding pass)
                segTest = net_seg.reshape(segOut.shape)

                # Save the input image and its thresholded segmentation
                print("processing image NO %d, time comsuption %fs"%(i, t2 - t1))
                save_image(imgTest.data, "%s/img_%d.jpg"% (save_path_str, i))
                save_image(segTest.data, "%s/img_%d_seg.jpg"% (save_path_str, i))

        # Append the epoch-averaged accuracy, precision, recall and F1 to the third sheet
        print("------------------------------------------------------------------------------------------")
        print("开始写入评估参数")
        result_evaluate_epoch = result_evaluate_epoch/np.array([batch_count])
        test_xls_metric = [epoch] + list(result_evaluate_epoch)
        write_excel_xls_append("evaluate_data.xls", test_xls_metric, 2)
        print("------------------------------------------------------------------------------------------")

        # Per-epoch average test loss and accuracy
        print("------------------------------------------------------------------------------------------")
        print("[Epoch {0}/{1}], [loss:{2:.6f}], accuracy:{3:.6f}]".format(epoch, opt.end_epoch, test_loss_sum/batch_count, test_acc_sum/batch_count))
        print("------------------------------------------------------------------------------------------")

        # Append epoch, loss, accuracy to the second sheet of the xls log
        print("------------------------------------------------------------------------------------------")
        print("开始写入测试过程的epoch, loss, accuracy")
        test_xls_value = [epoch, test_loss_sum/batch_count, test_acc_sum/batch_count]
        write_excel_xls_append("evaluate_data.xls", test_xls_value, 1)
        print("------------------------------------------------------------------------------------------")

  

[Epocn:0],[batch_count:1.0],[loss:0.660183],[accuracy:0.983200]
[Epocn:0],[batch_count:2.0],[loss:0.698520],[accuracy:0.958566]
