In [None]:

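'''
dataframe: DataFrame made up of the training samples and the validation samples combined
testPath: folder that contains the test samples
outputPath: folder the extracted feature file is written to
network: network architecture used to extract the features
'''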
def  getFeatures(dataframe,testPath,outputPath,network,batchsize=batch_size,cudaDevice='3'):
    if network == 'resnet18':
        model_conv = torchvision.models.resnet18(pretrained=True)
        model_conv = nn.Sequential(*list(model_conv.children())[:-1])#python中单个星号表示该位置接收任意多个非关键字参数 并转化为元表
        featurenum=512
    elif network=='resnet34':
        model_conv = torchvision.models.resnet34(pretrained=True)
        model_conv = nn.Sequential(*list(model_conv.children())[:-1])
        featurenum = 512
    elif network=='resnet50':
        model_conv = torchvision.models.resnet50(pretrained=True)
        model_conv = nn.Sequential(*list(model_conv.children())[:-1])
        featurenum = 2048
    elif network=='resnet152':
        model_conv = torchvision.models.resnet152(pretrained=True)
        model_conv = nn.Sequential(*list(model_conv.children())[:-1])
        featurenum = 2048
    elif network == 'vgg19':
        model_conv = torchvision.models.vgg19(pretrained=True)
        model_conv.classifier = nn.Sequential(*list(model_conv.classifier.children())[:-1])
        featurenum = 4096
    elif network == 'densenet161':
        model_conv = torchvision.models.densenet161(pretrained=True)
        model_conv.classifier = nn.Sequential(*list(model_conv.classifier.children())[:-1])
        featurenum = 2208
    elif network == 'densenet169':
        model_conv = torchvision.models.densenet169(pretrained=True)
        model_conv.classifier = nn.Sequential(*list(model_conv.classifier.children())[:-1])
        featurenum = 1664
    elif network== 'inception_v3':
        model_conv = torchvision.models.inception_v3(pretrained = True,transform_input=False)
        featurenum = 1000
    if type(cudaDevice) is not str and type(cudaDevice) is not list:
        raise Exception('Error type of the CUDA Device')
    if type(cudaDevice) is str:
        os.environ["CUDA_VISIBLE_DEVICES"] = cudaDevice
        model_conv.cuda()
    if type(cudaDevice) is list:
        model_conv=nn.DataParallel(model_conv,device_ids=cudaDevice)
    model_conv.eval()
    if network == 'inception_v3':
        train_transform=transforms.Compose([
            transforms.Resize(320),
            transforms.CenterCrop(299),
            transforms.ToTensor(),
#             transforms.Normalize(mean = [ 0.485, 0.456, 0.406 ],
#                                  std = [ 0.229, 0.224, 0.225 ]) # 这里的mean和std对应的都是Imagenet中的mean和std
        ])
    else:
         train_transform=transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
#             transforms.Normalize(mean = [ 0.485, 0.456, 0.406 ],
#                                  std = [ 0.229, 0.224, 0.225 ])
        ]) #可以增加Normolization
    train_feature = []
    for idx in range(0, dataframe.shape[0], batchsize):
        if idx + batchsize < dataframe.shape[0]:
            ff = read_img(dataframe['image_id'].iloc[idx: idx + batchsize].values) #获取dataFrame中某一属性的Series 在某一范围内的对象 并获取其valus 表示为array
            ff = [train_transform(x) for x in ff]
            ff = torch.stack(ff)
            ff = model_conv(Variable(ff.cuda())).view(-1, featurenum)
            train_feature.append(ff.data.cpu().numpy())
            del ff; gc.collect()
        else:
            ff = read_img(dataframe['image_id'].iloc[idx: ].values)
            ff = [train_transform(x) for x in ff]
            ff = torch.stack(ff)
            ff = model_conv(Variable(ff.cuda())).view(-1, featurenum)
            train_feature.append(ff.data.cpu().numpy())
            del ff; gc.collect()
        print('Train', idx, train_val.shape[0])
    train_feature = np.array(train_feature)
    test=os.listdir(testPath)
    test=[testPath+x for x in test]
    test_feature=[]
    for idx in range(0, len(test), batchsize):
        if idx + batchsize < len(test):
            ff = read_img(test[idx: idx + batchsize])
            ff = [train_transform(x) for x in ff]
            ff = torch.stack(ff)
            ff = model_conv(Variable(ff.cuda())).view(-1, featurenum)
            test_feature.append(ff.data.cpu().numpy())
            del ff; gc.collect()
        else:
            ff = read_img(test[idx: ])
            ff = [train_transform(x) for x in ff]
            ff = torch.stack(ff)
            ff = model_conv(Variable(ff.cuda())).view(-1, featurenum)
            test_feature.append(ff.data.cpu().numpy())
            del ff; gc.collect()
        print('Test', idx, len(test))
    test_feature = np.array(test_feature)
    train_feature = np.concatenate(train_feature, 0).reshape(-1, featurenum)
    test_feature = np.concatenate(test_feature, 0).reshape(-1, featurenum)
    with h5py.File(outputPath+network+'.h5', "w") as f:
        f.create_dataset("train_feature", data=train_feature)
        f.create_dataset("test_feature", data=test_feature)

In [None]:

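'''
Train the final fully connected layer on the extracted features (it could be replaced by an SVM, etc.)
feature_file: directory that contains the extracted feature files
testPath: directory that contains the test images
model: the model being trained; the function uses the previously defined modelnn (note: this is not an actual parameter of the function)

'''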
def trainLastLinear(feature_file,testPath,epochsize=80):
    train_feat, test_feat = [], []
    for ffile in os.listdir(feature_file):
        with h5py.File(feature_file+ffile, "r") as f:
            train_feat.append(f['train_feature'][:])
            test_feat.append(f['test_feature'][:])
    train_feat = np.concatenate(train_feat, 1)#在列上进行合并 相当于把[[1,2,3],[4,5,6]]和[[1,2],[3,4]]合并为[[1,2,3,1,2],[4,5,6,3,4]]
    test_feat = np.concatenate(test_feat, 1)
    print('Feature:', train_feat.shape)
    print(feature_file)
    skf=StratifiedKFold(n_splits=6)
    train_preds, test_preds = np.zeros(train_feat.shape[0]), []
    train_logs = [[], [], [], []]
    for train_index, test_index in skf.split(train_feat, train_val['disease_class']):
        X_train, X_test = train_feat[train_index, :], train_feat[test_index, :]
        y_train, y_test = train_val['disease_class'].values[train_index], train_val['disease_class'].values[test_index]
        train_set = ArrayLoader(X_train, y_train)
        train_loader = torch.utils.data.DataLoader(train_set, batch_size = 64, shuffle=True, num_workers=4)
        val_set = ArrayLoader(X_test, y_test)
        val_loader = torch.utils.data.DataLoader(val_set, batch_size = 64, shuffle=True, num_workers=4)
        model=modelnn(train_feat.shape[1])
        model=model.cuda()
        criterion = nn.CrossEntropyLoss().cuda()
        optimizer_ft = torch.optim.SGD(model.parameters(), lr = 0.0001, momentum = 0.75, weight_decay = 1e-4)
        for epoch in range(epochsize):
            adjust_learning_rate(optimizer_ft, epoch)
            running_corrects = 0.0
            running_loss = 0.0
            for data in train_loader:
                dta_x, dta_y = data
                dta_x, dta_y = Variable(dta_x.cuda()), Variable(dta_y.cuda().view(dta_y.size(0)))
                optimizer_ft.zero_grad()
                outputs = model(dta_x)
                _, preds = torch.max(outputs.data, 1)
#                 print(dta_y)
#                 print(preds)
                loss = criterion(outputs, dta_y)
                loss.backward()
                optimizer_ft.step()
                running_loss += loss.item()
                running_corrects += torch.sum(preds == dta_y.data)
            train_loss = running_loss / len(train_set)
            train_acc = float(running_corrects) / len(train_set)
            running_corrects = 0.0
            running_loss = 0.0
            for data in val_loader:
                dta_x,dta_y=data
                dta_x, dta_y = Variable(dta_x.cuda()), Variable(dta_y.cuda().view(dta_y.size(0)))
                outputs = model(dta_x)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, dta_y)
                running_loss += loss.item()
                running_corrects += torch.sum(preds == dta_y.data)
            val_loss = running_loss / len(val_set)
            val_acc = float(running_corrects) / len(val_set)
            epoch_log = '[%d/%d] Loss %.6f/%.6f Acc %.6f/%.6f' % (epoch, epochsize, train_loss, val_loss, train_acc, val_acc)
            print(epoch_log)
        val_set = ArrayLoader(X_test, y_test)
        val_loader = torch.utils.data.DataLoader(val_set, batch_size = 1, shuffle = False, num_workers=4)
        val_pred = []
        for data in val_loader:
            dta_x, _ = data
            dta_x = Variable(dta_x.cuda())
            outputs = model(dta_x)
            _, preds = torch.max(outputs.data, 1)
            val_pred.append(preds.cpu().numpy()[0])       
        train_preds[test_index] = val_pred
        train_logs[0].append(train_loss)
        train_logs[1].append(val_loss)
        train_logs[2].append(train_acc)
        train_logs[3].append(val_acc)
        print('Val:', sum(train_preds[test_index] == y_test) * 1.0 / len(y_test))
        test_set = ArrayLoader(test_feat, np.zeros_like(test_feat))
        test_loader = torch.utils.data.DataLoader(test_set, batch_size = 1, shuffle = False, num_workers=4)
        test_pred = []
        for data in test_loader:
            dta_x, _ = data
            dta_x = Variable(dta_x.cuda())
            outputs = model(dta_x)
            _, preds = torch.max(outputs.data, 1)
            test_pred.append(preds.cpu().numpy()[0])
        test_preds.append(test_pred)
#         test=[x for x in os.listdir(testPath)]
#         with codecs.open('test_.txt' + str(train_logs[3][-1]), 'w') as f:
#             for i in range(len(test)):
#                 f.write(str(test_pred[i])  + '\t' + test[i] + '\n')
    print('+++ Loss %.6f/%.6f Acc %.6f/%.6f' % (np.mean(train_logs[0]), np.mean(train_logs[1]), np.mean(train_logs[2]), np.mean(train_logs[3])))
    test_preds=np.array(test_preds)
    print('test_preds shape is :',test_preds.shape)
    result=[np.argmax(np.bincount(test_preds[:,i])) for i in range(0,test_preds.shape[1])]
    uploadFile=[]
    for index,img in enumerate(os.listdir(testPath)):
        a={"image_id":img,"disease_class":int(result[index])}
        uploadFile.append(a.copy())
    with open("result.json",'w') as f:
        json.dump(uploadFile,f,ensure_ascii=False)