In [1]:
#用到的套件

from __future__ import print_function, division
import os  
import pandas as pd
import requests
import numpy as np
import re
import math
import torchvision
import cv2
import glob
import random
from pathlib import Path
from torchvision import datasets,transforms
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader
from efficientnet_pytorch import EfficientNet
import time
import argparse
from time import sleep
from tqdm import tqdm, trange
from PIL import Image
from torch.autograd import Variable
import torch.nn.functional as FUN
from scipy import io
import efficientnet_pytorch
import torchvision.transforms as T
import PIL
import pickle
import torchvision.datasets as dsets
from scipy.misc import imsave

In [2]:
#定義function

def load_file(filename):
    """Deserialize and return the object stored in the pickle file ``filename``.

    The file is opened in binary mode and unpickled with ``encoding='latin1'``.
    NOTE: unpickling can execute arbitrary code; only use this on trusted files.
    """
    with open(filename, 'rb') as stream:
        return pickle.load(stream, encoding='latin1')

# Unpickle a file and return the deserialized object
def unpickle(file):
    """Load and return the object pickled in ``file``.

    :param file: path of the pickle file to read
    :return: the deserialized object (whatever was pickled)

    The file is opened inside a ``with`` block so the handle is closed even
    when ``pickle.load`` raises.
    NOTE: unpickling can execute arbitrary code; only use this on trusted files.
    """
    with open(file, 'rb') as fo:
        return pickle.load(fo, encoding='latin1')

# Pad an image to a square so that a later resize does not distort it
def expend_img(img):
    '''
    Pad the shorter side of ``img`` with a constant colour until it is square.

    :param img: image array; only ``img.shape[:2]`` (height, width) is read
    :return: the padded image returned by ``cv2.copyMakeBorder``

    The padding is split between the two sides of the shorter axis. When the
    height/width difference is odd, the extra pixel goes to the right/bottom
    side so the result is exactly square.
    '''
    fill_pix = [0, 0, 0]  # padding colour, change if another background is needed
    h, w = img.shape[:2]
    diff = abs(h - w)
    pad_first = diff // 2
    pad_second = diff - pad_first  # takes the leftover pixel when diff is odd
    if h >= w:  # portrait or square: pad left and right
        padd_top, padd_bottom, padd_left, padd_right = 0, 0, pad_first, pad_second
    else:  # landscape: pad top and bottom
        padd_top, padd_bottom, padd_left, padd_right = pad_first, pad_second, 0, 0
    new_img = cv2.copyMakeBorder(img, padd_top, padd_bottom, padd_left, padd_right,
                                 cv2.BORDER_CONSTANT, value=fill_pix)
    return new_img

# Basic augmentation: random flips, optional rotation, then colour jitter
def expend1_img(img):
    '''
    Return a randomly augmented version of ``img``.

    :param img: image accepted by the torchvision transforms used below
    :return: the image after flips, an optional rotation and colour jitter
    '''
    rotate_draw = random.random()  # decides further down whether to rotate

    horizontal_flip = transforms.RandomHorizontalFlip(p=0.5)  # mirror left/right, p is the probability
    vertical_flip = transforms.RandomVerticalFlip(p=0.2)  # mirror top/bottom
    img = vertical_flip(horizontal_flip(img))

    if rotate_draw < 0.4:
        # rotate by a random angle in [-45, 45] degrees
        img = transforms.RandomRotation(45)(img)

    # random brightness / contrast / saturation factors
    colour_jitter = transforms.ColorJitter(brightness=(0.8,1.5), contrast=(0.8,1.5), saturation=(0.8,1.5))
    return colour_jitter(img)

# Apply a random perspective warp to an image
def expend2_img(img):
    '''
    Return ``img`` after a random perspective transform (always applied, p=1).

    :param img: image accepted by ``transforms.RandomPerspective``
    :return: the warped image

    ``interpolation`` is passed as ``InterpolationMode.BILINEAR``; the integer
    ``2`` used previously is the deprecated spelling of the same mode and makes
    torchvision emit "Argument interpolation should be of type
    InterpolationMode instead of int".
    '''
    warp = transforms.RandomPerspective(
        distortion_scale=0.6,
        p=1,
        interpolation=transforms.InterpolationMode.BILINEAR,
        fill=0,
    )
    img2 = warp(img)
    return img2

# Split a dataset into train / val folders and pad every image to a square
def split_train_test(img_dir,save_dir,train_val_num):
    '''
    Randomly split every class folder into ``train`` and ``val`` subsets.

    :param img_dir: source directory; its direct children are the class folders
    :param save_dir: destination root; images are written to
                     ``save_dir/train/<class>`` and ``save_dir/val/<class>``
    :param train_val_num: fraction (0..1) of each class that goes to train
    :return: None

    Every image is read with OpenCV, padded with ``expend_img`` and written
    under its original file name. Files OpenCV cannot decode are skipped with
    a message instead of crashing the whole split.
    '''
    for class_dir in glob.glob(img_dir + os.sep + "*"):  # one folder per class
        class_name = class_dir.split(os.sep)[-1]
        img_list = glob.glob(class_dir + os.sep + "*")  # every file of this class
        # a set gives O(1) membership tests in the loop below
        train_set = set(random.sample(img_list, int(len(img_list) * train_val_num)))
        save_train = save_dir + os.sep + 'train' + os.sep + class_name
        save_val = save_dir + os.sep + "val" + os.sep + class_name
        os.makedirs(save_train, exist_ok=True)
        os.makedirs(save_val, exist_ok=True)

        for imgpath in img_list:
            img = cv2.imread(imgpath)
            if img is None:  # cv2.imread returns None for unreadable / non-image files
                print("skip unreadable image:", imgpath)
                continue
            # everything that was not sampled for train is treated as validation data
            target_dir = save_train if imgpath in train_set else save_val
            cv2.imwrite(target_dir + os.sep + Path(imgpath).name, expend_img(img))

    print("split train and val finished !")

# Data augmentation (flips / rotation / colour jitter)
def data_enhancement(img_dir,save_dir,train_val_num):
    '''
    Write three randomly augmented copies of a sample of every class.

    :param img_dir: source directory; its direct children are the class folders
    :param save_dir: destination root; copies go to ``save_dir/train/<class>``
    :param train_val_num: fraction (0..1) of each class to augment
    :return: None

    Each selected image is passed three times through ``expend1_img`` and the
    results are saved as ``<copy index><original file name>``. The (empty)
    ``save_dir/val/<class>`` folder is created as well.
    '''
    for class_dir in glob.glob(img_dir + os.sep + "*"):  # one folder per class
        class_name = class_dir.split(os.sep)[-1]
        img_list = glob.glob(class_dir + os.sep + "*")  # every file of this class
        # a set gives O(1) membership tests in the loop below
        selected = set(random.sample(img_list, int(len(img_list) * train_val_num)))
        save_train = save_dir + os.sep + "train" + os.sep + class_name
        save_val = save_dir + os.sep + "val" + os.sep + class_name
        os.makedirs(save_train, exist_ok=True)
        os.makedirs(save_val, exist_ok=True)

        for imgpath in img_list:
            if imgpath not in selected:
                continue
            imgname = Path(imgpath).name
            # the context manager closes the file handle once the copies are saved
            with Image.open(imgpath) as img:
                for copy_idx in range(3):
                    new_img = expend1_img(img)
                    new_img.save(save_train + os.sep + str(copy_idx) + imgname)

# Data augmentation (perspective transform)
def perspective_transform(img_dir,save_dir,train_val_num):
    '''
    Write one perspective-warped copy of a sample of every class.

    :param img_dir: source directory; its direct children are the class folders
    :param save_dir: destination root; copies go to ``save_dir/train/<class>``
    :param train_val_num: fraction (0..1) of each class to augment
    :return: None

    Each selected image is passed through ``expend2_img`` and saved as
    ``0_per_<original file name>``. The (empty) ``save_dir/val/<class>``
    folder is created as well.
    '''
    for class_dir in glob.glob(img_dir + os.sep + "*"):  # one folder per class
        class_name = class_dir.split(os.sep)[-1]
        img_list = glob.glob(class_dir + os.sep + "*")  # every file of this class
        # a set gives O(1) membership tests in the loop below
        selected = set(random.sample(img_list, int(len(img_list) * train_val_num)))
        save_train = save_dir + os.sep + "train" + os.sep + class_name
        save_val = save_dir + os.sep + "val" + os.sep + class_name
        os.makedirs(save_train, exist_ok=True)
        os.makedirs(save_val, exist_ok=True)

        for imgpath in img_list:
            if imgpath not in selected:
                continue
            imgname = Path(imgpath).name
            # the context manager closes the file handle once the copy is saved
            with Image.open(imgpath) as img:
                for copy_idx in range(1):
                    img2 = expend2_img(img)
                    img2.save(save_train + os.sep + str(copy_idx) + '_per_' + imgname)






In [None]:
# Sort the test images into one folder per class.
# The class id is the first character of each file name.

img_dir = 'cifar_10/test'
save_dir = 'cifar_10/test_class'
img_path_list = glob.glob(img_dir+os.sep+"*")
# File names are derived from the globbed paths so that name and pixel data
# always belong to the same file (glob and os.listdir give no shared ordering).
img_list = [Path(path).name for path in img_path_list]

for t in range(10):
    os.makedirs(save_dir+'/'+str(t),exist_ok=True)

for path, imgname in zip(img_path_list, img_list):
    img = cv2.imread(path)
    if img is None:  # cv2.imread returns None for unreadable / non-image files
        print("skip unreadable image:", path)
        continue
    new_img = expend_img(img)
    cv2.imwrite(save_dir+'/'+imgname[0]+'/'+imgname, new_img)


In [15]:
# Merge a dataset split into one folder tree (one sub-folder per class).
# Run once per source split (train / val) by switching img_dir.

# img_dir = 'imagenette2-320/train'
img_dir = 'imagenette2-320/val'
save_dir = 'imagenette_all'
img_dir_list=glob.glob(img_dir+os.sep+"*")  # one folder per class
for class_dir in img_dir_list:
    class_name=class_dir.split(os.sep)[-1]  # current class
    img_list=glob.glob(class_dir+os.sep+"*")  # every file of this class
    # both splits end up in the same class folder, so one target is enough
    save_train=save_dir+os.sep+class_name
    os.makedirs(save_train,exist_ok=True)
    for imgpath in img_list:
        imgname=Path(imgpath).name  # keep the original file name
        img=cv2.imread(imgpath)
        if img is None:  # cv2.imread returns None for unreadable / non-image files
            print("skip unreadable image:", imgpath)
            continue
        new_img=expend_img(img)
        cv2.imwrite(save_train+os.sep+imgname,new_img)

In [None]:
# Copy a random, class-balanced fraction (train_val_num) of the dataset into
# save_dir; originally used to carve out the 10% and 5% + 5% subsets.

# Raw strings keep the Windows backslash without relying on the invalid "\c" escape.
img_dir = r'cifar_10\cifar10'
save_dir = r'cifar_10\cifar10_random_50percent'
save_dir2 = r'cifar_10\cifar10_second_5percent'
# save_dir = 'imagenette_10percent/second_5percent'
train_val_num = 0.5

img_dir_list=glob.glob(img_dir+os.sep+"*")  # one folder per class
for class_dir in img_dir_list:
    class_name=class_dir.split(os.sep)[-1]  # current class
    img_list=glob.glob(class_dir+os.sep+"*")  # every file of this class
    all_num=len(img_list)
    train_list=random.sample(img_list,int(all_num*train_val_num))  # sampled image paths
    train_set=set(train_list)  # O(1) membership tests in the loop below
    save_one=save_dir+os.sep+os.sep+class_name
    save_two=save_dir2+os.sep+os.sep+class_name
    os.makedirs(save_one,exist_ok=True)
    # os.makedirs(save_two,exist_ok=True)  # enable together with the else-branch below

    # print(class_name+" trian num",len(train_list))
    # print(class_name+" val num",all_num-len(train_list))

    # write the sampled images
    for imgpath in img_list:
        imgname=Path(imgpath).name  # keep the original file name
        if imgpath in train_set:
            img=cv2.imread(imgpath)
            if img is None:  # cv2.imread returns None for unreadable / non-image files
                print("skip unreadable image:", imgpath)
                continue
            new_img=expend_img(img)
            cv2.imwrite(save_one+os.sep+imgname,new_img)
        # else:
        #     img=cv2.imread(imgpath)
        #     new_img=expend_img(img)
        #     cv2.imwrite(save_two+os.sep+imgname,new_img)



In [7]:
# Split the dataset into train and val folders.
# NOTE(review): the paths below are absolute and machine-specific; adjust them before running elsewhere.

img_dir = r'C:/Users/Taka/Desktop/efficientnet/ntust/data/og_data'
save_dir = r'C:/Users/Taka/Desktop/efficientnet/ntust/data/split_data' 
# img_dir = 'imagenette_10percent/second_5percent'
# save_dir = 'imagenette_10percent/second_5percent_split' 
# fraction of every class that goes to the train folder; the rest goes to val
train_val_num = 0.8
split_train_test(img_dir,save_dir,train_val_num)

split train and val finished !


In [7]:
# Image data augmentation: add augmented copies next to the training images

# Raw strings keep the Windows backslash without relying on the invalid "\c" escape.
img_dir = r'cifar_10\cifar10_50percent_split/train'
save_dir = r'cifar_10\cifar10_50percent_split'
train_val_num = 1.0  # augment every image
data_enhancement(img_dir,save_dir,train_val_num)
perspective_transform(img_dir,save_dir,train_val_num)

  "Argument interpolation should be of type InterpolationMode instead of int. "


In [None]:
# Train an EfficientNet classifier

device="cuda" if torch.cuda.is_available() else "cpu"
# device = 'cpu'

class Efficientnet_train():
    """Fine-tune an EfficientNet-b5 on an image-folder dataset.

    Construct with the options object returned by ``parse_opt`` and call the
    instance to run training. After every epoch the model is evaluated on the
    validation split; the weights are saved whenever validation accuracy
    improves.
    """
    def __init__(self,opt):
        """Build model, loss, optimizer and data loaders from ``opt``.

        :param opt: namespace with ``epochs``, ``batch_size``, ``class_num``,
                    ``imgsz``, ``img_dir``, ``weights``, ``save_dir``, ``lr``
                    and ``m`` attributes
        """
        self.epochs=opt.epochs  # number of training epochs
        self.batch_size=opt.batch_size
        self.class_num=opt.class_num  # number of target classes
        self.imgsz=opt.imgsz  # input image size
        self.img_dir=opt.img_dir  # dataset root containing train/ and val/
        self.weights=opt.weights  # path of the pretrained weights
        self.save_dir=opt.save_dir  # where checkpoints are written
        self.lr=opt.lr  # initial learning rate
        self.moment=opt.m  # SGD momentum
        base_model = EfficientNet.from_name('efficientnet-b5')  # change the suffix to use another variant
        # map_location lets weights saved on a GPU load on a CPU-only machine
        state_dict = torch.load(self.weights, map_location=device)
        base_model.load_state_dict(state_dict)
        # replace the fully connected layer so it outputs class_num logits
        num_ftrs = base_model._fc.in_features
        base_model._fc = nn.Linear(num_ftrs, self.class_num)
        print(device)
        self.model = base_model.to(device)
        # cross-entropy loss
        self.cross = nn.CrossEntropyLoss()
        # optimizer
        self.optimzer = optim.SGD((self.model.parameters()), lr=self.lr, momentum=self.moment, weight_decay=0.0004)

        # data loaders and the class-name -> index mapping
        self.trainx,self.valx,self.b=self.process()
        print(self.b)
    def __call__(self):
        """Run the training loop with per-epoch validation and checkpointing."""
        best_acc = 0
        for ech in tqdm(range(self.epochs)):
            optimzer1 = self.lrfn(ech, self.optimzer)
            # validation below switches to eval mode, so training mode has to
            # be re-enabled at the start of every epoch
            self.model.train(True)

            print("----------Start Train Epoch %d----------" % (ech + 1))
            run_loss = 0.0  # accumulated loss
            run_correct = 0.0  # number of correct predictions so far
            count = 0.0  # number of samples seen so far

            for i, data in enumerate(self.trainx):
                inputs, label = data
                inputs, label = inputs.to(device), label.to(device)

                optimzer1.zero_grad()
                output = self.model(inputs)

                loss = self.cross(output, label)
                loss.backward()
                optimzer1.step()

                run_loss += loss.item()
                _, pred = torch.max(output.data, 1)
                count += label.size(0)
                run_correct += pred.eq(label.data).cpu().sum()
                # every 100 batches report the running accuracy
                # (correct predictions so far / samples seen so far)
                if (i+1)%100==0:
                    print('[Epoch:{}__iter:{}/{}] | Acc:{}'.format(ech + 1,i+1,len(self.trainx), run_correct/count))
            train_acc = run_correct / count
            # summary for the finished epoch
            print('Epoch:{} | Loss:{} | Acc:{}'.format(ech + 1, run_loss / len(self.trainx), train_acc))

            # validate after every epoch
            print("----------Waiting Test Epoch {}----------".format(ech + 1))
            # eval mode: dropout is disabled and batch-norm uses (and no longer
            # updates) its running statistics while scoring the validation set
            self.model.eval()
            with torch.no_grad():
                correct = 0.  # correct predictions
                total = 0.  # evaluated samples
                for inputs, labels in self.valx:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = self.model(inputs)

                    # index of the highest-scoring class
                    _, pred = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += pred.eq(labels).cpu().sum()
                test_acc = correct / total
                print("批次%d的验证集准确率" % (ech + 1), correct / total)
            if best_acc < test_acc:
                best_acc = test_acc
                start_time=(time.strftime("%m%d",time.localtime()))
                save_weight=self.save_dir+os.sep+start_time  # checkpoints are grouped by month+day
                os.makedirs(save_weight,exist_ok=True)
                # state_dict() stores only the weights, which is more portable
                # than pickling the whole model object
                torch.save(self.model.state_dict(), save_weight + os.sep + "efficientb5_cifar100_10percent.pth")

    # data pipeline
    def process(self):
        """Create the train / val loaders from ``self.img_dir``.

        :return: ``(train_loader, val_loader, class_to_idx)`` where
                 ``class_to_idx`` is the mapping reported by the train dataset
        """
        data_transforms = {
            'train': transforms.Compose([
                transforms.Resize((self.imgsz, self.imgsz)),  # resize
                transforms.CenterCrop((self.imgsz, self.imgsz)),  # center crop
                transforms.RandomRotation(45),  # random rotation in [-45, 45] degrees
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ]),
            "val": transforms.Compose([
                transforms.Resize((self.imgsz, self.imgsz)),  # resize
                transforms.CenterCrop((self.imgsz, self.imgsz)),  # center crop
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        }
        # read from the configured dataset root (--img-dir) instead of a hard-coded path
        image_datasets = {x: datasets.ImageFolder(root=os.path.join(self.img_dir, x), transform=data_transforms[x]) for x in ['train', 'val']}
        trainx = DataLoader(image_datasets["train"], batch_size=self.batch_size, shuffle=True, drop_last=True)
        valx = DataLoader(image_datasets["val"], batch_size=self.batch_size, shuffle=True, drop_last=False)
        b = image_datasets["train"].class_to_idx  # class name -> index
        return trainx,valx,b


    # learning-rate schedule: linear warm-up, plateau, exponential decay
    def lrfn(self,num_epoch, optimzer):
        """Set the learning rate for epoch ``num_epoch`` on every param group.

        :param num_epoch: zero-based epoch index
        :param optimzer: object exposing ``param_groups`` (list of dicts)
        :return: the same ``optimzer`` object, with ``lr`` updated in place
        """
        lr_start = 0.00001  # starting value
        max_lr = 0.0004  # peak value
        lr_up_epoch = 10  # warm-up length in epochs
        lr_sustain_epoch = 5  # epochs held at the peak
        lr_exp = .8  # decay factor
        if num_epoch < lr_up_epoch:  # linear warm-up
            lr = (max_lr - lr_start) / lr_up_epoch * num_epoch + lr_start
        elif num_epoch < lr_up_epoch + lr_sustain_epoch:  # plateau
            lr = max_lr
        else:  # exponential decay
            lr = (max_lr - lr_start) * lr_exp ** (num_epoch - lr_up_epoch - lr_sustain_epoch) + lr_start
        for param_group in optimzer.param_groups:
            param_group['lr'] = lr
        return optimzer
# Training options
def parse_opt():
    """Parse the training options and return them as a namespace.

    ``parse_known_args`` is used so that unrelated arguments (for example the
    ones a notebook kernel passes in ``sys.argv``) are ignored, not rejected.

    :return: ``argparse.Namespace`` with ``weights``, ``img_dir``, ``imgsz``,
             ``epochs``, ``batch_size``, ``class_num``, ``lr``, ``m`` and
             ``save_dir``
    """
    parser=argparse.ArgumentParser()
    parser.add_argument("--weights",type=str,default="./model/efficientnet-b5-b6417697.pth",help='initial weights path')  # pretrained weights
    # raw string: keeps the backslash without relying on the invalid "\c" escape
    parser.add_argument("--img-dir",type=str,default=r"./cifar_100\cifar100_10percent",help="train image path")  # dataset root
    parser.add_argument("--imgsz",type=int,default=224,help="image size")  # input image size
    parser.add_argument("--epochs",type=int,default=50,help="train epochs")  # number of epochs
    parser.add_argument("--batch-size",type=int,default=8,help="train batch-size")  # batch size
    parser.add_argument("--class_num",type=int,default=100,help="class num")  # number of classes
    parser.add_argument("--lr",type=float,default=0.0005,help="Init lr")  # initial learning rate
    parser.add_argument("--m",type=float,default=0.9,help="optimer momentum")  # momentum
    parser.add_argument("--save-dir",type=str,default="./weight",help="save models dir")  # checkpoint directory
    opt=parser.parse_known_args()[0]
    return opt

# Entry point: parse the options, build the trainer, then call it to run training.
if __name__ == '__main__':
    opt=parse_opt()
    # the constructor loads the pretrained weights and builds the data loaders
    models=Efficientnet_train(opt)
    # calling the instance runs the training loop
    models()

In [5]:
# Run predictions with the trained model
input_size = 224
device = 'cuda'

# per-channel statistics used by transforms.Normalize
means = [0.485, 0.456, 0.406]
stds = [0.229, 0.224, 0.225]


# Load Test images
class UnNormalize(object):
    """Undo a per-channel ``(x - mean) / std`` normalization."""
    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): normalized image; iterated along its first axis,
                one entry per (mean, std) pair.
        Returns:
            Tensor: the same tensor object, de-normalized in place.
        """
        for channel, mu, sigma in zip(tensor, self.mean, self.std):
            # inverse of the normalize step (t.sub_(m).div_(s))
            channel.mul_(sigma)
            channel.add_(mu)
        return tensor

unorm = UnNormalize(mean = means, std = stds)


class ImageFolderWithPaths(datasets.ImageFolder):
    """``ImageFolder`` whose items also carry the path of the source file.

    Positional and keyword arguments are forwarded unchanged to
    ``datasets.ImageFolder``.
    """
    def __init__(self, *args, **kwargs):
        super(ImageFolderWithPaths, self).__init__(*args, **kwargs)
        # remember the transform whether it was passed positionally
        # (second argument) or as the ``transform`` keyword
        self.trans = args[1] if len(args) > 1 else kwargs.get('transform')
    def __len__(self):
        return len(self.imgs)
    def __getitem__(self, index):
        """Return ``(image, label, path)`` for the sample at ``index``."""
        img, label = super(ImageFolderWithPaths, self).__getitem__(index)

        path = self.imgs[index][0]  # self.imgs holds (path, class index) pairs
        return (img, label ,path)


def loaddata(data_dir, batch_size, set_name, shuffle):
    """Build the data loader for one split of an image-folder dataset.

    :param data_dir: dataset root; the images are read from ``data_dir/set_name``
    :param batch_size: batch size handed to the ``DataLoader``
    :param set_name: split to load: ``'train'``, ``'val'`` or ``'test_class'``
    :param shuffle: whether the loader shuffles the samples
    :return: ``({set_name: loader}, number_of_samples)``
    """
    def _plain_pipeline():
        # deterministic preprocessing shared by the evaluation splits
        return transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(means, stds)
        ])

    # training additionally gets a small random shift and a horizontal flip
    augmented_pipeline = transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)
    ])
    pipelines = {
        'train': augmented_pipeline,
        'val': _plain_pipeline(),
        'test_class': _plain_pipeline(),
    }

    dataset = ImageFolderWithPaths(os.path.join(data_dir, set_name), pipelines[set_name])
    # num_workers=0 if CPU else = 1
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=shuffle, num_workers=0)
    return {set_name: loader}, len(dataset)



def test_model(model, criterion):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    cont = 0
    outPre = []
    outLabel = []
    dset_loaders, dset_sizes = loaddata(data_dir=data_dir, batch_size=batch_size, set_name = 'test_class',shuffle=False)
    transform = T.ToPILImage()
    for data in dset_loaders['test_class']:
        inputs, labels, paths = data #path抓出被分類的圖片的原始路徑
        labels = labels.type(torch.LongTensor)
        
        # GPU
        inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())

        outputs = model(inputs)

        #計算預測結果entropy
        entropy = 0
        # for t in range(len(outputs.data)):
        #     for t2 in range(len(outputs.data[t])):
        #         entropy = entropy + (-1*( torch.log(outputs.data[t][t2]) * outputs.data[t][t2] ))


        _, preds = torch.max(outputs.data, 1)
        loss = criterion(outputs, labels)
        if cont == 0:
            outPre = outputs.data.cpu()
            outLabel = labels.data.cpu()
        else:
            outPre = torch.cat((outPre, outputs.data.cpu()), 0)
            outLabel = torch.cat((outLabel, labels.data.cpu()), 0)

        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
        cont += len(labels)
        acc = running_corrects/cont

    print('photo number : ',dset_sizes)
    print('Loss: {:.4f} Acc: {:.4f}'.format(running_loss / dset_sizes,
                                            running_corrects.double() / dset_sizes))

    return FUN.softmax(Variable(outPre)).data.numpy(), outLabel.numpy()


# Entry point of the prediction cell: rebuild the network, load the trained
# weights and evaluate them with test_model.
if __name__ == '__main__':
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # Start Testing
    net_name = 'efficientnet-b5'
    # data_dir and batch_size are module-level names read inside test_model
    data_dir = 'cifar_100'
    save_dir = 'weight/0921'
    modelft_file = save_dir + "/" + 'efficientb5_cifar100_50percent_just_good_model' + '.pth'
    batch_size = 2

    # When running on GPU
    model_ft = efficientnet_pytorch.EfficientNet.from_name(net_name)
    # Replace the fully connected layer so it outputs 100 classes
    num_ftrs = model_ft._fc.in_features
    model_ft._fc = nn.Linear(num_ftrs, 100)
    model_ft = model_ft.to(device)

    model_ft.load_state_dict(torch.load(modelft_file))
    print(type(model_ft))
    criterion = nn.CrossEntropyLoss().cuda()
    outPre, outLabel = test_model(model_ft, criterion)

<class 'efficientnet_pytorch.model.EfficientNet'>
photo number :  10000
Loss: 3.2281 Acc: 0.4521




In [5]:
# Build the pickled data: (per-class confidence, ground-truth label)

# module-level accumulators
tensor = []
label = []
class_num = []

input_size = 224
device = 'cuda'
class UnNormalize(object):
    """Undo a per-channel ``(x - mean) / std`` normalization."""
    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): normalized image; iterated along its first axis,
                one entry per (mean, std) pair.
        Returns:
            Tensor: the same tensor object, de-normalized in place.
        """
        for channel, mu, sigma in zip(tensor, self.mean, self.std):
            # inverse of the normalize step (t.sub_(m).div_(s))
            channel.mul_(sigma)
            channel.add_(mu)
        return tensor
# per-channel statistics used by transforms.Normalize
means = [0.485, 0.456, 0.406]
stds = [0.229, 0.224, 0.225]
unorm = UnNormalize(mean = means, std = stds)
class ImageFolderWithPaths(datasets.ImageFolder):
    """``ImageFolder`` whose items also carry the path of the source file.

    Positional and keyword arguments are forwarded unchanged to
    ``datasets.ImageFolder``.
    """
    def __init__(self, *args, **kwargs):
        super(ImageFolderWithPaths, self).__init__(*args, **kwargs)
        # remember the transform whether it was passed positionally
        # (second argument) or as the ``transform`` keyword
        self.trans = args[1] if len(args) > 1 else kwargs.get('transform')
    def __len__(self):
        return len(self.imgs)
    def __getitem__(self, index):
        """Return ``(image, label, path)`` for the sample at ``index``."""
        img, label = super(ImageFolderWithPaths, self).__getitem__(index)

        path = self.imgs[index][0]  # self.imgs holds (path, class index) pairs
        return (img, label ,path)
# Load Test images
def loaddata(data_dir, batch_size, set_name, shuffle):
    """Build the data loader for one split of an image-folder dataset.

    :param data_dir: dataset root; the images are read from ``data_dir/set_name``
    :param batch_size: batch size handed to the ``DataLoader``
    :param set_name: split to load: ``'train'`` or ``'val'``
    :param shuffle: whether the loader shuffles the samples
    :return: ``({set_name: loader}, number_of_samples)``
    """
    # training gets a small random shift and a horizontal flip
    augmented_pipeline = transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)
    ])
    # validation uses deterministic preprocessing only
    plain_pipeline = transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)
    ])
    pipelines = {'train': augmented_pipeline, 'val': plain_pipeline}

    dataset = ImageFolderWithPaths(os.path.join(data_dir, set_name), pipelines[set_name])
    # num_workers=0 if CPU else = 1
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=shuffle, num_workers=0)
    return {set_name: loader}, len(dataset)


def test_model(model, criterion):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    cont = 0
    outPre = []
    outLabel = []
    img_path = []
    dset_loaders, dset_sizes = loaddata(data_dir=data_dir, batch_size=batch_size, set_name='val', shuffle=False)
    transform = T.ToPILImage()
    for data in dset_loaders['val']:
        inputs, labels, paths = data #path抓出被分類的圖片的原始路徑
        labels = labels.type(torch.LongTensor)
        
        # GPU
        inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())

        outputs = model(inputs)

        tensor.append(outputs.data)

        _, preds = torch.max(outputs.data, 1)

        class_num.append(labels)

        loss = criterion(outputs, labels)
        if cont == 0:
            outPre = outputs.data.cpu()
            outLabel = labels.data.cpu()
        else:
            outPre = torch.cat((outPre, outputs.data.cpu()), 0)
            outLabel = torch.cat((outLabel, labels.data.cpu()), 0)

        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
        cont += len(labels)
        acc = running_corrects/cont

    return FUN.softmax(Variable(outPre)).data.numpy(), outLabel.numpy()


if __name__ == '__main__':
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # Start Testing
    net_name = 'efficientnet-b5'
    # raw string: keeps the backslash without relying on the invalid "\c" escape
    data_dir = r'cifar_10\cifar10_second_5percent_split'
    save_dir = 'weight/0610'
    modelft_file = save_dir + "/" + 'efficientb5_cifar10_10percent' + '.pth'
    batch_size = 1

    # When running on GPU
    model_ft = efficientnet_pytorch.EfficientNet.from_name(net_name)
    # Replace the fully connected layer so it outputs 10 classes
    num_ftrs = model_ft._fc.in_features
    model_ft._fc = nn.Linear(num_ftrs, 10)
    model_ft = model_ft.to(device)

    model_ft.load_state_dict(torch.load(modelft_file))
    print(type(model_ft))
    criterion = nn.CrossEntropyLoss().cuda()
    outPre, outLabel = test_model(model_ft, criterion)

# # tensor = torch.stack(tensor)
# label = torch.stack(label)
class_num = torch.stack(class_num)

# the output folder may not exist yet on a fresh checkout
os.makedirs('pickle', exist_ok=True)

# Build the training inputs (run with the 5% model)
# with open('pickle/cifar_val_tensor.pickle', 'wb') as f:
#     pickle.dump(tensor, f)

# Build the targets (run with the 10% model): the raw model outputs collected
# in `tensor` are what gets stored in the "label" pickle
with open('pickle/cifar_val_label.pickle', 'wb') as f:
    pickle.dump(tensor, f)

# Build the list with the class of every sample
# with open('pickle/cifar_val_class_label.pickle', 'wb') as f:
#     pickle.dump(class_num, f)

<class 'efficientnet_pytorch.model.EfficientNet'>




In [36]:
# Build the dataset for the linear model

# NOTE: pickle.load can execute arbitrary code; only load files this notebook produced.
with open('train_tensor.pickle', 'rb') as f:
    train_tensor = pickle.load(f)
with open('train_label.pickle', 'rb') as f:
    train_label = pickle.load(f)
with open('val_tensor.pickle', 'rb') as f:
    val_tensor = pickle.load(f)
with open('val_label.pickle', 'rb') as f:
    val_label = pickle.load(f)

with open('train_class_label.pickle', 'rb') as f:
    train_class_label = pickle.load(f)

with open('val_class_label.pickle', 'rb') as f:
    val_class_label = pickle.load(f)

class_number = 10
batch_size = 4


def _batch_by_class(tensors, labels, class_labels, class_number, batch_size):
    """Group samples by class and pack every class into full batches.

    For each class id in ``range(class_number)`` the matching samples are
    taken in their original order and concatenated in groups of
    ``batch_size``; a trailing incomplete group is dropped. The batches of one
    class are stacked into a single tensor.

    :return: ``(tensor_batches, label_batches)``, two lists with one stacked
             tensor per class
    :raises ValueError: when a class has fewer than ``batch_size`` samples
    """
    # resolve every sample's class once instead of once per class
    class_ids = [int(class_labels[t]) for t in range(len(tensors))]
    tensor_batches, label_batches = [], []
    for class_id in range(class_number):
        members = [t for t, c in enumerate(class_ids) if c == class_id]
        n_full = len(members) // batch_size
        if n_full == 0:
            raise ValueError(
                "class %d has %d sample(s), fewer than batch_size=%d"
                % (class_id, len(members), batch_size))
        chunks = [members[k * batch_size:(k + 1) * batch_size] for k in range(n_full)]
        tensor_batches.append(
            torch.stack([torch.cat([tensors[t] for t in chunk]) for chunk in chunks]))
        label_batches.append(
            torch.stack([torch.cat([labels[t] for t in chunk]) for chunk in chunks]))
    return tensor_batches, label_batches


# Pack the data into batches of `batch_size`, class by class
train_tensor_list, train_label_list = _batch_by_class(
    train_tensor, train_label, train_class_label, class_number, batch_size)
val_tensor_list, val_label_list = _batch_by_class(
    val_tensor, val_label, val_class_label, class_number, batch_size)

# Show the label batches of class 0 as a sanity check
for i, images in enumerate(val_label_list[0]):
    print(images)





tensor([[ 4.6421e+00,  3.0447e-01, -6.9695e-01, -1.5140e-01, -8.7047e-01,
         -7.7928e-01, -7.4363e-01, -1.0252e+00, -4.9470e-01,  4.7723e-01],
        [ 2.6250e+00, -3.5314e-01, -5.1155e-01, -4.1791e-01, -5.2495e-01,
         -1.8490e-01, -4.1576e-01, -6.0706e-01,  8.0143e-02,  6.6952e-01],
        [ 5.1802e+00, -6.9600e-01, -2.5664e-02, -4.0577e-03, -1.3055e+00,
         -8.8313e-01, -7.3957e-01, -1.2481e+00,  2.0464e-01,  2.1246e-02],
        [ 3.6038e+00, -2.5990e-01,  1.5668e-01, -8.2306e-01, -4.0782e-01,
         -6.1584e-01, -8.8105e-01, -9.5602e-01,  2.5508e-01,  3.7847e-01]],
       device='cuda:0')
tensor([[ 4.5153, -0.1113, -0.6022, -0.5720, -1.2648, -0.3586, -0.5964, -1.2770,
         -0.0112,  0.1757],
        [ 5.1651, -0.8470, -0.7290, -0.9412, -0.8426, -0.0106, -1.0423, -1.4531,
          0.2703,  0.0832],
        [ 2.7483, -0.3633, -0.2387, -0.1087, -0.5100,  0.0830, -0.2539, -0.6474,
         -0.2717,  0.0592],
        [ 3.0937, -0.4380, -0.5149, -0.2293, -0.5986

In [None]:
# Train a linear model on the model's predictions for the second 5 percent

def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as f:
        return pickle.load(f)

train_tensor = _read_pickle('pickle/cifar_train_tensor.pickle')
train_label = _read_pickle('pickle/cifar_train_label.pickle')
val_tensor = _read_pickle('pickle/cifar_val_tensor.pickle')
val_label = _read_pickle('pickle/cifar_val_label.pickle')

train_class_label = _read_pickle('pickle/cifar_train_class_label.pickle')

val_class_label = _read_pickle('pickle/cifar_val_class_label.pickle')

device = 'cuda' if torch.cuda.is_available() else "cpu"




'''讓數據集可以叠代'''
batch_size = 8
n_iters = 20000

'''定義模型'''
class LogisticRegressionModel(nn.Module):
    """A single affine layer mapping ``input_dim`` features to ``output_dim`` values.

    No sigmoid or softmax is applied: ``forward`` returns the raw output of
    the ``nn.Linear`` layer stored in ``self.linear``.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Return ``self.linear(x)``."""
        return self.linear(x)

# Instantiate the model: one scalar output regressed from the class scores.
class_number = 10
input_dim = class_number
output_dim = 1

model = LogisticRegressionModel(input_dim, output_dim)
model = model.to(device)

# Loss: mean-squared error averaged over every element of the batch.
# `reduction='mean'` is the supported spelling of the deprecated
# `reduce=True, size_average=True` pair and produces the same value
# without the deprecation warning.
cauculate = torch.nn.MSELoss(reduction='mean')

learning_rate = 0.0005

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Pack the samples into per-class mini-batches.

def _batch_by_class(samples, targets, class_labels, n_classes, batch_size, device=None):
    """Group softmax-normalised samples into fixed-size batches, one stack per class.

    Args:
        samples: indexable collection of score tensors (model inputs).
        targets: indexable collection of score tensors (regression targets),
            aligned with ``samples``.
        class_labels: indexable collection; ``int(class_labels[t])`` is the
            class of sample ``t``.
        n_classes: number of classes; classes are ``0 .. n_classes - 1``.
        batch_size: number of samples concatenated into one batch. Samples
            left over after the last full batch of a class are dropped.
        device: optional device the stacked batches are moved to.

    Returns:
        ``(sample_batches, target_batches)``: two lists indexed by class, each
        entry being ``torch.stack`` of that class's batches.

    Raises:
        ValueError: if a class has fewer than ``batch_size`` samples.
    """
    n_samples = len(samples)

    # Convert scores to probabilities exactly ONCE per sample and never write
    # the result back into the inputs. The previous code re-applied softmax
    # inside the per-class loop and stored it in place, so data for class k
    # had been softmaxed k + 1 times (flattening it towards uniform).
    # dim=-1 is the class axis; for the (1, C) / (C,) tensors used here it is
    # the same axis the implicit-dim softmax picked.
    sample_probs = [FUN.softmax(samples[t].detach(), dim=-1) for t in range(n_samples)]
    target_probs = [FUN.softmax(targets[t].detach(), dim=-1) for t in range(n_samples)]
    owners = [int(class_labels[t]) for t in range(n_samples)]

    sample_batches, target_batches = [], []
    for class_name in range(n_classes):
        members = [t for t in range(n_samples) if owners[t] == class_name]
        n_full = len(members) // batch_size
        if n_full == 0:
            raise ValueError(
                'class {} has {} samples, fewer than batch_size={}'.format(
                    class_name, len(members), batch_size))

        class_x, class_y = [], []
        for b in range(n_full):
            chunk = members[b * batch_size:(b + 1) * batch_size]
            class_x.append(torch.cat([sample_probs[t] for t in chunk]))
            class_y.append(torch.cat([target_probs[t] for t in chunk]))

        stacked_x = torch.stack(class_x)
        stacked_y = torch.stack(class_y)
        if device is not None:
            stacked_x = stacked_x.to(device)
            stacked_y = stacked_y.to(device)
        sample_batches.append(stacked_x)
        target_batches.append(stacked_y)
    return sample_batches, target_batches


train_tensor_list, train_label_list = _batch_by_class(
    train_tensor, train_label, train_class_label, class_number, batch_size, device)

val_tensor_list, val_label_list = _batch_by_class(
    val_tensor, val_label, val_class_label, class_number, batch_size, device)



# Train one regressor per class and keep, for each class, the checkpoint with
# the lowest validation loss.
save_dir = 'linear_weight'

for class_num in range(class_number):
    print('model for class '+str(class_num))

    # Every class gets its own freshly initialised regressor and optimizer.
    # Previously a single model was reused, so the regressor for class k
    # started from the weights trained for class k - 1.
    model = LogisticRegressionModel(input_dim, output_dim).to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    train_batches = train_tensor_list[class_num]
    train_targets = train_label_list[class_num]
    val_batches = val_tensor_list[class_num]
    val_targets = val_label_list[class_num]

    # Number of passes needed to reach roughly n_iters optimisation steps.
    num_epochs = int(n_iters / len(train_batches))
    iteration = 0
    best_loss = float('inf')

    for epoch in range(num_epochs):
        for i, images in enumerate(train_batches):
            # Target: this class's probability for every sample of the batch,
            # shaped (batch, 1) to match the model output.
            labels = train_targets[i][:, class_num].unsqueeze(1)

            # Reset gradients, forward, MSE loss, backward, update.
            optimizer.zero_grad()
            outputs = model(images)
            loss = cauculate(outputs.float(), labels.float())
            loss.backward()
            optimizer.step()

            iteration += 1

            if iteration % 500 == 0:
                # Mean validation loss over all validation batches of this
                # class; no_grad avoids building autograd graphs here.
                total = 0.0
                with torch.no_grad():
                    for j, val_images in enumerate(val_batches):
                        val_labels = val_targets[j][:, class_num].unsqueeze(1)
                        val_outputs = model(val_images)
                        total += cauculate(val_outputs.float(), val_labels.float()).item()
                val_loss = total / len(val_batches)

                if val_loss < best_loss:
                    best_loss = val_loss
                    # Checkpoints are grouped in a folder named after today's date (MMDD).
                    start_time = time.strftime("%m%d", time.localtime())
                    save_weight = save_dir + os.sep + start_time
                    os.makedirs(save_weight, exist_ok=True)
                    torch.save(model.state_dict(), save_weight + os.sep + 'cifar_' + str(class_num) + ".pth")

                # Print Loss
                print('Iteration: {}. Loss: {}. '.format(iteration, val_loss))

In [97]:
# Produce the output of every per-class linear model.

# Network outputs / targets for the train and validation splits.
train_tensor = pickle.loads(Path('pickle/cifar_train_tensor.pickle').read_bytes())
train_label = pickle.loads(Path('pickle/cifar_train_label.pickle').read_bytes())
val_tensor = pickle.loads(Path('pickle/cifar_val_tensor.pickle').read_bytes())
val_label = pickle.loads(Path('pickle/cifar_val_label.pickle').read_bytes())

# Class label of every sample.
class_label = pickle.loads(Path('pickle/cifar_train_class_label.pickle').read_bytes())
val_class_label = pickle.loads(Path('pickle/cifar_val_class_label.pickle').read_bytes())

# Model definition follows.
class LogisticRegressionModel(nn.Module):
    """Linear regressor: ``output = nn.Linear(input_dim, output_dim)(x)``.

    The layer is stored as ``self.linear``; no activation is applied.
    """

    def __init__(self, input_dim, output_dim):
        nn.Module.__init__(self)
        layer = nn.Linear(input_dim, output_dim)
        self.linear = layer

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        prediction = self.linear(x)
        return prediction

# Instantiate the per-class models and run them over the training samples.
class_number = 10
input_dim = class_number
output_dim = 1
device = 'cuda' if torch.cuda.is_available() else 'cpu'
weight_dir = 'linear_weight/0610'


def _class_index(file_name):
    """Sort key: the last integer in the file name (``cifar_3.pth`` -> 3)."""
    digits = re.findall(r'\d+', file_name)
    return (int(digits[-1]) if digits else -1, file_name)


# os.listdir returns entries in arbitrary order; sort them so that column k of
# the output really belongs to the checkpoint numbered k.
weight_path = sorted(os.listdir(weight_dir), key=_class_index)
model_file = [weight_dir + '/' + name for name in weight_path]
if not model_file:
    raise FileNotFoundError('no checkpoints found in ' + weight_dir)

# Load every checkpoint once, instead of once per sample.
class_models = []
for path in model_file:
    model = LogisticRegressionModel(input_dim, output_dim)
    model.load_state_dict(torch.load(path, map_location=device))
    model = model.to(device)
    model.eval()
    class_models.append(model)

with torch.no_grad():
    # Convert scores to probabilities once, without modifying train_tensor.
    # dim=-1 is the class axis of each (1, C) sample.
    train_prob = torch.stack(
        [FUN.softmax(train_tensor[t].detach(), dim=-1) for t in range(len(train_tensor))]
    ).to(device)

    # One row per sample, one column per linear model: shape (N, len(model_file)).
    flat_prob = train_prob.reshape(len(train_prob), -1)
    new_tensor = torch.cat([m(flat_prob) for m in class_models], dim=1).cpu()

# Show one sample for inspection (index 1600 when the set is large enough).
preview = min(1600, len(new_tensor) - 1)
print(train_prob[preview])
print(new_tensor[preview])

# Predicted class from the linear models: arg-max over their outputs.
class_num = torch.argmax(new_tensor, dim=1).to(device)
class_label = class_label.view(-1).to(device)

# Predicted class from the original network scores. arg-max is unchanged by
# softmax, so the scores are used as they are; the earlier code re-applied
# softmax in place to train_tensor a second time just to take the arg-max.
og_scores = torch.stack([train_tensor[t].detach() for t in range(len(train_tensor))])
class_num_og = torch.argmax(og_scores.reshape(len(og_scores), -1), dim=1).to(device)

# Count matches against the ground-truth labels in one vectorised comparison.
correct = int((class_num == class_label[:len(class_num)]).sum())
correct2 = int((class_num_og == class_label[:len(class_num_og)]).sum())

acc = correct/len(class_num)
acc2 = correct2/len(class_num_og)
print('acc_og : ',acc2,'---- count : ',correct2,' acc : ',acc,'---- count : ',correct ,' all : ',len(class_num))




tensor([[0.0616, 0.0301, 0.0243, 0.2599, 0.0312, 0.0336, 0.4237, 0.0205, 0.0142,
         0.1009]], device='cuda:0')
tensor([0.8162, 0.2865, 0.1526, 0.1421, 0.1411, 0.1410, 0.1410, 0.1410, 0.1410,
        0.1410])




acc_og :  0.7417142857142857 ---- count :  1298  acc :  0.1 ---- count :  175  all :  1750


In [5]:
# Predict on 100% of the data.
# Collects, per image: the confidence for every class and the ground-truth label.

# Accumulators appended to by test_model(): raw outputs, a second copy of the
# raw outputs, ground-truth labels, and source image paths.
tensor, label, class_num, image_path = [], [], [], []

device = 'cuda'
input_size = 224
class UnNormalize(object):
    """Undo a per-channel ``(t - mean) / std`` normalisation, in place."""

    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Normalized tensor image of size (C, H, W).
                It is modified in place.
        Returns:
            Tensor: The same tensor object, each channel mapped to t * std + mean.
        """
        for channel, mu, sigma in zip(tensor, self.mean, self.std):
            # Inverse of the normalize step t.sub_(m).div_(s).
            channel.mul_(sigma)
            channel.add_(mu)
        return tensor


# Per-channel statistics shared by the Normalize transforms and by `unorm`.
means = [0.485, 0.456, 0.406]
stds = [0.229, 0.224, 0.225]
unorm = UnNormalize(mean=means, std=stds)
class ImageFolderWithPaths(datasets.ImageFolder):
    """``ImageFolder`` whose items also carry the path of the source file.

    ``__getitem__`` returns ``(img, label, path)`` instead of ``(img, label)``.
    All positional and keyword arguments are forwarded to ``ImageFolder``.
    """

    def __init__(self, *args, **kwargs):
        super(ImageFolderWithPaths, self).__init__(*args, **kwargs)
        # ImageFolder keeps the transform it was given as `self.transform`.
        # Reading it back (instead of args[1]) also works when the transform
        # is passed by keyword or omitted, which used to raise IndexError.
        self.trans = self.transform

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, index):
        img, label = super(ImageFolderWithPaths, self).__getitem__(index)
        # self.imgs holds (path, class_index) pairs.
        path = self.imgs[index][0]
        return (img, label, path)
# Build the DataLoader for one image split.
def loaddata(data_dir, batch_size, set_name, shuffle):
    """Create a DataLoader over ``data_dir/set_name``.

    Args:
        data_dir: root folder containing one sub-folder per split.
        batch_size: batch size passed to the DataLoader.
        set_name: split to load; must be 'train', 'val' or 'cifar10'
            (any other value raises KeyError).
        shuffle: whether the DataLoader shuffles the samples.

    Returns:
        ``({set_name: DataLoader}, number_of_images)``. Each batch yields
        ``(images, labels, paths)`` because ImageFolderWithPaths is used.
    """
    def _deterministic_pipeline():
        # Resize + centre crop to input_size, then tensor + normalisation.
        return transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(means, stds),
        ])

    data_transforms = {
        # Training adds a small random translation and horizontal flip.
        'train': transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(means, stds),
        ]),
        'val': _deterministic_pipeline(),
        'cifar10': _deterministic_pipeline(),
    }

    dataset = ImageFolderWithPaths(os.path.join(data_dir, set_name), data_transforms[set_name])
    # num_workers=0 if CPU else = 1
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=shuffle, num_workers=0)
    return {set_name: loader}, len(dataset)


def test_model(model, criterion):
    """Run ``model`` over the 'cifar10' split and return its predictions.

    Reads the module-level ``data_dir`` and ``batch_size`` and, as a side
    effect, appends one entry per batch to the module-level lists
    ``image_path`` (source paths), ``class_num`` (ground-truth labels),
    ``tensor`` and ``label`` (both receive the raw model outputs).

    Args:
        model: network to evaluate; inputs are moved to its device.
        criterion: loss called as ``criterion(outputs, labels)``.

    Returns:
        ``(probabilities, labels)`` as NumPy arrays: the softmax over the
        class axis of every output row, and the ground-truth labels.
    """
    model.eval()
    # Loss / accuracy are accumulated for debugging only; they are not returned.
    running_loss = 0.0
    running_corrects = 0
    cont = 0
    pred_chunks = []
    label_chunks = []
    dset_loaders, dset_sizes = loaddata(data_dir=data_dir, batch_size=batch_size, set_name='cifar10', shuffle=False)
    run_device = next(model.parameters()).device

    # Inference only: no_grad stops autograd from recording a graph per batch.
    with torch.no_grad():
        for inputs, labels, paths in dset_loaders['cifar10']:
            # `paths` are the original files of the images being classified.
            image_path.append(paths)
            inputs = inputs.to(run_device)
            labels = labels.type(torch.LongTensor).to(run_device)
            class_num.append(labels)
            outputs = model(inputs)

            # NOTE(review): `label` receives the same raw outputs as `tensor`;
            # it is later pickled as 'alldata_result' - confirm this is intended.
            tensor.append(outputs.data)
            label.append(outputs.data)

            _, preds = torch.max(outputs.data, 1)
            loss = criterion(outputs, labels)

            # Collect per-batch results and concatenate once after the loop
            # (repeated torch.cat inside the loop copies everything each time).
            pred_chunks.append(outputs.data.cpu())
            label_chunks.append(labels.data.cpu())

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
            cont += len(labels)

    outPre = torch.cat(pred_chunks, 0)
    outLabel = torch.cat(label_chunks, 0)

    # dim=1 is the class axis of the (N, C) score matrix.
    return FUN.softmax(outPre, dim=1).numpy(), outLabel.numpy()


if __name__ == '__main__':
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Evaluation settings; data_dir and batch_size are read as globals by test_model().
    batch_size = 1
    data_dir = 'cifar_10'
    net_name = 'efficientnet-b5'
    save_dir = 'weight/0610'
    modelft_file = "{}/{}.pth".format(save_dir, 'efficientb5_cifar10_10percent')

    # Build the backbone and replace its fully connected layer with a 10-way head.
    model_ft = EfficientNet.from_name(net_name)
    num_ftrs = model_ft._fc.in_features
    model_ft._fc = nn.Linear(num_ftrs, 10)
    model_ft = model_ft.to(device)

    # Restore the trained weights, then run the evaluation pass.
    model_ft.load_state_dict(torch.load(modelft_file))
    print(type(model_ft))
    criterion = nn.CrossEntropyLoss().cuda()
    outPre, outLabel = test_model(model_ft, criterion)

# tensor = torch.stack(tensor)
# label = torch.stack(label)
class_num = torch.stack(class_num)
print(len(class_num))
print(len(tensor))

# Output folder. It is built with os.path.join because the previous literals
# ('pickle\cifar_data/...') mixed separators and relied on '\c' being an
# invalid escape sequence, which only resolves to the intended folder on Windows.
pickle_dir = os.path.join('pickle', 'cifar_data')
os.makedirs(pickle_dir, exist_ok=True)

# Training-set dump (disabled):
# with open(os.path.join(pickle_dir, 'testset_tensor.pickle'), 'wb') as f:
#     pickle.dump(tensor, f)

# Model outputs collected in `label`.
with open(os.path.join(pickle_dir, 'alldata_result.pickle'), 'wb') as f:
    pickle.dump(label, f)

# Class of every sample.
with open(os.path.join(pickle_dir, 'alldata_class_label.pickle'), 'wb') as f:
    pickle.dump(class_num, f)

# Source path of every sample.
with open(os.path.join(pickle_dir, 'alldata_img_path.pickle'), 'wb') as f:
    pickle.dump(image_path, f)

<class 'efficientnet_pytorch.model.EfficientNet'>




50000
50000


In [None]:
# Pick 50% of the data according to entropy.
with open('100percent_train_tensor.pickle', 'rb') as f:
    train_tensor = pickle.load(f)

with open('100percent_val_tensor.pickle', 'rb') as f:
    val_tensor = pickle.load(f)

with open('100percent_train_img_path.pickle', 'rb') as f:
    train_image_path = pickle.load(f)

with open('100percent_val_img_path.pickle', 'rb') as f:
    val_image_path = pickle.load(f)

# Train entries first, then validation entries, for both scores and paths so
# that index i refers to the same image in `tensor` and `image_path`.
# The temporaries are no longer called `list` / `list2`: a global named `list`
# shadows the builtin for every cell executed afterwards.
all_scores = [train_tensor[t] for t in range(len(train_tensor))]
all_scores += [val_tensor[t] for t in range(len(val_tensor))]
tensor = torch.stack(all_scores)

image_path = [train_image_path[t] for t in range(len(train_image_path))]
image_path += [val_image_path[t] for t in range(len(val_image_path))]

# Entropy computation.
def entropy(input):
    """Return the base-2 Shannon entropy ``-sum(p * log2(p))`` of ``input``.

    Args:
        input: indexable sequence of numbers supporting ``len()``.

    Returns:
        The entropy in bits; 0 for an empty sequence. Entries that are zero
        or negative contribute nothing.
    """
    total = 0
    for t in range(len(input)):
        p = input[t]
        # p == 0 contributes 0 by the 0 * log(0) = 0 convention; it must be
        # skipped explicitly because math.log(0) raises ValueError.
        if p > 0:
            total = total + (-(p * math.log(p, 2)))
    return total

'''定義模型'''
class LogisticRegressionModel(nn.Module):
    """Wrapper around one ``nn.Linear(input_dim, output_dim)`` layer.

    The forward pass returns the layer's output unchanged.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Return the linear projection of ``x``."""
        return self.linear(x)

# Instantiate the per-class models and run them over every sample.
class_number = 10
input_dim = class_number
output_dim = 1
device = 'cuda' if torch.cuda.is_available() else 'cpu'
weight_dir = 'linear_weight/0606'


def _class_index(file_name):
    """Sort key: the last integer in the file name (``cifar_3.pth`` -> 3)."""
    digits = re.findall(r'\d+', file_name)
    return (int(digits[-1]) if digits else -1, file_name)


# os.listdir returns entries in arbitrary order; sort them so that column k of
# the output belongs to the checkpoint numbered k.
weight_path = sorted(os.listdir(weight_dir), key=_class_index)
model_file = [weight_dir + '/' + name for name in weight_path]
if not model_file:
    raise FileNotFoundError('no checkpoints found in ' + weight_dir)

# Load every checkpoint once, instead of once per sample.
class_models = []
for path in model_file:
    model = LogisticRegressionModel(input_dim, output_dim)
    model.load_state_dict(torch.load(path, map_location=device))
    model = model.to(device)
    model.eval()
    class_models.append(model)

# NOTE(review): the samples are fed to the models as stored, without a softmax
# step - confirm the pickled tensors are already on the scale the linear
# models were trained on.
with torch.no_grad():
    # One row per sample, one column per linear model: shape (N, len(model_file)).
    flat_scores = tensor.detach().reshape(len(tensor), -1).to(device)
    new_tensor = torch.cat([m(flat_scores) for m in class_models], dim=1).cpu()

img_save_dir = '50_percent/50_percent_data'
img_dir = 'imagenette_all'

# Entropy of the softmax over the linear models' outputs, one value per sample.
all_en = []
for t in range(len(new_tensor)):
    # dim=-1: the single axis of each per-sample output vector.
    odds = FUN.softmax(new_tensor[t].detach(), dim=-1).cpu().numpy()
    all_en.append(entropy(odds))

# Sample indices ordered from highest to lowest entropy.
sort = sorted(range(len(all_en)), reverse=True, key=lambda k: all_en[k])

# Create one output folder per entry found directly under img_dir.
img_dir_list = glob.glob(img_dir + os.sep + "*")
for class_dir in img_dir_list:
    class_name = class_dir.split(os.sep)[-1]
    os.makedirs(os.path.join(img_save_dir, class_name), exist_ok=True)

# Copy the higher-entropy half of the images into <img_save_dir>/<class>/<file>.
for idx in sort[:len(sort) // 2]:
    entry = image_path[idx]
    # Entries are stored as a one-element tuple/list of paths; take the path
    # itself rather than stripping characters from str(entry), which broke on
    # paths containing commas, quotes or parentheses.
    src = entry[0] if isinstance(entry, (tuple, list)) else str(entry)

    # Class folder and file name are the last two path components; accept
    # both '\' and '/' so pickles written on another OS still resolve.
    parts = [part for part in re.split(r'[\\/]+', src) if part]
    if len(parts) < 2:
        raise ValueError('cannot derive class folder from path: ' + src)
    class_name, file_name = parts[-2], parts[-1]

    img = cv2.imread(src)
    if img is None:
        # cv2.imread returns None for unreadable files; skip instead of
        # failing inside imwrite.
        print('skipped unreadable image: ' + src)
        continue

    dst_dir = os.path.join(img_save_dir, class_name)
    os.makedirs(dst_dir, exist_ok=True)
    cv2.imwrite(os.path.join(dst_dir, file_name), img)




In [None]:
# Generate the CIFAR10 image folders from the pickled batches.

def _export_batch(batch, out_dir, index_offset=0):
    """Write every image of one CIFAR batch dict to ``out_dir`` as JPEG.

    Args:
        batch: dict with 'data' (one flat 3*32*32 row per image) and
            'labels' (class index per image).
        out_dir: destination folder, including the trailing '/'; created
            when missing.
        index_offset: value added to the running index in the file name.

    Files are named ``<label>_<index>.jpg``; change the suffix for PNG.
    """
    os.makedirs(out_dir, exist_ok=True)
    for i in range(len(batch['data'])):
        # Rows are stored channel-first; convert to height x width x channel.
        img = np.reshape(batch['data'][i], (3, 32, 32)).transpose(1, 2, 0)
        picName = out_dir + str(batch['labels'][i]) + '_' + str(i + index_offset) + '.jpg'
        # scipy.misc.imsave no longer exists in current SciPy releases;
        # PIL writes the same array directly.
        Image.fromarray(np.ascontiguousarray(img)).save(picName)


# Training images: data_batch_1 .. data_batch_5, numbered consecutively.
train_index = 0
for j in range(1, 6):
    dataName = "cifar-10-batches-py/data_batch_" + str(j)
    Xtr = unpickle(dataName)
    print(dataName + " is loading...")
    _export_batch(Xtr, 'cifar_10/train/', index_offset=train_index)
    train_index += len(Xtr['data'])
    print(dataName + " loaded.")

print("test_batch is loading...")

# Test images.
testXtr = unpickle("cifar-10-batches-py/test_batch")
_export_batch(testXtr, 'cifar_10/test/')
print("test_batch loaded.")

In [None]:
#測試於CIFAR10 中的效果

In [None]:
# Input folder. It is built with os.path.join because the previous literals
# ('pickle\cifar_data/...') mixed separators and relied on '\c' being an
# invalid escape sequence.
pickle_dir = os.path.join('pickle', 'cifar_data')

with open(os.path.join(pickle_dir, 'testset_result.pickle'), 'rb') as f:
    result = pickle.load(f)

with open(os.path.join(pickle_dir, 'testset_class_label.pickle'), 'rb') as f:
    label = pickle.load(f)

with open(os.path.join(pickle_dir, 'testset_img_path.pickle'), 'rb') as f:
    path = pickle.load(f)

model_path = 'linear_weight/cifar/model0.pth'

device = 'cuda'
# NOTE(review): LSTM_FCN is not defined in the visible part of this notebook;
# it must be defined or imported before this cell runs.
MyResModel = LSTM_FCN(input_dim=1, hidden_dim=16, output_dim=5, layers=1).to(device)
MyResModel.init_model()
MyResModel.load_state_dict(torch.load(model_path))
MyResModel = MyResModel.to(device)

# Turn the scores of sample 5 into probabilities (dim=-1 is the class axis)
# and place them on the same device as the model before the forward pass.
tensor = FUN.softmax(result[5].detach(), dim=-1).to(device)
outputs = MyResModel(tensor)
print(outputs)