In [1]:
import os
import glob
import tarfile
import numpy as np
from scipy.io import loadmat
from shutil import copyfile, rmtree
import sys
import json
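'''Fetch the Oxford 102 Flowers dataset: the images are stored in the `jpg` folder,
   and each flower category is then copied into its own folder class/0 ... class/101.
'''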
if sys.version_info[0] >= 3:
    from urllib.request import urlretrieve
else:
    # Not Python 3 - today, it is most likely to be Python 2
    # But note that this might need an update when Python 4
    # might be around one day
    from urllib import urlretrieve

In [2]:
def download_file(url, dest=None):
    '''Download `url` to `dest` without ever leaving a truncated file at `dest`.

    Input:  url:  address of the file to fetch
            dest: target path; when empty, the last path segment of `url`
                  is placed inside the module-level `data_path` folder
    Output: the path the file was written to
    Raises: whatever `urlretrieve` raises; the partial download is removed first
    '''
    if not dest:
        dest = os.path.join(data_path, url.split('/')[-1])
    # Callers decide whether to download by testing os.path.isfile(dest), so
    # the data is fetched under a temporary name and only renamed once the
    # transfer has finished. An interrupted run therefore cannot be mistaken
    # for a complete file, and an existing good file survives a failed retry.
    partial = dest + '.part'
    try:
        urlretrieve(url, partial)
    except BaseException:
        # BaseException on purpose: KeyboardInterrupt is the usual way a
        # notebook download gets aborted.
        if os.path.exists(partial):
            os.remove(partial)
        raise
    # os.rename does not overwrite on every platform, so clear the target first.
    if os.path.exists(dest):
        os.remove(dest)
    os.rename(partial, dest)
    return dest

In [30]:
# Root folder for the dataset: the image archive, the label file and the
# extracted images are all placed under this path.
data_path = 'flower'

In [4]:
# Download the dataset images and labels; files that already exist are not
# downloaded again.
if not os.path.exists(data_path):
    os.makedirs(data_path)
# Download the image archive.
flowers_archive_path = os.path.join(data_path, '102flowers.tgz')
if not os.path.isfile(flowers_archive_path):
    print('Downloading images...')
    download_file('http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz')
# Unpack whenever the extracted `jpg` folder is missing, not only right after
# a download: otherwise a run whose extraction failed is never repaired,
# because the archive already exists on the next run.
if not os.path.isdir(os.path.join(data_path, 'jpg')):
    # NOTE(review): extractall writes whatever member paths the archive
    # contains, and the archive is fetched over plain HTTP -- consider
    # tarfile's extraction filters where the Python version provides them.
    with tarfile.open(flowers_archive_path) as flowers_archive:
        flowers_archive.extractall(path=data_path)
# Download the label file.
image_labels_path = os.path.join(data_path, 'imagelabels.mat')
if not os.path.isfile(image_labels_path):
    print("Downloading image labels...")
    download_file('http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat')

Downloading images...
Downloading image labels...


In [5]:
# Read the label file, take row 0 of its 'labels' entry and shift every
# label down by one so that the labels start at 0.
image_labels = loadmat(image_labels_path)['labels'][0] - 1

In [6]:
# Pair each image file (sorted by name) with the label at the same position.
files = sorted(glob.glob(os.path.join(data_path, 'jpg', '*.jpg')))
# The pairing is purely positional and zip() silently stops at the shorter
# input, so a count mismatch (for example an incomplete extraction) would
# attach wrong labels without any error. Fail loudly instead.
if len(files) != len(image_labels):
    raise ValueError('found %d image files but %d labels'
                     % (len(files), len(image_labels)))
labels = np.array(list(zip(files, image_labels)))

In [7]:
# Get the current working directory (passed to move_files as the base path).
cwd = os.getcwd()

In [9]:
def move_files(dir_name, cwd, labels, num_classes=102):
    '''Copy every image into a per-class folder <cwd>/<dir_name>/<label>.

    Despite the name, the files are copied with `copyfile`; the sources stay
    where they are. The function can be called again on an existing tree.

    Input:  dir_name:    folder (joined to `cwd`) that receives the class folders
            cwd:         base directory
            labels:      sequence of (file path, label) pairs, e.g. an
                         np.array of shape (m, 2) where m is the image count
            num_classes: class folders 0 .. num_classes-1 are created up
                         front (default 102)
    '''
    target_root = os.path.join(cwd, dir_name)
    for class_index in range(num_classes):
        class_dir = os.path.join(target_root, str(class_index))
        # makedirs also creates missing parents of a nested dir_name; the
        # isdir guard keeps a re-run from failing on folders that exist.
        if not os.path.isdir(class_dir):
            os.makedirs(class_dir)
    for pair in labels:
        src = str(pair[0])
        # str() so that integer labels work as a path component too.
        class_name = str(pair[1])
        dst = os.path.join(target_root, class_name, os.path.basename(src))
        copyfile(src, dst)


In [10]:
# Sort the images into per-class folders below <data_path>/class.
dir_name = os.path.join(data_path, 'class')
move_files(dir_name=dir_name, cwd=cwd, labels=labels)

In [15]:
def save_dict(content, filename):
    '''Store `content` as a dictionary in a JSON file.

    Input:  content:  anything `dict()` accepts, e.g. an np.array of
                      (key, value) rows
            filename: path of the JSON file to write
    '''
    def _to_builtin(value):
        # json cannot serialise NumPy scalars such as np.int64 or np.uint8;
        # .item() yields the equivalent plain Python object.
        return value.item() if isinstance(value, np.generic) else value

    mapping = {_to_builtin(key): _to_builtin(value)
               for key, value in dict(content).items()}
    with open(filename, 'w') as file_object:
        json.dump(mapping, file_object)

In [26]:
def load_dict(filename):
    '''Read a JSON file written earlier and hand back its decoded content.

    Input:  filename: path of the JSON file
    Output: the decoded JSON data (a dict for files produced by save_dict)
    '''
    with open(filename, 'r') as json_file:
        return json.load(json_file)

In [31]:
# Save the label dictionary (built from the file path / label pairs) as JSON.
save_dict(labels,os.path.join(data_path,'image-label.json'))