In [108]:
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import scipy.io
import numpy as np
import os
import shutil
from tqdm import tqdm
from PIL import Image

### Paths
To use this notebook correctly, adjust the paths below to match your own directories:

From `https://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/`, download:
- <b>data</b> (includes all training, testing and validation images)
- <b>annotations</b> (annotations for the data)

The file structure you need to create must look like this:
```
data
│   
└───FGVC-Aircraft
    │
    └───fgvc-aircraft-2013b
    │   
    └───fgvc-aircraft-2013b-annotations
    
```

In [109]:
# Root directory that holds the extracted FGVC-Aircraft download.
global_path = '../../data/FGVC-Aircraft'
# The image archive and the annotation archive sit side by side under the root.
dataset_path = f'{global_path}/fgvc-aircraft-2013b'
annotations_path = f'{global_path}/fgvc-aircraft-2013b-annotations'

In [110]:
# Text files listing the image file names of each split (one name per line).
imgs_train_fnames_path = f'{dataset_path}/data/images_train.txt'
imgs_test_fnames_path = f'{dataset_path}/data/images_test.txt'
imgs_val_fnames_path = f'{dataset_path}/data/images_val.txt'

# Directory that contains the image files themselves.
imgs_path = f'{dataset_path}/data/images'

In [111]:
def _read_fnames(list_path):
    """Return the image file names listed in ``list_path``, one per line.

    Surrounding whitespace (including the trailing newline) is stripped and
    blank lines are skipped, so a stray empty line in the file cannot yield
    an empty file name.
    """
    with open(list_path, 'r') as data_file:
        stripped = (line.strip() for line in data_file)
        return [name for name in stripped if name]


# Image file names (without extension) for each split
training_fnames = _read_fnames(imgs_train_fnames_path)
testing_fnames = _read_fnames(imgs_test_fnames_path)
validation_fnames = _read_fnames(imgs_val_fnames_path)

In [6]:
# Sanity check: first file name and number of entries of each split.
# The counts are taken from the *_fnames lists loaded above; no *_labels
# lists are defined in this notebook.
print("[Training imgs names]", training_fnames[0], len(training_fnames))
print("[Testing imgs names]", testing_fnames[0], len(testing_fnames))
print("[Validations img names]", validation_fnames[0], len(validation_fnames))

[Training labels] 1025794 3334
[Testing labels] 1514522 3333
[Validations labels] 0481847 3333


In [7]:
# Names of all images found on disk, with the '.jpg' extension removed.
# Directory entries that are not JPEG files (e.g. hidden system files) are
# ignored instead of having their last four characters cut off blindly.
imgs_names = [
    stem
    for stem, ext in map(os.path.splitext, os.listdir(imgs_path))
    if ext.lower() == '.jpg'
]
imgs_names[0], len(imgs_names)

('1376762', 10000)

In [80]:
# Maps image file name -> class name, merged over all three annotation files.
labels_map = {}
stage_paths = [(annotations_path+'/data/images_manufacturer_train.txt', 'train'),
               (annotations_path+'/data/images_manufacturer_test.txt', 'test'),
               (annotations_path+'/data/images_manufacturer_val.txt', 'val')]

# Loading all image file names with their classes
for path, _stage in stage_paths:
    with open(path, 'r') as data_file:
        for line in data_file:
            record = line.rstrip('\n')
            if not record:
                continue  # skip blank lines
            # Split on the first space only, so a class name that itself
            # contains spaces stays intact.
            # NOTE(review): assumes "<file name> <class name>" per line - confirm
            fname, _, class_ = record.partition(' ')
            labels_map[fname] = class_

labels = np.asarray(list(labels_map.values()))
# Unique class names and how many images carry each of them.
# (Must be computed from `labels`; `classes` does not exist before this line.)
classes, distribution = np.unique(labels, return_counts=True)

'ATR'

In [112]:
# Create structure for loading into PyTorch Dataset model:
#   <folder_path>/<train|val>/<class name>/
folder_name = 'pytorch_structured_dataset'
folder_path = os.path.join(global_path, folder_name)
def mkdir_if_not_exists(dirpath):
    """Create the directory ``dirpath`` unless it already exists.

    Missing parent directories are created as well, and an already existing
    directory is not an error, so the cell can be re-run safely.
    """
    os.makedirs(dirpath, exist_ok=True)

mkdir_if_not_exists(folder_path)
for subfolder_name in ['train', 'val']:
    subfolder_path = os.path.join(folder_path, subfolder_name)
    mkdir_if_not_exists(subfolder_path)
    # One sub-directory per class inside each stage folder
    for class_ in classes:
        mkdir_if_not_exists(os.path.join(subfolder_path, class_))

In [113]:
# Copy all training images into 'train' and all testing images into 'val'
# of the pytorch structure. validation_fnames is not copied by this cell.
for file_names, stage in [[training_fnames, 'train'], [testing_fnames, 'val']]:
    for file_name in tqdm(file_names):
        class_ = labels_map.get(file_name)
        if class_ is None:
            # Fail with a clear message instead of a TypeError from os.path.join
            raise KeyError(f'No class label found for image {file_name!r}')
        image_file = f'{file_name}.jpg'
        src = imgs_path+'/'+image_file
        dst = os.path.join(global_path, folder_name, stage, class_, image_file)
        shutil.copyfile(src, dst)

100%|█████████████████████████████████████| 3334/3334 [00:01<00:00, 1669.27it/s]
100%|█████████████████████████████████████| 3333/3333 [00:01<00:00, 1767.74it/s]


In [104]:
# training_labels

['1025794',
 '1340192',
 '0056978',
 '0698580',
 '0450014',
 '1042824',
 '0894380',
 '1427680',
 '0817494',
 '0716386',
 '0951982',
 '0731614',
 '0582363',
 '1082409',
 '2031775',
 '0950991',
 '0869722',
 '0979376',
 '1002439',
 '0864665',
 '1207591',
 '0582372',
 '0729223',
 '1319365',
 '0548719',
 '0577855',
 '1423583',
 '1187431',
 '0610657',
 '0869742',
 '0687610',
 '1042021',
 '0482761',
 '0064933',
 '1019011',
 '0732456',
 '0218039',
 '2223757',
 '0983618',
 '1327285',
 '0065840',
 '0980196',
 '0995286',
 '1514487',
 '0213505',
 '1459191',
 '1063986',
 '0056337',
 '0409532',
 '0771164',
 '0632010',
 '0901504',
 '0063052',
 '0143080',
 '1089989',
 '0907359',
 '0875281',
 '1197395',
 '1149060',
 '1059846',
 '0786258',
 '1512865',
 '0523223',
 '1115308',
 '1739565',
 '0994986',
 '0193983',
 '0822348',
 '1152739',
 '0907429',
 '0209554',
 '1187709',
 '0847273',
 '0923506',
 '0059101',
 '1548405',
 '0438655',
 '0921022',
 '1094669',
 '0198443',
 '0851647',
 '0247942',
 '0564495',
 '08