# Generator
Dataset generator for the task of automatic detection of canonical orientation of a photo by convolutional neural networks.

In [None]:
# Mount Google Drive at /content/drive so the project folder can be read and
# written from this Colab session (force_remount=True is passed to request a
# fresh mount).
from google.colab import drive

drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


## Configuration, imports and helper functions

In [None]:
# Working directory of the Colab VM; the helper functions change back to it
# after temporarily switching directories.
root_dir = '/content'
# Project folder inside the mounted Google Drive, relative to root_dir.
project_dir = '/drive/My Drive/AML project 2021-2-F1801Q151'
# Folder holding the downloaded archives and the generated datasets.
data_dir = '{}{}/datasets'.format(root_dir, project_dir)

In [None]:
#@title Imports
from time import time
from PIL import Image
import os
import requests
from shutil import copyfile, copyfileobj
import tarfile

In [None]:
#@title Define helper functions

def createFolder(name):
  """
  Create the directory `name` if it does not exist yet.

  Missing parent directories are created as well; an already existing
  directory is left untouched.

  :param name: path of the directory to create
  :type name: string

  :raises FileExistsError: if `name` exists but is not a directory
  """
  # makedirs(exist_ok=True) removes the check-then-create race of
  # os.path.exists() followed by os.mkdir() and also handles nested paths.
  os.makedirs(name, exist_ok=True)

def transfer(filename, remote_dir, local_dir):
  """
  Copy an archive from remote_dir into local_dir and unpack it there.

  Nothing is copied when local_dir already contains an 'Images' folder,
  which is taken as the sign that the archive was transferred before.

  :param filename: name of the archive inside remote_dir
  :type filename: string
  :param remote_dir: directory the archive is copied from
  :type remote_dir: string
  :param local_dir: directory the archive is copied to and extracted in
  :type local_dir: string
  """
  started_at = time()
  archive_name = '/' + filename

  # Guard clause: the extracted data is already in place.
  if os.path.isdir(local_dir + '/Images'):
    print('File already transfered!')
    return

  source_path = remote_dir + archive_name
  target_path = local_dir + archive_name
  copyfile(source_path, target_path)
  print('File transfer completed in %0.3f seconds' % (time() - started_at))
  # extract() unpacks the copy and deletes the archive afterwards.
  extract(local_dir, target_path)

def extract(local_dir, ds_destination, root_dir='/content'):
  """
  Unpack a tar archive into local_dir and delete the archive afterwards.

  The working directory is switched to local_dir for the extraction and is
  always set to root_dir before returning, even when extraction fails.

  :param local_dir: directory the archive content is extracted into
  :type local_dir: string
  :param ds_destination: path of the tar archive to extract and remove
  :type ds_destination: string
  :param root_dir: directory to change to once the work is done
  :type root_dir: string
  """
  t1 = time()
  os.chdir(local_dir)
  try:
    # The context manager closes the archive even if extraction raises.
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; only use this on archives from a trusted source.
    with tarfile.open(ds_destination) as tar:
      tar.extractall()
    os.remove(ds_destination)
  finally:
    # Never leave the notebook stranded inside local_dir after an error.
    os.chdir(root_dir)
  print('File extraction completed in %0.3f seconds' % (time() - t1))

def download(url):
  """
  Download a (potentially large) file into data_dir unless it is already there.

  The body is streamed to a temporary '.part' file and renamed only once the
  download finished, so an interrupted run never leaves a truncated file that
  a later call would mistake for a complete download.

  :param url: address of the file; its last path component becomes the file name
  :type url: string

  :return: name of the file inside data_dir
  :rtype: string

  :raises requests.HTTPError: if the server answers with an error status
  """
  filename = url.split('/')[-1]
  target = data_dir + '/' + filename
  if not os.path.isfile(target):
    partial = target + '.part'
    try:
      with requests.get(url, stream=True) as r:
        # Fail loudly instead of storing an HTML error page as the dataset.
        r.raise_for_status()
        with open(partial, 'wb') as f:
          # iter_content() decodes gzip/deflate content encodings, which
          # copying r.raw directly does not do.
          for chunk in r.iter_content(chunk_size=1024 * 1024):
            f.write(chunk)
      os.replace(partial, target)
    finally:
      # Only present here if the download did not complete.
      if os.path.exists(partial):
        os.remove(partial)
    # Kept from the previous implementation: a download leaves the working
    # directory at root_dir.
    os.chdir(root_dir)
  return filename

def download_txt(url):
  """
  Download a small text file into data_dir unless it is already there.

  The whole response is held in memory, written to a temporary '.part' file
  and renamed once complete, so a failed run never leaves a partial file that
  a later call would treat as already downloaded.

  :param url: address of the file; its last path component becomes the file name
  :type url: string

  :return: name of the file inside data_dir
  :rtype: string

  :raises requests.HTTPError: if the server answers with an error status
  """
  filename = url.split('/')[-1]
  target = data_dir + '/' + filename
  if not os.path.isfile(target):
    partial = target + '.part'
    try:
      with requests.get(url) as r:
        # Fail loudly instead of storing an HTML error page as the path list.
        r.raise_for_status()
        with open(partial, 'wb') as f:
          f.write(r.content)
      os.replace(partial, target)
    finally:
      # Only present here if the download did not complete.
      if os.path.exists(partial):
        os.remove(partial)
    # Kept from the previous implementation: a download leaves the working
    # directory at root_dir.
    os.chdir(root_dir)
  return filename

def get_image_paths(filename):
  """
  Return the image paths listed, one per line, in a text file of data_dir.

  Line terminators are removed and blank lines are skipped, so a trailing
  empty line in the file does not produce an empty path.

  :param filename: name of the text file inside data_dir
  :type filename: string

  :return: collection of images' paths
  :rtype: list
  """
  path = data_dir + '/' + filename
  with open(path, mode='r') as paths:
    stripped_lines = (line.rstrip('\r\n') for line in paths)
    # An empty path would make generate() fail when it opens the image.
    return [image_path for image_path in stripped_lines if image_path.strip()]

def generate(local_dir,
             train_image_paths,
             source_ds_dir,
             destination_ds_dir,
             root_dir='/content',
             new_width=224,
             new_height=224,
             resample=Image.BICUBIC):
  """
  Write a resized copy of every listed image in four orientations.

  Each image is resized to (new_width, new_height), converted to RGB when
  needed and saved unrotated under '<destination_ds_dir>0/' and transposed
  with PIL's ROTATE_90 / ROTATE_180 / ROTATE_270 methods under
  '<destination_ds_dir>90/', '.../180/' and '.../270/'. Nothing is generated
  when the '0' folder already contains files.

  :param local_dir: directory containing the source and destination folders
  :type local_dir: string
  :param train_image_paths: image paths relative to source_ds_dir
  :type train_image_paths: list
  :param source_ds_dir: source folder relative to local_dir, ending with '/'
  :type source_ds_dir: string
  :param destination_ds_dir: destination folder relative to local_dir, ending with '/'
  :type destination_ds_dir: string
  :param root_dir: directory to change to once generation is over
  :type root_dir: string
  :param new_width: width of the generated images in pixels
  :type new_width: int
  :param new_height: height of the generated images in pixels
  :type new_height: int
  :param resample: PIL resampling filter used for resizing
  """
  existing_files = os.listdir(local_dir + '/' + destination_ds_dir + '0')
  if len(existing_files) != 0:
    print('Generation process already done!')
    return

  new_dim = (new_width, new_height)
  # Folder name -> transpose method (None keeps the image unrotated).
  orientations = ((0, None),
                  (90, Image.ROTATE_90),
                  (180, Image.ROTATE_180),
                  (270, Image.ROTATE_270))
  os.chdir(local_dir)
  try:
    for image_path in train_image_paths:
      # Only the file name is kept; the category folder is dropped.
      image_name = os.path.basename(image_path)
      # The context manager releases the file handle of every source image.
      with Image.open(source_ds_dir + image_path, mode='r') as original_image:
        base_image = original_image.resize(size=new_dim, resample=resample)
      # Resize and RGB conversion do not depend on the angle: do them once.
      if base_image.mode != 'RGB':
        base_image = base_image.convert('RGB')
      for angle, method in orientations:
        if method is None:
          final_image = base_image
        else:
          final_image = base_image.transpose(method=method)
        final_image.save(destination_ds_dir + '{}/'.format(angle) + image_name)
  finally:
    # Restore the working directory even if an image cannot be processed.
    os.chdir(root_dir)

def backup(local_dir, remote_dir, filename):
  """
  Pack a folder of local_dir into an uncompressed tar archive and copy it to remote_dir.

  The folder to archive is derived from `filename` by removing its '.tar'
  suffix (e.g. 'RotatedImages.tar' archives 'RotatedImages'). The archive is
  built inside local_dir, copied to remote_dir and then deleted locally. The
  working directory is left at local_dir.

  :param local_dir: directory containing the folder to archive
  :type local_dir: string
  :param remote_dir: directory the archive is copied to
  :type remote_dir: string
  :param filename: name of the archive to create, '<folder>.tar'
  :type filename: string
  """
  os.chdir(local_dir)
  # Strip only the trailing '.tar' so folder names containing dots still
  # resolve; other names keep the previous "text before the first dot" rule.
  if filename.endswith('.tar'):
    folder = filename[:-len('.tar')]
  else:
    folder = filename.split('.')[0]
  try:
    # The context manager flushes and closes the archive even on errors.
    with tarfile.open(filename, 'w') as tar:
      tar.add(folder)
    copyfile(filename, remote_dir + '/' + filename)
  finally:
    # The local archive is only a temporary artefact of the backup.
    if os.path.exists(filename):
      os.remove(filename)

## Generate dataset

### indoorCVPR_09

In [None]:
# Indoor Scene Recognition database (indoorCVPR_09): 67 indoor categories and
# 15620 jpg images in total. The number of images varies across categories,
# with at least 100 images per category.
indoor_url = 'http://groups.csail.mit.edu/vision/LabelMe/NewImages/indoorCVPR_09.tar'

# Only a subset of the database is used: 80 training and 20 testing images
# per category, listed in the two text files below.
indoor_train_url = 'https://web.mit.edu/torralba/www/TrainImages.txt'
indoor_test_url = 'https://web.mit.edu/torralba/www/TestImages.txt'

# Fetch the archive and both split files into data_dir (files that are
# already present are not downloaded again).
indoor_filename = download(indoor_url)
indoor_train_filename = download_txt(indoor_train_url)
indoor_test_filename = download_txt(indoor_test_url)

#### Generate indoorCVPR_09 dataset

In [None]:
# Read the image paths (one per line) listed in the downloaded train and test
# split files.
indoor_train_image_paths = get_image_paths(indoor_train_filename)
indoor_test_image_paths = get_image_paths(indoor_test_filename)

In [None]:
# Local (Colab VM) working folder for the indoor dataset.
local_indoor_dir = root_dir + '/indoor'
# One sub-folder per rotation angle; each angle is a class label.
CLASS_NAMES = [0, 180, 270, 90]

# Destination roots of the rotated train and test images.
rotated_dataset = local_indoor_dir + '/RotatedImages'
rotated_test_dataset = local_indoor_dir + '/RotatedTestImages'

createFolder(local_indoor_dir)
for dataset_dir in (rotated_dataset, rotated_test_dataset):
  createFolder(dataset_dir)
  for name in CLASS_NAMES:
    createFolder(dataset_dir + '/{}'.format(name))

In [None]:
# Copy the original archive from Google Drive to the Colab VM and extract it
# there.
transfer(indoor_filename, data_dir, local_indoor_dir)

File transfer completed in 67.833 seconds
File extraction completed in 43.478 seconds


In [None]:
# Resize every training image and write its four orientations into
# RotatedImages/<angle>/, timing the whole run.
t0 = time()
generate(local_indoor_dir, indoor_train_image_paths, source_ds_dir='Images/', destination_ds_dir='RotatedImages/')
print('Rotation of train set completed in %0.3f seconds' % (time() - t0))

Rotation of train set completed in 162.443 seconds


In [None]:
# Same processing for the test images, written into RotatedTestImages/<angle>/.
t0 = time()
generate(local_indoor_dir, indoor_test_image_paths, source_ds_dir='Images/', destination_ds_dir='RotatedTestImages/')
print('Rotation of test set completed in %0.3f seconds' % (time() - t0))

Rotation of test set completed in 40.770 seconds


#### Backup indoorCVPR_09 to Google Drive

In [None]:
# Folder on Google Drive (inside data_dir) that receives the generated archives.
remote_indoor_dir = data_dir + '/indoor'
createFolder(remote_indoor_dir)

In [None]:
# Back up the generated train dataset: archive RotatedImages on the Colab VM
# and copy the tar file to Google Drive.
t0 = time()
backup(local_indoor_dir, remote_indoor_dir, filename='RotatedImages.tar')
print('Train dataset transfer completed in %0.3f seconds' % (time() - t0))

Train dataset transfer completed in 8.838 seconds


In [None]:
# Back up the generated test dataset: archive RotatedTestImages on the Colab VM
# and copy the tar file to Google Drive.
t0 = time()
backup(local_indoor_dir, remote_indoor_dir, filename='RotatedTestImages.tar')
print('Test dataset transfer completed in %0.3f seconds' % (time() - t0))

Test dataset transfer completed in 2.431 seconds
