In [0]:
import urllib.request
import numpy as np
import subprocess
import os
from google.colab import files
import shutil
import torch
from torch.utils.data import TensorDataset, DataLoader

In [0]:
from google.colab import drive
root_dir = '/content/drive/'
drive.mount(root_dir, force_remount = True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive/


Download the Data:


In [0]:
def download(classes):
    link = 'https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap'
    base_dir = root_dir + 'My Drive/ML_Project/Dataset'
    print('Downloading ...')
    for c in classes:
        cname = c.replace(" ", "%20") # (Google cloud links replace spaces with 20%)
        print(f'{link}/{cname}.npy')
        urllib.request.urlretrieve(f'{link}/{cname}.npy', f'{base_dir}/{c}.npy')

In [0]:
download(['flamingo', 'sheep'])

Downloading ...
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/flamingo.npy
https://storage.googleapis.com/quickdraw_dataset/full/numpy_bitmap/sheep.npy


Split Data into training, test and validation, and save it:

In [0]:
def prepare_data():
    """
    Split data into training, validation, and testing data
    """
    base_dir = root_dir + 'My Drive/ML_Project/Dataset'
    file_names = os.listdir(base_dir)
    
    x_train, y_train = np.empty([0, 784]), np.empty([0])
    x_valid, y_valid = np.empty([0, 784]), np.empty([0])
    x_test, y_test = np.empty([0, 784]), np.empty([0])
    
    for idx, fname in enumerate(file_names):
        print(fname)
        
        data = np.load(f'{base_dir}/{fname}', allow_pickle=True)     
        labels = np.full(data.shape[0], idx)
        
        x_train = np.concatenate((x_train, data[:8400, :]), axis=0)
        y_train = np.append(y_train, labels[:8400])

        x_valid = np.concatenate((x_valid, data[8400: 12000, :]), axis=0)
        y_valid = np.append(y_valid, labels[8400: 12000])

        x_test = np.concatenate((x_test, data[12000: 15000, :]), axis=0)
        y_test = np.append(y_test, labels[12000: 15000])


    N = y_train.shape[0]
    # randomize training dataset
    rand = np.random.permutation(N)
    x_train, y_train = x_train[rand, :], y_train[rand]

    print(f'{len(y_train)} training files, {len(y_valid)} validation files, {len(y_test)} testing files')

    os.mkdir(root_dir + 'My Drive/ML_Project/Train')
    np.savez_compressed(root_dir + 'My Drive/ML_Project/Train/Train', data=x_train, target=y_train)
    os.mkdir(root_dir + 'My Drive/ML_Project/Validation')
    np.savez_compressed(root_dir + 'My Drive/ML_Project/Validation/Validation', data=x_valid, target=y_valid)
    os.mkdir(root_dir + 'My Drive/ML_Project/Test')
    np.savez_compressed(root_dir + 'My Drive/ML_Project/Test/Test', data=x_test, target=y_test)

In [0]:
prepare_data()

tree.npy
t-shirt.npy
ice cream.npy
fish.npy
face.npy
car.npy
bowtie.npy
apple.npy
flamingo.npy
sheep.npy
84000 training files, 36000 validation files, 30000 testing files


In [0]:
def load_data(batch_size = 128):
    """
    Returns a list of PyTorch DataLoaders in order of train, validation, and test loaders
    """
    dirnames = [root_dir + 'My Drive/ML_Project/Train/', root_dir + 'My Drive/ML_Project/Validation/', root_dir + 'My Drive/ML_Project/Test/']
    loaders = []
    for dirname in dirnames:
        dataname = os.listdir(dirname)[0]
        xs = np.load(dirname + dataname)['data'].reshape(-1, 28, 28)
        ys = np.load(dirname + dataname)['target']
        print(f'Loading {dataname} into Pytorch Dataloader ... Xs shape: {xs.shape}, Ys shape: {ys.shape}')
        loaders.append(DataLoader(TensorDataset(Tensor(xs), Tensor(ys)), batch_size = batch_size, shuffle=True))
    
    return loaders