In [None]:
import os
import sys
import tarfile
from six.moves.urllib.request import urlretrieve

# Local directory (relative to the working directory) that receives every
# downloaded and extracted file. The trailing slash matters: paths below are
# built by plain string concatenation.
data_folder = 'data/'
# Remote directory the dataset files are fetched from.
url = 'http://ufldl.stanford.edu/housenumbers/'

# Create the local data directory the first time this runs.
if not os.path.exists(data_folder):
    os.makedirs(data_folder)

def download_data(filename, expected_bytes, force=False):
    """Download a file if not present, and make sure it's the right size.

    Args:
        filename: Name of the file, relative to both the remote ``url`` and
            the local ``data_folder``.
        expected_bytes: Exact size in bytes the local file must have.
        force: If True, download even when a local copy already exists.

    Returns:
        ``filename`` exactly as passed in (the bare name, without
        ``data_folder``), whether or not a download took place.

    Raises:
        Exception: If the size of the local file differs from
            ``expected_bytes``.
    """
    file_path = data_folder + filename
    if force or not os.path.exists(file_path):
        # urlretrieve returns (local_path, headers). Do not rebind `filename`
        # to that local path: doing so made the return value and the printed
        # name differ between a fresh download ('data/<name>') and a cached
        # file ('<name>').
        urlretrieve(url + filename, file_path)
    statinfo = os.stat(file_path)
    if statinfo.st_size != expected_bytes:
        # A size mismatch usually means a truncated or failed download.
        raise Exception(
          'Failed to verify ' + filename + '. Can you get to it with a browser?')
    print('Found and verified', filename)
    return filename

def extract_data(filename, force=False):
    """Extract a ``.tar.gz`` archive located in ``data_folder``.

    Extraction is skipped when the target directory already exists, unless
    ``force`` is True.

    Args:
        filename: Archive name relative to ``data_folder``
            (e.g. ``'train.tar.gz'``).
        force: If True, extract even when the target directory is present.

    Returns:
        ``data_folder`` joined with the archive name stripped of its two
        extensions (e.g. ``'data/train'``). This assumes the archive's
        top-level directory is named after the archive -- the contents are
        not inspected to confirm it.
    """
    # Strip both extensions: 'train.tar.gz' -> 'train'.
    root = data_folder + os.path.splitext(os.path.splitext(filename)[0])[0]
    if os.path.isdir(root) and not force:
        # You may override by setting force=True.
        print('%s already present - Skipping extraction of %s.' % (root, filename))
        return root

    print('Extracting data for %s. This may take a while. Please wait.' % root)
    # Flush first so the message is visible before the slow extraction starts.
    sys.stdout.flush()
    # The context manager closes the archive even if extraction fails.
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; only use this on archives from a trusted source.
    with tarfile.open(data_folder + filename) as tar:
        tar.extractall(data_folder)
    return root

# Fetch the three .tar.gz archives. The second argument is the exact byte
# size download_data checks the local file against; it raises on a mismatch.
train_filename = download_data('train.tar.gz', 404141560)
test_filename = download_data('test.tar.gz', 276555967)
extra_filename = download_data('extra.tar.gz', 1955489752)

# Fetch the 32x32 .mat files, verified by size in the same way.
cropped_train_filename = download_data('train_32x32.mat', 182040794)
cropped_test_filename = download_data('test_32x32.mat', 64275384)
cropped_extra_filename = download_data('extra_32x32.mat', 990297376)

# Unpack the archives into data_folder. Each call returns the directory path
# derived from the archive name (e.g. 'data/train') and skips extraction when
# that directory already exists.
train_folder = extract_data('train.tar.gz')
test_folder = extract_data('test.tar.gz')
extra_folder = extract_data('extra.tar.gz')