In [1]:
import os
from multiprocess import Pool
import datetime as dt
from datetime import date
from dateutil.rrule import rrule, DAILY
from tqdm import tqdm

In [2]:
# Region names to download; save_images maps each name to a bounding box.
regions = [
    'atlantic',
    'south_atlantic',
    'east_pacific',
    'south_pacific_1',
    'south_pacific_2',
]
# First and last day of the range that get_urls walks with rrule.
start_date = date(year=2012, month=1, day=1)
end_date = date(year=2019, month=11, day=12)

def get_urls():
    """Build the download work list: one entry per (day, region, satellite).

    Iterates daily from the module-level ``start_date`` up to ``end_date``
    using ``rrule`` and, for every day, pairs each name in ``regions`` with
    the two satellite names ``'Aqua'`` and ``'Terra'``.

    Returns:
        list: ``(date, satellite, region)`` tuples of strings, with the date
        formatted as ``YYYY-MM-DD``. Entries are ordered by day, then region,
        then satellite. Note that these are request parameters, not URLs.
    """
    satellites = ('Aqua', 'Terra')
    every_day = rrule(DAILY, dtstart=start_date, until=end_date)
    return [
        (day.strftime("%Y-%m-%d"), satellite, str(region))
        for day in every_day
        for region in regions
        for satellite in satellites
    ]


# Build the full work list once, then report its size and the first two entries.
urls = get_urls()
print("Number of urls: {}".format(len(urls)))
print("Sample urls: {} \n {}".format(urls[0], urls[1]))

Number of urls: 28730
Sample urls: ('2012-01-01', 'Aqua', 'atlantic') 
 ('2012-01-01', 'Terra', 'atlantic')


In [3]:
def save_images(url):
    """Download one snapshot image and save it as a JPEG under ``data/unlabeled/``.

    Args:
        url: A ``(current_date, satellite, region)`` tuple of strings, e.g.
            ``('2012-01-01', 'Aqua', 'atlantic')``. Despite the parameter
            name this is not a URL; the request URL is built from it here.

    Returns:
        str: The file stem ``"<date>_<region>_<satellite>"`` of the saved image
        (the file itself is written as ``data/unlabeled/<stem>.jpg``).

    Raises:
        ValueError: If ``region`` has no bounding box defined below.
        requests.HTTPError: If the server responds with an error status.
        requests.RequestException: On connection failures or timeouts that
            remain after the configured connect retries.
    """
    # Imports stay inside the function, as in the original; presumably so the
    # function is self-contained when sent to Pool workers -- TODO confirm.
    import os
    import requests
    from PIL import Image
    from io import BytesIO
    from requests.adapters import HTTPAdapter
    from requests.packages.urllib3.util.retry import Retry

    # (bottom, left, top, right) per region, in the order the BBOX query
    # parameter is filled in below (request uses CRS=EPSG:4326).
    bounding_boxes = {
        'atlantic': (10.08984375, -54.597656250000014, 24.08984375, -33.597656250000014),
        'south_atlantic': (-21.48046875, -28.160156250000014, -7.48046875, -7.160156250000014),
        'east_pacific': (9.66796875, 132.01171875, 23.66796875, 153.01171875),
        'south_pacific_1': (-20.63671875, -110.56640624999997, -6.63671875, -89.56640624999997),
        'south_pacific_2': (-20.49609375, -149.02734374999997, -6.49609375, -128.02734374999997),
    }

    current_date, satellite, region = url
    try:
        bottom, left, top, right = bounding_boxes[region]
    except KeyError:
        # Previously an unknown region surfaced as an UnboundLocalError on `bottom`.
        raise ValueError(
            "Unknown region {!r}; expected one of {}".format(region, sorted(bounding_boxes))
        ) from None

    request_url = (
        'https://wvs.earthdata.nasa.gov/api/v1/snapshot?REQUEST=GetSnapshot'
        '&TIME={}T00:00:00Z&BBOX={},{},{},{}&CRS=EPSG:4326'
        '&LAYERS=MODIS_{}_CorrectedReflectance_TrueColor,Coastlines'
        '&WRAP=day,x&FORMAT=image/jpeg&WIDTH=480&HEIGHT=320&ts=1569875246328'
    ).format(current_date, bottom, left, top, right, satellite)

    # The context manager closes the session's pooled connections when done.
    with requests.Session() as session:
        retry = Retry(connect=3, backoff_factor=0.5)
        adapter = HTTPAdapter(max_retries=retry)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        # Go through the session: a bare requests.get() would bypass the
        # retry adapter mounted above. The (connect, read) timeout keeps a
        # stalled request from blocking a pool worker forever.
        response = session.get(request_url, timeout=(10, 120))
        # Fail with the HTTP status rather than a confusing image-decode error.
        response.raise_for_status()

    name = '_'.join([current_date, region, satellite])
    out_dir = "data/unlabeled/"
    # exist_ok makes this safe when several workers create the directory at once.
    os.makedirs(out_dir, exist_ok=True)
    img = Image.open(BytesIO(response.content))
    img.save(out_dir + name + ".jpg")
    return name

# Index into `urls` at which to start downloading. Presumably the earlier
# entries were fetched in a previous run -- TODO confirm. Use 0 for a full run.
RESUME_FROM = 24984
# Slice into a new name instead of rebinding `urls`: re-running this cell then
# processes the same entries again rather than slicing an already-sliced list
# (which would leave nothing to download).
pending_urls = urls[RESUME_FROM:]

with Pool(8) as pool:
    # imap yields results in input order; list() drains it while tqdm reports progress.
    results = list(tqdm(pool.imap(save_images, pending_urls), total=len(pending_urls)))

100%|██████████████████████████████████████████████████████████████████████████████| 3746/3746 [18:46<00:00,  2.03it/s]
