In [1]:
from landsatxplore.api import API
from glob import glob
import os, sys

# NB: Python 3.12 has pip install issues. Need to create an environment with python 3.11 for now. 

In [2]:
# Initialize a new API instance and get an access key.
#
# Credentials are read from environment variables instead of being written in
# the notebook, so they never end up in version control or shared copies.
# Set LANDSATXPLORE_USERNAME and LANDSATXPLORE_PASSWORD before starting Jupyter.
# NOTE(review): the password that used to be hardcoded here should be treated
# as compromised and changed.
try:
    usgs_username = os.environ["LANDSATXPLORE_USERNAME"]
    usgs_password = os.environ["LANDSATXPLORE_PASSWORD"]
except KeyError as missing_var:
    raise RuntimeError(
        f"Environment variable {missing_var} is not set; export your USGS "
        "credentials as LANDSATXPLORE_USERNAME and LANDSATXPLORE_PASSWORD."
    ) from missing_var

api = API(usgs_username, usgs_password)

In [12]:
# Years to query: 2013 up to and including 2022
years = range(2013, 2023)

# Maps each year (as a string) to that year's search results,
# ordered from least to most cloud cover
scene_dict = {}

for year in years:

    # Query the 'landsat_ot_c2_l2' dataset around the point of interest and
    # order the hits so that the clearest scenes come first.
    # The window runs from February to August: the rainy season ends in Jan,
    # so this period should be mostly cloudless.
    scenes = sorted(
        api.search(
            dataset='landsat_ot_c2_l2',
            latitude=1.3521,
            longitude=103.8198,
            start_date=f'{year}-02-01',
            end_date=f'{year}-08-01',
            max_cloud_cover=40
        ),
        key=lambda found: found["cloud_cover"],
    )

    # Keep this year's ordered results under its string key
    scene_dict[str(year)] = scenes



In [14]:
# Summarise what the search found for every year: one line per scene showing
# the year, its cloud cover and the fourth "_"-separated field of the product id.
# Each scene is ~1GB, so this helps decide on a download limit
# (5 scenes per year) before fetching anything.
for year_key, year_scenes in scene_dict.items():
    # blank line between years
    print()
    for scene in year_scenes:
        id_fields = scene['landsat_product_id'].split('_')
        print(year_key, scene['cloud_cover'], id_fields[3])


2013 5 20130627
2013 19 20130424

2014 9 20140513
2014 17 20140411
2014 23 20140529
2014 23 20140222
2014 24 20140326
2014 30 20140614
2014 34 20140310

2015 11 20150414
2015 22 20150703
2015 25 20150225
2015 30 20150601
2015 34 20150516
2015 40 20150313

2016 11 20160705
2016 18 20160416
2016 20 20160502
2016 20 20160331
2016 28 20160228
2016 30 20160315
2016 33 20160603
2016 38 20160518

2017 17 20170724
2017 25 20170419
2017 28 20170318
2017 30 20170708
2017 35 20170505
2017 40 20170606

2018 6 20180524
2018 7 20180508
2018 12 20180727
2018 16 20180217
2018 27 20180422

2019 18 20190204
2019 19 20190527
2019 28 20190730
2019 31 20190220
2019 33 20190714
2019 37 20190511

2020 11 20200716
2020 14 20200310
2020 25 20200529
2020 35 20200326

2021 14 20210617
2021 21 20210703
2021 31 20210313
2021 38 20210601
2021 38 20210225

2022 13 20220401
2022 23 20220628
2022 26 20220417
2022 26 20220324
2022 35 20220730
2022 40 20220519


In [None]:
from landsatxplore.earthexplorer import EarthExplorer

# Upper bound on scenes downloaded per year (each scene is ~1GB).
# Scenes are sorted from lowest to highest cloud_cover, so keeping only the
# first few should be alright.
MAX_SCENES_PER_YEAR = 5

# Sign in to earthexplorer.
# Credentials are read from the environment (LANDSATXPLORE_USERNAME /
# LANDSATXPLORE_PASSWORD) rather than being written in the notebook.
ee = EarthExplorer(
    os.environ["LANDSATXPLORE_USERNAME"],
    os.environ["LANDSATXPLORE_PASSWORD"],
)

# try/finally guarantees that we log out even if the loop is interrupted
# (e.g. Kernel -> Interrupt) or an unexpected error escapes.
try:
    for key in years[:5]:
        key = str(key)
        output_dir = f'./data_collated/{key}'

        # Make sure the per-year folder exists before downloading into it.
        os.makedirs(output_dir, exist_ok=True)

        print(f"Downloading scenes in {key}.")

        scene_count = 0

        for scene in scene_dict[key]:

            # Stop once enough scenes for this year are on disk.
            if scene_count >= MAX_SCENES_PER_YEAR:
                break

            # be wary - for some reason, landsat_product_id gives the L1TP products
            # but display_id gives the L2SP products
            # choose the product you want carefully
            product_id = scene['landsat_product_id']
            print(f"Attempting to download {product_id}")

            # Note that there appears to be an issue where the program will claim
            # that the download failed even though it actually succeeded, and
            # raise an error. Catch it so the loop carries on, but do NOT skip
            # the check below: the file on disk decides whether the scene counts
            # towards the limit. `except Exception` (rather than a bare except)
            # lets KeyboardInterrupt through so the loop can still be stopped.
            try:
                ee.download(product_id, output_dir=output_dir)
            except Exception:
                print(f"{product_id} may have failed to download! Check to be sure.")

            # If the file is on disk, add to scene_count.
            # It seems level 2 products are downloaded with .tar.
            # NOTE(review): a download that aborted midway may also leave a
            # .tar behind - verify archives reported after a failure message.
            if os.path.exists(os.path.join(output_dir, f"{product_id}.tar")):
                print(f"{product_id} downloaded successfully!")
                scene_count += 1
finally:
    ee.logout()

Downloading scenes in 2013.
Attempting to download LC08_L1TP_125059_20130627_20200912_02_T1
Download failed with dataset id 1 of 2. Re-trying with the next one.


 40%|███████████████████████████████▉                                               | 385M/955M [01:51<03:10, 3.14MB/s]

In [16]:
### Code to set up one folder per year for the data download
# base_dir = "./data_collated"

# years = range(2013, 2023)

# for year in years:
#     year_folder = os.path.join(base_dir, str(year))
#     if not os.path.exists(year_folder):
#         os.mkdir(year_folder)

In [8]:
# Use if you already have some scenes downloaded - this will help skip those scenes.
data_dir = "./data"

# Names of everything already in the data folder (empty if the folder does not exist yet).
existing_scenes = os.listdir(data_dir) if os.path.isdir(data_dir) else []

# Directory entries carry file extensions (downloads appear to be saved as
# "<product id>.tar"), so a bare product id would never match them directly.
# Compare on the part before the first "." instead; product ids themselves
# contain no dots, and extension-less names (e.g. extracted folders) are unchanged.
existing_ids = {entry.split(".", 1)[0] for entry in existing_scenes}

scenes_to_dl = []

# NOTE: `scenes` is whatever the search loop assigned last, i.e. only the
# results for the final year searched - re-run the search for the year you
# want before running this cell.
for scene in scenes:
    # be wary - for some reason, landsat_product_id gives the L1TP products
    # but display_id gives the L2SP products
    # choose the product you want carefully
    product_id = scene['display_id']
    if product_id not in existing_ids:
        scenes_to_dl.append(product_id)

LC08_L2SP_125059_20210703_20210712_02_T1
LC08_L2SP_125059_20210601_20210608_02_T1
LC08_L2SP_125059_20210313_20210318_02_T1
