In [1]:
import numpy as np
import pandas as pd
from collections.abc import Iterable

import geopandas as gp
from rasterio.plot import show
import rasterio as rio
import rasterio.features
import rasterio.warp
import earthpy.spatial as es
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
import pyproj;  

import urllib
import json
import pickle

import time
from ipypb import track
import sys
import warnings
import os

### Set directories

In [2]:
os.chdir("E:/University College London/O'Sullivan, Aidan - SDG6/")
s_path = './Landsat data/Scenes/'
c1_path = "./Landsat data/Cropped level 1 data/"

### Read in list of sites for download and setup metadata

In [3]:
sites = pd.read_pickle("sites3.p").set_index(['sid','dt'])
sites = sites.set_geometry('geometry_poly')

In [4]:
### Add in display id and aquisition date

# ziplist = zip(sites.index.get_level_values(0),sites.index.get_level_values(1),
#               sites.l7_scene_id,sites.l8_scene_id)

# for sid,dt,l7_scene_id,l8_scene_id in track(ziplist,len(sites)):
    
#     date = dt.strftime("%Y-%m-%d")
    
#     if (isinstance(l7_scene_id,str)==True):
#         sc_meta7 = pickle.load(open('./Landsat/Scenes/scene_'+str(l7_scene_id)+".p","rb"))
        
#         sites.loc[(sid,date),'l7_display_id'] = sc_meta7[0]['displayId']
#         sites.loc[(sid,date),'l7_acquisition_dt'] = sc_meta7[0]['acquisitionDate']
        
#         del(sc_meta7)

#     elif (isinstance(l8_scene_id,str)==True):
#         sc_meta8 = pickle.load(open('./Landsat/Scenes/scene_'+str(l8_scene_id)+".p","rb"))
        
#         sites.loc[(sid,date),'l8_display_id'] = sc_meta8[0]['displayId']
#         sites.loc[(sid,date),'l8_acquisition_dt'] = sc_meta8[0]['acquisitionDate']
        
#         del(sc_meta8)

### Cropper function

In [4]:
def cropper(raster, geoms, outpath):
    """
    This function accepts a raster object, an interable list of geometrys (or a single geometry),
    and a filepath to save the cropped raster to, the cropped raster is then read back in and returned.
    """    
    ## As crop accepts an iterable of geoms we first put any single geoms into a list
    if not isinstance(geoms, Iterable):
        geoms = [geoms]

    ## Next we crop the image
    raster_crop, raster_crop_meta = es.crop_image(raster, geoms)

    ## We now need to update the metadata with the spatial data
    raster_crop_meta.update({'transform': raster_crop_meta['transform'],
                             'height': raster_crop.shape[1],
                             'width': raster_crop.shape[2],
                             'nodata': raster_crop.min()}) # <- This is the 'mask' value
    
    with rio.open(outpath, 'w', **raster_crop_meta) as file:
        file.write(raster_crop[0], 1)
        
    raster_crop = rio.open(outpath)
    
    return raster_crop

### Filter sites for download

In [5]:
### Filter sites and dates for those with scenes
dll = sites[sites.display_id.notna()]

### Filter sites for those with geometry
dll = dll[dll.geometry_poly.notna()].set_geometry('geometry_poly')

len(dll)

221557

### Flag already downloaded files

In [6]:
## check directory
os.getcwd()

"E:\\University College London\\O'Sullivan, Aidan - SDG6"

In [7]:
dl_files = os.listdir(c1_path)

dll['downloaded'] = pd.Series(np.bool)

for sid in track(dll.index.get_level_values(0).unique()):
                 
    sid_files = [ i for i in dl_files if sid in i ]
    
    sitedl = dll.loc[sid,:]
    
    if len([ i for i in sid_files if 'BQA' in i ])<len(sitedl):
        dll.loc[sid,'downloaded'] = False
    else:
        dll.loc[sid,'downloaded'] = True


In [11]:
13175*260/3600/24

39.64699074074074

In [10]:
len(dll[dll.downloaded==True])

57

### Open crop and save rasters from Google open API

In [None]:
## set buffer distance (300m) in degrees
bbox_bufd = 360*300/(40000*1000)

## list for exceptions
crop_exceptions = [pd.read_csv('./Landsat data/crop_exceptions.csv')]
source_exceptions = [pd.read_csv('./Landsat data/source_exceptions.csv')]
meta_exceptions = [pd.read_csv('./Landsat data/meta_exceptions.csv')]

for sid in track(dll[dll.downloaded==False].index.get_level_values(0).unique()[0:500]):
    
    ## subset site list for site
    sitedl = dll.loc[sid,:]
    
    ## setup polygon for cropping
    polygon_bbox = sitedl.envelope.buffer(bbox_bufd)

    ### Create scene entity lists
    dsids = list(sitedl[sitedl.display_id.notna()].display_id)

    dsids_5 = [ i for i in dsids if 'LT05' in i]
    dsids_7 = [ i for i in dsids if 'LE07' in i]
    dsids_8 = [ i for i in dsids if 'LC08' in i]
    
    #~~~~~~~~download raster bands~~~~~~~~~~~#
    
    ## site id
    site_id = sid
    
    ## create bands list
    bands_5 = ['B1','B2','B3','B4','B5','BQA','B6']
    bands_7 = ['B1','B2','B3','B4','B5','BQA','B6_VCID_1','B6_VCID_2']
    bands_8 = ['B2','B3','B4','B5','BQA','B10','B11']

    ## create iterate list
    zip_list = list(zip(sorted(bands_5*len(dsids_5)),dsids_5*len(bands_5)))
    zip_list += list(zip(sorted(bands_7*len(dsids_7)),dsids_7*len(bands_7)))
    zip_list += list(zip(sorted(bands_8*len(dsids_8)),dsids_8*len(bands_8)))

    for band,dsid in zip_list:

        ## create url elements
        displayid = dsid
        platform = dsid[0:4]
        key = dsid[10:13]+'/'+dsid[13:16]

        ## TIF construct source url
        filepath = f'https://storage.googleapis.com/gcp-public-data-landsat/{platform}/01/{key}/{dsid}/{dsid}_{band}.TIF'

        try:

            with rio.open(filepath) as src:

                polygon_bbox = polygon_bbox.to_crs(src.crs)
                polygon_geom = polygon_bbox.geometry

                try:
                    
                    cropped = cropper(src, polygon_geom, c1_path+site_id+'__'+dsid+'_'+band+'.TIF')

                except:
                    crop_exceptions.append(dsid+band)
                    print(f'Crop exception for {dsid} {band}')
                    

                src.close()

            time.sleep(2)

        except:
            source_exceptions.append(dsid+band)
            print(f'Source exception for {dsid} {band}')
            print(filepath)

    
    #~~~~~~~~download meta data~~~~~~~~~~~#                  

    meta_file_5 = ['MTL','ANG']
    meta_file_7 = ['MTL','ANG','GCP']
    meta_file_8 = ['MTL','ANG']
    
    ## create iterate list
    mzip_list = list(zip(sorted(meta_file_5*len(dsids_5)),dsids_5*len(meta_file_5)))
    mzip_list += list(zip(sorted(meta_file_7*len(dsids_7)),dsids_7*len(meta_file_7)))
    mzip_list += list(zip(sorted(meta_file_8*len(dsids_8)),dsids_8*len(meta_file_8)))

    
    for mf,dsid in mzip_list:

        ## create url elements
        displayid = dsid
        platform = dsid[0:4]
        key = dsid[10:13]+'/'+dsid[13:16]

        ## Metadata construct source url
        filepath = f'https://storage.googleapis.com/gcp-public-data-landsat/{platform}/01/{key}/{dsid}/{dsid}_{mf}.txt'

        try:

            ## MTL file
            remote_file = urllib.request.urlopen(filepath).read()

            local_file = open(c1_path+dsid+'_'+mf+'.txt','wb')
            local_file.write(remote_file)
            local_file.close()

        except:
            meta_exceptions.append(dsid)

            time.sleep(2)

            print(f'Scene meta not available {dsid}')
            
pd.Series(meta_exceptions).to_csv('./Landsat data/meta_exceptions.csv')
pd.Series(crop_exceptions).to_csv('./Landsat data/crop_exceptions.csv')
pd.Series(source_exceptions).to_csv('./Landsat data/source_exceptions.csv')

Crop exception for LT05_L1TP_041030_20020702_20160916_01_T1 B6
Source exception for LT05_L1TP_042030_20050717_20160912_01_T1 B6
https://storage.googleapis.com/gcp-public-data-landsat/LT05/01/042/030/LT05_L1TP_042030_20050717_20160912_01_T1/LT05_L1TP_042030_20050717_20160912_01_T1_B6.TIF
Crop exception for LT05_L1TP_044028_20030710_20160915_01_T1 B1
Crop exception for LT05_L1TP_044028_20030710_20160915_01_T1 B2
Crop exception for LT05_L1TP_044028_20030710_20160915_01_T1 B3
Crop exception for LT05_L1TP_044028_20030710_20160915_01_T1 B4
Crop exception for LT05_L1TP_044028_20030710_20160915_01_T1 B5
Crop exception for LT05_L1TP_044028_20030710_20160915_01_T1 B6
Crop exception for LT05_L1TP_044028_20030710_20160915_01_T1 BQA
Crop exception for LE07_L1TP_041029_20130505_20160911_01_T1 B1
Crop exception for LE07_L1TP_041029_20130505_20160911_01_T1 B2
Crop exception for LE07_L1TP_041029_20130505_20160911_01_T1 B3
Crop exception for LE07_L1TP_041029_20130505_20160911_01_T1 B4
Crop exception for

Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B1
Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B1
Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B1
Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B2
Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B2
Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B2
Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B3
Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B3
Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B3
Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B4
Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B4
Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B4
Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B5
Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B5
Crop exception for LT05_L1TP_013028_20100501_20160901_01_T1 B5
Crop exception for LT05_L1TP_013028_20100501_20160901_0

Crop exception for LT05_L1TP_012030_20050715_20160912_01_T1 B4
Crop exception for LT05_L1TP_012030_20050715_20160912_01_T1 B4
Crop exception for LT05_L1TP_012030_20050715_20160912_01_T1 B4
Crop exception for LT05_L1TP_012030_20050715_20160912_01_T1 B4
Crop exception for LT05_L1TP_012030_20090608_20160906_01_T1 B4
Crop exception for LT05_L1TP_012030_20090608_20160906_01_T1 B4
Crop exception for LT05_L1TP_012030_20090608_20160906_01_T1 B4
Crop exception for LT05_L1TP_012030_20090608_20160906_01_T1 B4
Crop exception for LT05_L1TP_012030_20090608_20160906_01_T1 B4
Crop exception for LT05_L1TP_012030_20090608_20160906_01_T1 B4
Crop exception for LT05_L1TP_012030_20090710_20160907_01_T1 B4
Crop exception for LT05_L1TP_012030_20090710_20160907_01_T1 B4
Crop exception for LT05_L1TP_012030_20090710_20160907_01_T1 B4
Crop exception for LT05_L1TP_012030_20090710_20160907_01_T1 B4
Crop exception for LT05_L1TP_012030_20090827_20160903_01_T1 B4
Crop exception for LT05_L1TP_012030_20090827_20160903_0

Crop exception for LE07_L1TP_012030_20060726_20160923_01_T1 B5
Crop exception for LE07_L1TP_012030_20060726_20160923_01_T1 B5
Crop exception for LE07_L1TP_012030_20130814_20160909_01_T1 B5
Crop exception for LE07_L1TP_012030_20130814_20160909_01_T1 B5
Crop exception for LE07_L1TP_012030_20130830_20160908_01_T1 B5
Crop exception for LE07_L1TP_012030_20130830_20160908_01_T1 B5
Crop exception for LE07_L1TP_012030_20130830_20160908_01_T1 B5
Crop exception for LE07_L1TP_012030_20130830_20160908_01_T1 B5
Crop exception for LE07_L1TP_012030_20050605_20160924_01_T1 B6_VCID_1
Crop exception for LE07_L1TP_012030_20050605_20160924_01_T1 B6_VCID_1
Crop exception for LE07_L1TP_012030_20060726_20160923_01_T1 B6_VCID_1
Crop exception for LE07_L1TP_012030_20060726_20160923_01_T1 B6_VCID_1
Crop exception for LE07_L1TP_012030_20130814_20160909_01_T1 B6_VCID_1
Crop exception for LE07_L1TP_012030_20130814_20160909_01_T1 B6_VCID_1
Crop exception for LE07_L1TP_012030_20130830_20160908_01_T1 B6_VCID_1
Crop e

Crop exception for LE07_L1TP_012031_20120710_20160911_01_T1 B5
Crop exception for LE07_L1TP_012031_20120710_20160911_01_T1 B5
Crop exception for LE07_L1TP_012031_20120710_20160911_01_T1 B5
Crop exception for LE07_L1TP_012031_20120710_20160911_01_T1 B5
Crop exception for LE07_L1TP_012031_20120710_20160911_01_T1 B5
Crop exception for LE07_L1TP_012031_20120710_20160911_01_T1 B6_VCID_1
Crop exception for LE07_L1TP_012031_20120710_20160911_01_T1 B6_VCID_1
Crop exception for LE07_L1TP_012031_20120710_20160911_01_T1 B6_VCID_1
Crop exception for LE07_L1TP_012031_20120710_20160911_01_T1 B6_VCID_1
Crop exception for LE07_L1TP_012031_20120710_20160911_01_T1 B6_VCID_1
Crop exception for LE07_L1TP_012031_20120710_20160911_01_T1 B6_VCID_1
Crop exception for LE07_L1TP_012031_20120710_20160911_01_T1 B6_VCID_2
Crop exception for LE07_L1TP_012031_20120710_20160911_01_T1 B6_VCID_2
Crop exception for LE07_L1TP_012031_20120710_20160911_01_T1 B6_VCID_2
Crop exception for LE07_L1TP_012031_20120710_20160911_0

Crop exception for LT05_L1TP_015029_20040903_20160913_01_T1 B6
Crop exception for LT05_L1TP_015029_20041005_20160913_01_T1 B6
Crop exception for LT05_L1TP_015029_20040903_20160913_01_T1 BQA
Crop exception for LT05_L1TP_015029_20041005_20160913_01_T1 BQA
Crop exception for LE07_L1TP_013029_20110917_20160913_01_T1 B1
Crop exception for LE07_L1TP_015029_20140619_20160906_01_T1 B1
Crop exception for LE07_L1TP_015029_20150521_20160902_01_T1 B1
Crop exception for LE07_L1TP_015029_20140619_20160906_01_T1 B2
Crop exception for LE07_L1TP_015029_20150521_20160902_01_T1 B2
Crop exception for LE07_L1TP_015029_20140619_20160906_01_T1 B3
Crop exception for LE07_L1TP_015029_20150521_20160902_01_T1 B3


### Download metadata

In [26]:
## list for exceptions
meta_exceptions = ['Meta file not downloaded']

meta_file_7 = ['MTL','ANG','GCP']
meta_file_8 = ['MTL','ANG']

for sid in track(dll2.index.get_level_values(0)): #[dll2.downloaded==False]
    
    sitedl = dll2.loc[sid,:]
    
    ### Create scene entity lists
    dsids = list(sitedl[sitedl.l7_display_id.notna()].l7_display_id)
    dsids += list(sitedl[sitedl.l8_display_id.notna()].l8_display_id)
    
    #dsids_5 = [ i for i in sitedl.l5_display_id if 'LT05' in i]
    dsids_7 = [ i for i in dsids if 'LE07' in i]
    dsids_8 = [ i for i in dsids if 'LC08' in i]
    
    ## create iterate list
    #zip_list = list(zip(sorted(bands_5*len(dsids_5)),dsids_5*len(bands_5)))
    zip_list = list(zip(sorted(meta_file_7*len(dsids_7)),dsids_7*len(meta_file_7)))
    zip_list += list(zip(sorted(meta_file_8*len(dsids_8)),dsids_8*len(meta_file_8)))

    for mf,dsid in zip_list:

        ## create url elements
        displayid = dsid
        platform = dsid[0:4]
        key = dsid[10:13]+'/'+dsid[13:16]

        ## Metadata construct source url
        filepath = f'https://storage.googleapis.com/gcp-public-data-landsat/{platform}/01/{key}/{dsid}/{dsid}_{mf}.txt'
        
        try:

            ## MTL file
            remote_file = urllib.request.urlopen(filepath).read()

            local_file = open(c1_path+dsid+'_'+mf+'.txt','wb')
            local_file.write(remote_file)
            local_file.close()

        except:
            meta_exceptions.append(dsid)

            time.sleep(2)

            print(f'Scene meta not available {dsid}')