In [8]:
# Specially for jupyter notebook
from tqdm.notebook import tqdm
import ee
ee.Initialize()
import pandas as pd

In [9]:
# Prepare google earth engine assets
deforest_hotspots = ee.FeatureCollection('users/thex/deforestation_amazon_hotspots')

# Use Landsat 8 surface reflectance data. Filter to deforest hotspots.
l8sr = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR').filterBounds(deforest_hotspots)

# Use Landsat 5 surface reflectance data. Filter to deforest hotspots.
l5sr = ee.ImageCollection('LANDSAT/LT05/C01/T1_SR').filterBounds(deforest_hotspots)

ee_months = ee.List(list(range(1,13)))

## Get image counts L8SR

In [10]:
# Sample cloudcover
feat_dict = {}
years = [2013, 2014, 2015, 2016, 2017, 2018, 2019]
months = list(range(1,13))
cloud_covers = [0,1,2,5,10]

rows = []
for y in tqdm(years, leave = False):

    # No clouds
    ee_list = ee_months.map(
                        lambda m: l8sr.filter(ee.Filter.calendarRange(y, y, 'year'))
                        .filter(ee.Filter.calendarRange(m, m, 'month'))
                        .filterBounds(deforest_hotspots)
                        .size()).flatten().getInfo()
    
    for m,month_value in enumerate(ee_list):
        row = {}
        row['year'] = y
        row['month'] = m+1
        row['size'] = month_value
        rows.append(row)
        
    for c in tqdm(cloud_covers, leave = False):

        ee_list = ee_months.map(
                lambda m: l8sr.filter(ee.Filter.calendarRange(y, y, 'year'))
                .filter(ee.Filter.calendarRange(m, m, 'month'))
                .filterBounds(deforest_hotspots).filterMetadata("CLOUD_COVER","less_than", c)
                .size()).flatten().getInfo()
        
        for m,month_value in enumerate(ee_list):
            row = {}
            row['cloud_cover'] = c
            row['year'] = y
            row['month'] = m+1
            row['size'] =  month_value
            rows.append(row)
            
            

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=7.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

In [11]:
df_l8sr_image_counts_forl8sr = pd.DataFrame(rows)

In [12]:
df_l8sr_image_counts_forl8sr = df_l8sr_image_counts_forl8sr.fillna(100)

In [13]:
df_l8sr_image_counts_forl8sr.to_csv(r'data\datasets\image_counts_by_cloud_lsr8.csv')

In [21]:
df_l8sr_image_counts_forl8sr.groupby(['month','cloud_cover',]).agg(sum)

Unnamed: 0_level_0,Unnamed: 1_level_0,year,size
month,cloud_cover,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.0,14112,0
1,1.0,14112,27
1,2.0,14112,35
1,5.0,14112,59
1,10.0,14112,90
...,...,...,...
12,1.0,14112,12
12,2.0,14112,25
12,5.0,14112,51
12,10.0,14112,94


In [31]:
df_l8sr_image_counts_forl8sr[df_l8sr_image_counts_forl8sr['month'] == 8].groupby(['month','cloud_cover',]).agg(sum)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,year,size
month,cloud_cover,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
8,0.0,1645,14112,0
8,1.0,1729,14112,592
8,2.0,1813,14112,673
8,5.0,1897,14112,838
8,10.0,1981,14112,992
8,100.0,1561,14112,2160


In [40]:
df_l8sr_image_counts_forl8sr[(df_l8sr_image_counts_forl8sr['month'] == 7)&(df_l8sr_image_counts_forl8sr['cloud_cover'] >=5.0)].groupby(['year','month']).agg(sum)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,size,cloud_cover
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2013,7,126,494,115.0
2014,7,342,481,115.0
2015,7,558,493,115.0
2016,7,774,659,115.0
2017,7,990,683,115.0
2018,7,1206,590,115.0
2019,7,1422,554,115.0


## Get image counts L5SR

In [15]:
# Sample cloudcover
feat_dict = {}
years = list(range(2001,2013))
months = list(range(1,13))
cloud_covers = [0,1,2,5,10]

rows = []
for y in tqdm(years, leave = False):

    # No clouds
    ee_list = ee_months.map(
                        lambda m: l5sr.filter(ee.Filter.calendarRange(y, y, 'year'))
                        .filter(ee.Filter.calendarRange(m, m, 'month'))
                        .filterBounds(deforest_hotspots)
                        .size()).flatten().getInfo()
    
    for m,month_value in enumerate(ee_list):
        row = {}
        row['year'] = y
        row['month'] = m+1
        row['size'] = month_value
        rows.append(row)
        
    for c in tqdm(cloud_covers, leave = False):

        ee_list = ee_months.map(
                lambda m: l5sr.filter(ee.Filter.calendarRange(y, y, 'year'))
                .filter(ee.Filter.calendarRange(m, m, 'month'))
                .filterBounds(deforest_hotspots).filterMetadata("CLOUD_COVER","less_than", c)
                .size()).flatten().getInfo()
        
        for m,month_value in enumerate(ee_list):
            row = {}
            row['cloud_cover'] = c
            row['year'] = y
            row['month'] = m+1
            row['size'] =  month_value
            rows.append(row)
            
            

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=12.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

In [16]:
df_image_counts_for_l5sr = pd.DataFrame(rows)

In [17]:
df_image_counts_for_l5sr = df_image_counts_for_l5sr.fillna(100)

In [18]:
df_image_counts_for_l5sr.to_csv(r'data\datasets\image_counts_by_cloud_l5sr.csv')

In [39]:
df_image_counts_for_l5sr[(df_image_counts_for_l5sr['month'] == 7)&(df_image_counts_for_l5sr['cloud_cover'] >=5.0)].groupby(['year','month']).agg(sum)

Unnamed: 0_level_0,Unnamed: 1_level_0,size,cloud_cover
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1
2001,7,449,115.0
2002,7,6,115.0
2003,7,530,115.0
2004,7,417,115.0
2005,7,590,115.0
2006,7,460,115.0
2007,7,474,115.0
2008,7,561,115.0
2009,7,484,115.0
2010,7,352,115.0
