In [179]:
import ee
import pandas as pd
import pickle
import time

# Initialize the Earth Engine client library (the `ee` module imported above).
ee.Initialize()

### Load data & create functions

Adapt the file path below to your local copy of the data from the Bastin paper.  

`get_pix()` samples each `ee.Image` at its point and returns the band values (the `bands` argument is left empty in the calls below, so all bands are returned).  
`get_img()` filters an `ee.ImageCollection` for each point and reduces the matching images to a median image.

In [None]:
# Load the cleaned Bastin dataset (CSV); the helpers below read the "longitude" and
# "latitude" columns of the frames they are given.
df = pd.read_csv("../data/bastin_db_cleaned.csv", sep=",")

def get_pix(df, img_list, bands=None):
    ''' Sample each image at its matching coordinate and return the band values.

        Row k of `df` (by position, not by index label) supplies the point for
        `img_list[k]`. Each image is reduced at that point with
        ee.Reducer.first() and a scale of 30, then fetched with getInfo(),
        i.e. one blocking Earth Engine request per image.

        Args:
            df: DataFrame with "longitude" and "latitude" columns. It is not
                modified.
            img_list: sequence of ee.Image objects, at most len(df) long.
            bands: optional list of band selectors passed to ee.Image.select()
                before sampling. None or an empty list keeps every band.

        Returns:
            A list with one dictionary per image, in the order of `img_list`.

        Raises:
            ValueError: if `img_list` has more entries than `df` has rows.
    '''
    # Plain Python lists give positional access regardless of df's index labels
    # and leave the caller's frame (and its column Series) untouched.
    lons = df["longitude"].tolist()
    lats = df["latitude"].tolist()

    # Fail before any request is sent instead of part-way through the run.
    if len(img_list) > len(lons):
        raise ValueError(
            "img_list has " + str(len(img_list)) + " images but df has only "
            + str(len(lons)) + " rows"
        )

    select_bands = bands is not None and len(bands) > 0

    feat_list = []
    for i, img in enumerate(img_list):
        if i % 10 == 0:
            print("No. in progress: "+str(i))
        if select_bands:
            img = img.select(bands)
        p = ee.Geometry.Point(lons[i], lats[i])
        feat_list.append(img.reduceRegion(ee.Reducer.first(), p, 30).getInfo())
    return feat_list

def get_img(df, date = ('2015-01-01', '2015-12-31')):
    ''' Build one median-composite image per row of `df`.

        For every row (taken by position, so any index labels are fine) the
        'LANDSAT/LC08/C01/T1_SR' image collection is filtered and the
        remaining images are reduced with median().

        Filters, applied in this order:
            - timeframe: filterDate(date[0], date[1])
            - location: filterBounds on the row's (longitude, latitude) point
            - image quality: IMAGE_QUALITY_TIRS not less than 7
            - cloud cover: CLOUD_COVER not greater than 0.25

        Args:
            df: DataFrame with "longitude" and "latitude" columns.
            date: two-element sequence (start, end) of date strings.

        Returns:
            A list of ee.Image objects, one per row of `df`, in row order.
    '''
    # NOTE(review): CLOUD_COVER is presumably stored as a percentage, in which
    # case .25 means 0.25 % rather than 25 % - confirm the intended threshold.
    # The collection and its date filter are the same for every point, so they
    # are built once; the per-point filters keep their original order.
    in_timeframe = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR').filterDate(date[0], date[1])

    img_list = []
    for lon, lat in zip(df["longitude"].tolist(), df["latitude"].tolist()):
        p = ee.Geometry.Point(lon, lat)
        dataset = in_timeframe.filterBounds(p)
        dataset = dataset.filterMetadata('IMAGE_QUALITY_TIRS', 'not_less_than', 7)
        dataset = dataset.filterMetadata('CLOUD_COVER', 'not_greater_than', .25)
        img_list.append(dataset.median())

    return img_list

In [181]:
# Keep only the rows whose dryland_assessment_region is 'Australia'.
# The filtered frame keeps df's original row labels (the index is not reset).
d = df.loc[df["dryland_assessment_region"] == 'Australia']

In [182]:
# Create image list.
%time _ = get_img(df = d, date = ['2015-01-01', '2015-12-31'])

CPU times: user 5.52 s, sys: 63.2 ms, total: 5.58 s
Wall time: 5.6 s


In [183]:
# Example pull of features.
%time feats = get_pix(df = d.iloc[0:5,], img_list = _[0:5], bands = [])

No. in progress: 0
CPU times: user 150 ms, sys: 123 µs, total: 151 ms
Wall time: 5.16 s


In [184]:
# Number of rows in the Australia subset `d`.
len(d)

15104

### Loop over index range of dataframe
Fynn takes 0 to 5000.
Luca takes 5000 to 10000.
Dario takes 10000 to 15000.
Final 104 will be added afterwards.
The same ending/starting value might serve as a comparison.

In [None]:
# Row range of `d` (positional) handled by this run, processed in chunks.
# Adjust IX_START / IX_STOP to the range assigned to you.
IX_START, IX_STOP, CHUNK_SIZE = 0, 6000, 1000
PAUSE_SECONDS = 60 # Required, otherwise G Earth Engine blocks the getInfo at some point

images = get_img(df = d, date = ['2015-01-01', '2015-12-31'])
features = []
for ix_l in range(IX_START, IX_STOP, CHUNK_SIZE):
    ix_u = min(ix_l + CHUNK_SIZE, IX_STOP)
    print('Range: '+str(ix_l)+' to '+str(ix_u)+'.')
    feats = get_pix(df = d.iloc[ix_l:ix_u,], img_list = images[ix_l:ix_u], bands = [])
    features.extend(feats)
    # Pause between chunks only; nothing is requested after the last one.
    if ix_u < IX_STOP:
        time.sleep(PAUSE_SECONDS)

### Storage

In [None]:
# Persist the collected feature dictionaries as a pickle. The file goes into the
# relative ../data directory that the input CSV is read from, so the cell does not
# depend on one user's home directory.
# NOTE(review): "lower_upper" in the file name looks like a placeholder for the
# processed index range - confirm and rename before sharing the file.
with open("../data/australia_2015_lower_upper.pkl", "wb") as f:
    pickle.dump(features, f, pickle.HIGHEST_PROTOCOL)