In [1]:
import pandas as pd
import ee
from collections import defaultdict
import numpy as np
import datetime
ee.Initialize()


# Parameters

In [37]:
# LOCATION OF CSV FILE
DATA = 'data/US_Wheat/kansas_wheat_square.csv'

# Download only pictures of rice and wheat
DOWNLOAD_ONLY_POSITIVE_EXAMPLE = False
DOWNLOAD_ONLY_NEGATIVE_EXAMPLE = False

# NUMBER OF FILES TO DOWNLOAD
# set to None to download all files
DOWNLOAD_NB_FILES = 10

# RADIUS AROUND COORD IN METERS
# This is the number of meter around the point coordinate to include in the picture
RADIUS_AROUND = 400

# RANGES FOR BANDS
# Values tested on different location in nepal
# Hypothesis: Should be the same for all images
RANGE_MIN = 0
RANGE_MAX = 2000

# RANGE FOR DATES
# CSV shows one date per observation.
# However, we need to take several pictures to select ones without clouds
# We look for RANDE_DATE weeks around the date
RANDE_DATE = 2
BEGIN_TOI = '10/02/2016'
END_TOI = '17/02/2016'

# GOOGLE DRIVE FOLDER
GDRIVE_FOLDER = 'us-wheat-range'

# Import CSV data a nd remove NaNs

In [6]:
df = pd.read_csv(DATA)
print(f'Len of input data: {len(df)}')

Len of input data: 2920


In [7]:
df

Unnamed: 0.1,Unnamed: 0,names,long_min,lat_min,long_max,lat_max
0,0,tile_12600_9300.tif,-97.766670,37.566165,-97.661908,37.647905
1,1,tile_20700_1500.tif,-94.957821,39.661487,-94.850692,39.742721
2,2,tile_18300_8400.tif,-95.809562,37.819511,-95.706025,37.900016
3,3,tile_4200_10200.tif,-100.631981,37.249731,-100.524678,37.333818
4,4,tile_11100_4500.tif,-98.321651,38.842009,-98.214457,38.924199
5,5,tile_13200_6300.tif,-97.577841,38.371853,-97.472132,38.453426
6,6,tile_2400_600.tif,-101.433729,39.790334,-101.321790,39.875066
7,7,tile_300_8700.tif,-101.994702,37.592381,-101.885511,37.677544
8,8,tile_16500_3000.tif,-96.437906,39.264445,-96.332146,39.345072
9,9,tile_19200_6300.tif,-95.496080,38.380443,-95.391409,38.461207


# Google Earth Engine download


In [8]:
dataset = ee.ImageCollection('COPERNICUS/S2').select(['B4', 'B3', 'B2'])

In [9]:
df.columns

Index(['Unnamed: 0', 'names', 'long_min', 'lat_min', 'long_max', 'lat_max'], dtype='object')

In [17]:
# Create list with a tuple per point: File name - x - y - date
coords = [(point[1][0], point[1][1], point[1][2], point[1][3], point[1][4]) for point in df.loc[:, ['names', 'long_min', 'lat_min', 'long_max', 'lat_max']].iterrows()]

In [19]:
# File name - x - y - date - type
coords[0]

('tile_12600_9300.tif',
 -97.76666993223272,
 37.56616469370293,
 -97.6619080559162,
 37.64790464522484)

In [38]:
# Generate a rectangle containing the circle (centered on the coordinate) with radius RADIUS_AROUND
def get_geometry_radius(geometry_point):
    return ee.Geometry.Rectangle([coord[:, 0].min(), coord[:, 1].min(), coord[:, 0].max(), coord[:, 1].max()])

# Generate the dates around the observation date
def date_range_to_collect():
  
    d, m, y = BEGIN_TOI.split('/')
        
    target_date = datetime.date(int(y), int(m), int(d))
    delta = datetime.timedelta(weeks=RANDE_DATE)
    return target_date-delta, target_date+delta

def generate_image(image_collection, image_name, long_min, lat_min, long_max, lat_max, debug=True):
    if debug: print(f'Working on {image_name}')

    geometry_radius= ee.Geometry.Rectangle([long_min, lat_min, long_max, lat_max])
    
    spatialFiltered = image_collection.filterBounds(geometry_radius)

    date_range = date_range_to_collect()
    if debug: print('date range:' + str(date_range[0]) + str(date_range[1]))
    temporalFiltered = spatialFiltered.filterDate(str(date_range[0]), str(date_range[1]))

    # The darker pixels have the least clouds
    least_clouds = temporalFiltered.min()
    #least_clouds = temporalFiltered.sort('CLOUD_COVER').first()

#ar scene = ee.Image(sorted.first())

    testimg = least_clouds.visualize(bands=['B4', 'B3', 'B2'], min=RANGE_MIN, max=RANGE_MAX)
    #testimg = least_clouds.select('B.+')
    #testimg = testimg.resample('bicubic')
    task = ee.batch.Export.image.toDrive(testimg, folder=GDRIVE_FOLDER, region=geometry_radius.getInfo()['coordinates'][0], description=image_name.split('.')[0], scale=10)
    task.start()


In [17]:
df['type'].value_counts()

other    1015
rice      387
wheat      18
Name: type, dtype: int64

In [39]:
dataset_collection = ee.ImageCollection('COPERNICUS/S2')


for i, point in enumerate(coords):
    if DOWNLOAD_NB_FILES is not None and i >= DOWNLOAD_NB_FILES:
        break
    generate_image(dataset_collection, point[0], point[1], point[2], point[3], point[4])

Working on tile_12600_9300.tif
date range:2016-01-272016-02-24
Working on tile_20700_1500.tif
date range:2016-01-272016-02-24
Working on tile_18300_8400.tif
date range:2016-01-272016-02-24
Working on tile_4200_10200.tif
date range:2016-01-272016-02-24
Working on tile_11100_4500.tif
date range:2016-01-272016-02-24
Working on tile_13200_6300.tif
date range:2016-01-272016-02-24
Working on tile_2400_600.tif
date range:2016-01-272016-02-24
Working on tile_300_8700.tif
date range:2016-01-272016-02-24
Working on tile_16500_3000.tif
date range:2016-01-272016-02-24
Working on tile_19200_6300.tif
date range:2016-01-272016-02-24
