Notebook authors: Elena Gronskaya, Özgün Haznedar 

This notebook is used for:
- connecting to Google Earth Engine, scanning a selected geographical area, and
downloading 80 km^2 images from two satellite sources (Landsat-8, lower
resolution, and Sentinel-2, higher resolution) to Google Drive
- prior to downloading, images with clouds or no-data pixels are discarded, and
temporal matching is done so that at most one image pair is downloaded per
month, with the restriction that the two images of a pair are taken no more
than max_days_apart days apart

In [None]:
# Connect to the Google Earth Engine Python API.
# The `ee` module imported here is the one tif_exporter uses further down.

import ee

# Trigger the authentication flow.
ee.Authenticate()

# Initialize the library (must run before any other `ee` call in this notebook).
ee.Initialize()

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Shift center point by specified distance and get adjacent image with small overlap

# Latitude: 1 deg = 110.574 km
# Longitude: 1 deg = 111.320*cos(latitude) km

import math

def shift_center_point(lng, lat, dist=4000, shift_lng = True, shift_lat=True):
  """Return a center point shifted north and/or east to get an adjacent image.

  The shift is 1.95 * dist metres along each enabled axis, converted to
  degrees with the approximations
    latitude:  1 deg = 110.574 km
    longitude: 1 deg = 111.320 * cos(latitude) km

  Args:
    lng: longitude of the current center point, in degrees.
    lat: latitude of the current center point, in degrees.
    dist: distance in metres the shift is derived from (presumably the same
      buffer distance that is passed to tif_exporter -- confirm with callers).
    shift_lng: when truthy, move the point east; otherwise keep lng as is.
    shift_lat: when truthy, move the point north; otherwise keep lat as is.

  Returns:
    (new_lng, new_lat); shifted coordinates are rounded to 4 decimals,
    unshifted ones are returned unchanged.
  """
  # 2 * dist would put two images edge to edge; 1.95 leaves a small overlap.
  dist_to_shift = dist*1.95

  # Metres per degree along each axis.
  lat_scale = 110.574*1000
  # math.cos works in radians while lat is in degrees, so convert first.
  # The scale is taken at the current (pre-shift) latitude.
  lng_scale = 111.320*math.cos(math.radians(lat))*1000

  new_lat = round(lat + dist_to_shift/lat_scale, 4) if shift_lat else lat
  new_lng = round(lng + dist_to_shift/lng_scale, 4) if shift_lng else lng

  return (new_lng, new_lat)

In [None]:
# Alternative method of changing image location: shift center point in even divs steps between two locations

import math

def get_shift_amount(lng, lat, final_lng, final_lat, divs):
  """Split the way from (lng, lat) to (final_lng, final_lat) into divs equal steps.

  Returns:
    (lng_step, lat_step): the per-step change in longitude and latitude,
    in the same units as the inputs. Raises ZeroDivisionError when divs is 0.
  """
  lat_span = final_lat - lat
  lng_span = final_lng - lng

  return (lng_span/divs, lat_span/divs)

def shift_center_point_by_deg(lng, lat, lng_to_shift, lat_to_shift, dist=4000, shift_lng = True, shift_lat=True):
  """Return the center point moved by a fixed amount given in degrees.

  Args:
    lng: longitude of the current center point.
    lat: latitude of the current center point.
    lng_to_shift: amount added to lng when shift_lng is truthy.
    lat_to_shift: amount added to lat when shift_lat is truthy.
    dist: unused; kept so the signature stays compatible with existing calls.
    shift_lng: when truthy, shift the longitude; otherwise keep lng as is.
    shift_lat: when truthy, shift the latitude; otherwise keep lat as is.

  Returns:
    (new_lng, new_lat); shifted coordinates are rounded to 4 decimals,
    unshifted ones are returned unchanged.
  """
  new_lat = round(lat + lat_to_shift, 4) if shift_lat else lat
  new_lng = round(lng + lng_to_shift, 4) if shift_lng else lng

  return (new_lng, new_lat)

In [None]:
def tif_exporter(lon, lat, folder_name, meter_surrounding=4000, l8_cloud_threshold=5, s2_cloud_threshold=5, max_days_apart=7, i_date='2017-01-01', f_date='2022-04-01'):
  """Start Google Drive exports of matched Landsat-8 / Sentinel-2 RGB image pairs.

  For the point (lon, lat) the function
    1. lists the Landsat-8 and Sentinel-2 images between i_date and f_date
       that have band values at the point (rows with missing values are
       dropped),
    2. reads each image's cloud property and keeps only images strictly below
       the sensor's threshold,
    3. keeps the least cloudy image per calendar month and sensor,
    4. pairs both sensors month by month and keeps the pairs whose
       acquisition days are at most max_days_apart apart,
    5. starts one Earth Engine export task per image of every remaining pair.

  Uses the module-level `ee` import, which must already be authenticated and
  initialized.

  Args:
    lon: longitude of the center point.
    lat: latitude of the center point.
    folder_name: Google Drive folder handed to the export tasks.
    meter_surrounding: buffer distance around the center point that defines
      the exported region (metres, going by the parameter name).
    l8_cloud_threshold: upper bound (exclusive) for Landsat "CLOUD_COVER".
    s2_cloud_threshold: upper bound (exclusive) for Sentinel-2
      "CLOUDY_PIXEL_PERCENTAGE".
    max_days_apart: largest allowed gap, in days, between the two images of
      a pair.
    i_date: initial date of interest (inclusive).
    f_date: final date of interest (exclusive).

  Returns:
    None. The only effect is the export tasks that get started.
  """

  import numpy as np
  import pandas as pd


  # Define location (lon, lat center point)
  poi = ee.Geometry.Point(lon, lat)

  # Define a region of interest
  roi = poi.buffer(meter_surrounding) # distance surrounding center point

  scale_l8 = 30  # landsat scale in meters
  scale_s2 = 10  # sentinel scale in meters

  l8_path = "LANDSAT/LC08/C02/T1_L2"
  s2_path = "COPERNICUS/S2_SR"

  # Red, green, blue bands of each sensor.
  l8_bands = ["SR_B4", "SR_B3", "SR_B2"]
  s2_bands = ["B4", "B3", "B2"]

  # Selection of the RGB bands and of the date window.
  l8 = ee.ImageCollection(l8_path).select(*l8_bands).filterDate(i_date, f_date)
  s2 = ee.ImageCollection(s2_path).select(*s2_bands).filterDate(i_date, f_date)

  # Get the data for the pixel intersecting the center point.
  l8_poi = l8.getRegion(poi, scale_l8).getInfo()
  s2_poi = s2.getRegion(poi, scale_s2).getInfo()

  def ee_array_to_df(arr, list_of_bands):
    """Transforms client-side ee.Image.getRegion array to pandas.DataFrame."""
    df = pd.DataFrame(arr)

    # Rearrange the header.
    headers = df.iloc[0]
    df = pd.DataFrame(df.values[1:], columns=headers)

    # Remove rows without data inside.
    df = df.dropna()

    # Convert the data to numeric values.
    for band in list_of_bands:
      df[band] = pd.to_numeric(df[band], errors='coerce')

    # Convert the time field into a datetime.
    df["datetime"] = pd.to_datetime(df["time"], unit='ms')
    df["day"] = df["datetime"].dt.date
    df["month"] = df["datetime"].dt.to_period('M')
    df["date"] = df["datetime"].dt.to_period('D')

    return df

  def l8_cloud_cover(row):
    """Return the image's CLOUD_COVER, or 100 when it cannot be read."""
    try:
      cover = ee.Image(l8_path+"/"+row.id).get("CLOUD_COVER").getInfo()
    except Exception:
      # Best effort: an unreadable image is treated as fully cloudy.
      # (Exception, not a bare except, so Ctrl-C still interrupts the scan.)
      return 100
    # A missing property would break the threshold comparison below.
    return 100 if cover is None else cover

  def s2_cloudy_pixel_percentage(row):
    """Return the image's CLOUDY_PIXEL_PERCENTAGE, or 100 when it cannot be read."""
    try:
      cover = ee.Image(s2_path+"/"+row.id).get("CLOUDY_PIXEL_PERCENTAGE").getInfo()
    except Exception:
      # Best effort: an unreadable image is treated as fully cloudy.
      return 100
    return 100 if cover is None else cover

  # dataframe for landsat images (one remote property lookup per row)
  l8_df = ee_array_to_df(l8_poi, l8_bands)
  l8_df["l8_cloud_cover"] = l8_df.apply(l8_cloud_cover, axis=1)
  l8_df = l8_df[l8_df.l8_cloud_cover < l8_cloud_threshold].reset_index()

  # selecting the least cloudy image per month
  l8_df.sort_values(by=["month", "l8_cloud_cover"], inplace=True)
  l8_df.drop_duplicates(subset="month", inplace=True)  # keeps first one with lower cloud score

  # dataframe for sentinel2 images (one remote property lookup per row)
  s2_df = ee_array_to_df(s2_poi, s2_bands)
  s2_df["s2_cloudy_pixel_percentage"] = s2_df.apply(s2_cloudy_pixel_percentage, axis=1)
  s2_df = s2_df[s2_df.s2_cloudy_pixel_percentage < s2_cloud_threshold].reset_index()

  # selecting the least cloudy image per month
  s2_df.sort_values(by=["month", "s2_cloudy_pixel_percentage"], inplace=True)
  s2_df.drop_duplicates(subset="month", inplace=True)  # keeps first one with lower cloud score

  # Pair the sensors month by month; shared column names get _L8 / _S2 suffixes.
  df = pd.merge(left=l8_df, right=s2_df, on="month", suffixes=("_L8", "_S2"))
  # Gap between the two acquisition days, expressed in days.
  df["time_difference"] = abs(df.day_L8 - df.day_S2).apply(lambda row : row / np.timedelta64(1, 'D'))
  df = df[df.time_difference<=max_days_apart]
  df.sort_values(by=["month", "s2_cloudy_pixel_percentage"], inplace=True)
  df.drop_duplicates(subset="month", inplace=True)  # keeps first one with lower s2 cloud score

  # Function that exports to drive
  def exporter(img, source, scale, date):
    """Start a Drive export task for img, clipped to the region of interest."""
    task = ee.batch.Export.image.toDrive(image=img,
                                        folder=folder_name,
                                        scale=scale,
                                        shardSize=256,
                                        region=roi,
                                        fileNamePrefix=str(int(abs(1000000*(lon*lat))))+
                                         "_"+str(date)+"_"+source+"_"+str(lon)+"_"+str(lat))
    task.start()

  # Export the images to drive, one Landsat and one Sentinel-2 image per pair
  for l8_id, l8_date, s2_id, s2_date in zip(df.id_L8, df.date_L8, df.id_S2, df.date_S2):
    l8_img = ee.Image(l8_path+"/"+l8_id).select(*l8_bands)
    exporter(l8_img, "L8", scale_l8, l8_date)

    s2_img = ee.Image(s2_path+"/"+s2_id).select(*s2_bands)
    exporter(s2_img, "S2", scale_s2, s2_date)


## Example usage below

In [None]:
# Honduras: export 60 images, moving north-east by one shift after each export
n_images = 60
c_lon, c_lat = -87.26, 14.08

for i in range(n_images):
  tif_exporter(c_lon, c_lat, "final_images_honduras")
  c_lon, c_lat = shift_center_point(c_lon, c_lat, dist=4000)


In [None]:
# Madagascar West Coast: move from a starting lat/long towards a destination
# lat/long in `divs` even steps. An image is exported before every step, so
# the destination point itself is not exported.

# Authors' note: calculated to be approx 80 km, as images are 80x80 km in size.
# NOTE(review): tif_exporter's default buffer is 4000 m -- confirm the image size.
divs = 9
c_lon, c_lat = 43.90, -24.38
lng_to_shift, lat_to_shift = get_shift_amount(c_lon, c_lat, 44.74, -18.29, divs=divs)

for i in range(divs):
  tif_exporter(c_lon, c_lat, "final_images_madagascar")
  c_lon, c_lat = shift_center_point_by_deg(c_lon, c_lat, lng_to_shift, lat_to_shift)

In [None]:
# Madagascar East Coast: 16 even steps from (46.85, -25.03) towards (50.08, -14.05)
n_steps = 16
c_lon, c_lat = 46.85, -25.03
lng_to_shift, lat_to_shift = get_shift_amount(c_lon, c_lat, 50.08, -14.05, divs=n_steps)

for i in range(n_steps):
  tif_exporter(c_lon, c_lat, "final_images_madagascar")
  c_lon, c_lat = shift_center_point_by_deg(c_lon, c_lat, lng_to_shift, lat_to_shift)

In [None]:
# California West Coast: 13 images, shifting both longitude and latitude each time
n_images = 13
c_lon, c_lat = -123.62, 38.94

for i in range(n_images):
  tif_exporter(c_lon, c_lat, "final_images_california")
  c_lon, c_lat = shift_center_point(c_lon, c_lat, dist=4000)

In [None]:
# Angola and South DRC: 23 images along lat -7.88, shifting longitude only
n_images = 23
c_lon, c_lat = 13.19, -7.88

for i in range(n_images):
  tif_exporter(c_lon, c_lat, "final_images_angola_drc")
  c_lon, c_lat = shift_center_point(c_lon, c_lat, dist=4000, shift_lng=True, shift_lat=False)

In [None]:
# Angola and South DRC, higher north: 23 images along lat -7.42, shifting longitude only
n_images = 23
c_lon, c_lat = 13.19, -7.42

for i in range(n_images):
  tif_exporter(c_lon, c_lat, "final_images_angola_drc")
  c_lon, c_lat = shift_center_point(c_lon, c_lat, dist=4000, shift_lng=True, shift_lat=False)

In [None]:
# Angola and South DRC, further north still: 23 images along lat -5.92,
# shifting longitude only (written to its own "..._drc3" folder)
n_images = 23
c_lon, c_lat = 12.48, -5.92

for i in range(n_images):
  tif_exporter(c_lon, c_lat, "final_images_angola_drc3")
  c_lon, c_lat = shift_center_point(c_lon, c_lat, dist=4000, shift_lng=True, shift_lat=False)

In [None]:
# DRC: 23 images along lat -5.05, shifting longitude only
n_images = 23
c_lon, c_lat = 12.14, -5.05

for i in range(n_images):
  tif_exporter(c_lon, c_lat, "final_images_drc")
  c_lon, c_lat = shift_center_point(c_lon, c_lat, dist=4000, shift_lng=True, shift_lat=False)

In [None]:
# Australia: 17 even steps from (146.36, -38.78) towards (153.40, -28.35)
n_steps = 17
c_lon, c_lat = 146.36, -38.78
lng_to_shift, lat_to_shift = get_shift_amount(c_lon, c_lat, 153.40, -28.35, divs=n_steps)

for i in range(n_steps):
  tif_exporter(c_lon, c_lat, "final_images_australia")
  c_lon, c_lat = shift_center_point_by_deg(c_lon, c_lat, lng_to_shift, lat_to_shift)

In [None]:
# Moving northeast across DRC, South Sudan, and Ethiopia: 50 images,
# shifting both longitude and latitude each time
n_images = 50
c_lon, c_lat = 13.03, -7.09

for i in range(n_images):
  tif_exporter(c_lon, c_lat, "final_images_drc_ssudan_eth")
  c_lon, c_lat = shift_center_point(c_lon, c_lat, dist=4000)

In [None]:
# Portugal: two columns of images, one per starting longitude; each column
# starts at lat 39.26 and shifts latitude only, 6 images per column

lng_range = [-8.13, -7.52]
images_per_column = 6

for start_lng in lng_range:

  c_lon, c_lat = start_lng, 39.26

  for i in range(images_per_column):
    tif_exporter(c_lon, c_lat, "final_images_portugal")
    c_lon, c_lat = shift_center_point(c_lon, c_lat, dist=4000, shift_lng=False, shift_lat=True)