## Data Extraction - July 11

In [7]:
#Import the Earth Engine Python client and initialize it for this session
#NOTE(review): Initialize() presumably relies on credentials stored by an earlier
#authentication step -- confirm for the environment this notebook runs in.
import ee
ee.Initialize()

In [8]:
#Import other packages
import pandas as pd
from IPython.display import Image

In [9]:
#Read the station coordinates and key the dataframe on the 'id' column.
#getData later reads the 'name', 'lon' and 'lat' columns of each row.
dfPoints = pd.read_csv('../data/station_locations.csv').set_index('id')

Image collections: 
* [MODOCGA.006 Terra Ocean Reflectance Daily Global 1km](https://explorer.earthengine.google.com/#detail/MODIS%2F006%2FMODOCGA)
* [MYDOCGA.006 Aqua Ocean Reflectance Daily Global 1km](https://explorer.earthengine.google.com/#detail/MODIS%2F006%2FMYDOCGA)
* [MOD09A1.006 Terra Surface Reflectance 8-Day Global 500m](https://explorer.earthengine.google.com/#detail/MODIS%2F006%2FMOD09A1)
* [Sentinel-3 OLCI EFR: Ocean and Land Color Instrument Earth Observation Full Resolution](https://explorer.earthengine.google.com/#detail/COPERNICUS%2FS3%2FOLCI)

In [10]:
#Open the Earth Engine image collections by asset id (same order as the names on the left)
colMODIS, colMODISTerra, colMODISAqua, colS3 = map(
    ee.ImageCollection,
    (
        'MODIS/006/MOD09A1',     #Terra surface reflectance, 8-day, 500m
        'MODIS/006/MODOCGA',     #Terra ocean reflectance, daily, 1km
        'MODIS/006/MYDOCGA',     #Aqua ocean reflectance, daily, 1km
        'COPERNICUS/S3/OLCI',    #Sentinel-3 OLCI full resolution
    ),
)

In [11]:
#Filter bands (MODIS daily sensors have odd-bit bands at last 4 slots)
#bandNames() asks the server for just the band ids of the first image, instead of
#downloading that image's full metadata and routing it through a DataFrame.
#The ee.Image(...) cast mirrors how the thumbnail cell wraps first().
AquaBands = ee.Image(colMODISAqua.first()).bandNames().getInfo()
TerraBands = ee.Image(colMODISTerra.first()).bandNames().getInfo()

#Keep everything except the trailing 4 bands
#NOTE(review): the count of 4 comes from the note above -- confirm against the
#band list of each collection before relying on it.
colMODISTerra_1 = colMODISTerra.select(TerraBands[:-4])
colMODISAqua_1 = colMODISAqua.select(AquaBands[:-4])

In [12]:
#Date window for the extraction; passed as (startDate, endDate) to every getData call
startDate = '2015-03-16'
endDate = '2018-06-01'
#Left disabled: getData applies filterDate itself, so colMODIS stays unfiltered here
#colMODIS = colMODIS.filterDate(startDate, endDate)

In [13]:
#Report how many images are in colMODIS
#NOTE: the filterDate line in the previous cell is commented out, so this counts
#the whole collection rather than only the startDate-endDate window.
imageCount = colMODIS.size().getInfo()
print("{} images returned".format(imageCount))

844 images returned


In [14]:
#Get the first image and show its thumbnail
scene = ee.Image(colMODIS.first())
#The visualization parameters must be keyed by the strings 'min' and 'max';
#the bare names min and max are Python's builtin functions, not parameter names.
#NOTE(review): confirm that 0-0.3 matches the stored value range of this product
#and whether a 'bands' entry is needed to choose which bands are rendered.
Image(url=scene.getThumbURL({'min':0,'max':0.3}))

Iterate through each location and extract data 

In [15]:
def getData(idx,df,imgCollection,startDate,endDate,verbose=False):
    """Extract the values of an image collection at one station location.

    Args:
        idx: Integer position (as used by ``iloc``) of the station row in ``df``.
        df: Station dataframe providing 'name', 'lon' and 'lat' columns.
        imgCollection: Earth Engine image collection to sample.
        startDate: Start of the date window, passed to ``filterDate``.
        endDate: End of the date window, passed to ``filterDate``.
        verbose: When True, print how many images remain after the date filter
            and after the bounds filter (each count is a separate server request).

    Returns:
        A pandas DataFrame whose first column is 'name' (the station name),
        followed by the columns named in the header row returned by
        ``getRegion``. The 'time' column is converted from epoch milliseconds
        to datetimes. If no image matches, the frame has those columns and
        zero rows.
    """
    #Get the record at the index (from the dataframe passed in, not a global)
    rec = df.iloc[idx]

    #Get the name of the record
    name = rec['name']

    #Build the point from plain Python floats in (lon, lat) order
    coords = [float(rec['lon']), float(rec['lat'])]
    ptGeom = ee.Geometry.Point(coords)

    #Filter the image collection on the dates
    dateColl = imgCollection.filterDate(startDate,endDate)
    if verbose:
        print("{} images in date range".format(dateColl.size().getInfo()))

    #Filter the image collection on the point
    ptColl = dateColl.filterBounds(ptGeom)
    if verbose:
        print("{} images in geographic range".format(ptColl.size().getInfo()))

    #Extract information at the point; the first row holds the column names
    ptInfo = ptColl.getRegion(ptGeom,1).getInfo()
    header, rows = ptInfo[0], ptInfo[1:]

    #Passing the header as columns keeps this valid when there are no data rows
    dfData = pd.DataFrame(rows, columns=header)

    #Convert the millisecond timestamp to a datetime
    dfData['time'] = pd.to_datetime(dfData['time'], unit='ms')

    #Insert the name as the first column
    dfData.insert(0,'name',name)

    #Return the dataframe
    return dfData

In [16]:
#MODIS 8-day: extract the series for every station, stack them, and save one CSV
dfs = []
for idx in range(dfPoints.shape[0]):
    #Progress indicator: station positions on a single line
    print(idx, end=' ')
    dfX = getData(
        idx=idx,
        df=dfPoints,
        imgCollection=colMODIS,
        startDate=startDate,
        endDate=endDate,
    )
    dfs.append(dfX)
dfOut = pd.concat(dfs)
dfOut.to_csv("../data/MODIS.csv", index=False)

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 

In [17]:
#MODIS Terra (daily, band-filtered): extract every station, stack, and save one CSV
dfs = []
for idx in range(dfPoints.shape[0]):
    #Progress indicator: station positions on a single line
    print(idx, end=' ')
    dfX = getData(
        idx=idx,
        df=dfPoints,
        imgCollection=colMODISTerra_1,
        startDate=startDate,
        endDate=endDate,
    )
    dfs.append(dfX)
dfOut = pd.concat(dfs)
dfOut.to_csv("../data/MODISTerra.csv", index=False)

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 

In [18]:
#MODIS Aqua (daily, band-filtered): extract every station, stack, and save one CSV
dfs = []
for idx in range(dfPoints.shape[0]):
    #Progress indicator: station positions on a single line
    print(idx, end=' ')
    dfX = getData(
        idx=idx,
        df=dfPoints,
        imgCollection=colMODISAqua_1,
        startDate=startDate,
        endDate=endDate,
    )
    dfs.append(dfX)
dfOut = pd.concat(dfs)
dfOut.to_csv("../data/MODISAqua.csv", index=False)

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 

In [19]:
#MODIS Aqua, first 3 stations only; written to a separate file (MODISAquaX.csv)
dfs = []
for idx in range(3):
    #Progress indicator: station positions on a single line
    print(idx, end=' ')
    dfX = getData(
        idx=idx,
        df=dfPoints,
        imgCollection=colMODISAqua_1,
        startDate=startDate,
        endDate=endDate,
    )
    dfs.append(dfX)
dfOut = pd.concat(dfs)
dfOut.to_csv("../data/MODISAquaX.csv", index=False)

0 1 2 