In [1]:
import pandas as pd
import geopandas as gp
import os
from shapely.geometry import Polygon
from shapely.geometry import Point
from matplotlib import pyplot
import matplotlib.pyplot as plt
from ipyleaflet import Map, GeoData, basemaps, LayersControl
import json
import datetime
import pickle
import rasterio as rio
import rasterio.plot
import rasterio.crs
from ipypb import track
from ipypb import chain

os.chdir(r"C:\Users\User\Documents\Work\SDGs and AI\6.3.2")

In [2]:
### List of map tags https://wiki.openstreetmap.org/wiki/Map_Features

### Create list of coordinates for retrieval

In [2]:
### Read in EEA waterbase selected columns

col_to_keep = ['monitoringSiteIdentifier','parameterWaterBodyCategory','observedPropertyDeterminandCode',
               'phenomenonTimeSamplingDate','resultObservedValue',]

wqdb = pd.read_csv('./EEA_water_base/Waterbase_v2018_1_T_WISE4_DisaggregatedData.csv',usecols=col_to_keep,parse_dates=True)

### Read site metadata

site_meta = pd.read_csv('./EEA_water_base/Waterbase_v2018_1_WISE4_MonitoringSite_DerivedData.csv')

In [3]:
### filter for just sites with chlorophyll data and just lakes

deter = 'EEA_3164-01-0'

chla_data = wqdb[(wqdb.observedPropertyDeterminandCode==deter)&(wqdb.parameterWaterBodyCategory=='LW')]

chla_data['phenomenonTimeSamplingDate'] = pd.to_datetime(chla_data.phenomenonTimeSamplingDate)

del(wqdb)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [4]:
### Join lon lat to site data and create data and site list

chla_data = chla_data.merge(site_meta[['monitoringSiteIdentifier','lon','lat']],how='left',on='monitoringSiteIdentifier')

chla_data = chla_data[chla_data.lon.notnull()]

### create list of sites from data
chla_sites = chla_data.monitoringSiteIdentifier.unique()

In [5]:
### create dictionary of site ids

#del(sites)

sites = dict()

for site in chla_sites:
    sites[site] = site

In [6]:
### Convert data to spatial and change CRS
## NOTE: WISE data reported in ETRS89-GRS80 ('+proj=longlat +ellps=GRS80 +no_defs'), USGS use  '+proj=utm +zone=18 +ellps=WGS84 +datum=WGS84 +units=m +no_defs'

geometry = [Point(xy) for xy in zip(chla_data['lon'],chla_data['lat'])]

crs= '+proj=longlat +ellps=GRS80 +no_defs' #'epsg:4258' #

chla_data = gp.GeoDataFrame(chla_data, crs=crs, geometry=geometry)

chla_data = chla_data.to_crs('+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs')

In [7]:
### create df in dictionary to receive API search data + add lon-lat + flagged for whether site has been checked

for sid in sites:
    
    dates = pd.Series(chla_data[(chla_data.monitoringSiteIdentifier==sid)].phenomenonTimeSamplingDate.unique())
    data = pd.DataFrame(data=None,index=dates,columns=['7_scene_tf','7_scene_id','7_scene_dt','7_file',
                                                      '8_scene_tf','8_scene_id','8_scene_dt','8_file'])
    
    sites[sid] = dict()
    sites[sid]['data'] = data
    sites[sid]['lon'] =  chla_data[chla_data.monitoringSiteIdentifier==sid].geometry.x.unique()[0]
    sites[sid]['lat'] =  chla_data[chla_data.monitoringSiteIdentifier==sid].geometry.y.unique()[0]
    sites[sid]['checked'] = {'checked': 0 }

### Landsat API call to check for coverage

https://earthexplorer.usgs.gov/inventory/documentation#access

https://pypi.org/project/landsatxplore/

In [8]:
import landsatxplore.api
from landsatxplore.earthexplorer import EarthExplorer

user = 'clement.attwood.18@ucl.ac.uk'
password = 'm9.iETxKMtZ6B,q'

api = landsatxplore.api.API(user,password)

In [16]:
del(sites['FISW_494'])

In [None]:
# Create call for each site and each date

lag = 1 # number of days to search either side of sample date

platforms = ['LANDSAT_ETM_C1','LANDSAT_8_C1'] 

for sid in track(sites):
        
    dates = sites[sid]['data'].index
    sites[sid]['checked'] = 1
    
    for date,platform in track(zip(list(dates)*2,sorted(platforms*len(dates))),len(dates)*2):
        
        start = date-datetime.timedelta(days=lag)
        end = date+datetime.timedelta(days=lag)
        
        date = date.strftime("%Y-%m-%d")

        scenes = api.search(dataset = platform,
                            latitude = sites[sid]['lat'],
                            longitude = sites[sid]['lon'],
                            start_date = start.strftime("%Y-%m-%d"),
                            end_date = end.strftime("%Y-%m-%d"),
                            max_cloud_cover = 10)
        
        if platform=='LANDSAT_ETM_C1':
            
            sites[sid]['data'].loc[date,'7_scene_tf'] = len(scenes)
            
            if len(scenes)>0:
                sites[sid]['data'].loc[date,'7_scene_id'] = scenes[0]['entityId']
                sites[sid]['data'].loc[date,'7_scene_dt'] = scenes[0]['modifiedDate']
                pickle.dump(scenes,open('./Landsat/Scenes/scene_'+str(scenes[0]['entityId'])+'.p','wb'))
            
        else:
        
            sites[sid]['data'].loc[date,'8_scene_tf'] = len(scenes)
            
            if len(scenes)>0:
                sites[sid]['data'].loc[date,'8_scene_id'] = scenes[0]['entityId']
                sites[sid]['data'].loc[date,'8_scene_dt'] = scenes[0]['modifiedDate']
                pickle.dump(scenes,open('./Landsat/Scenes/scene_'+str(scenes[0]['entityId'])+'.p','wb'))
                
                

In [11]:
pickle.dump(sites,open('sites.p','wb'))

### Download scenes

In [19]:
# return dict for v1 if {k2: v2} == {'checked': 1}

{k1:{k2: v2 for k2, v2 in v1.items() if {k2: v2} == {'checked': 1}} for k1, v1 in sites.items()}

{'FISW_494': {'checked': 1},
 'FISW_198': {'checked': 1},
 'FISW_467': {'checked': 1},
 'FISW_511': {'checked': 1},
 'LTL71': {'checked': 1},
 'EESJA1666003': {'checked': 1},
 'FISW_524': {'checked': 1},
 'FISW_474': {'checked': 1},
 'FISW_530': {'checked': 1},
 'FISW_2686': {'checked': 1},
 'FISW_719': {'checked': 1},
 'FISW_517': {'checked': 1},
 'FISW_611': {'checked': 1},
 'ATSE50201000': {'checked': 1},
 'ATSE50501000': {'checked': 1},
 'ATSE50301000': {'checked': 1},
 'ATSE80101000': {'checked': 1},
 'ATSE40501000': {'checked': 1},
 'ATSE40101000': {'checked': 1},
 'ATSE90102000': {'checked': 1},
 'ATSE20401000': {'checked': 1},
 'ATSE50102000': {'checked': 1},
 'ATSE20701000': {'checked': 1},
 'ATSE20501000': {'checked': 1},
 'ATSE50101000': {'checked': 1},
 'ATSE40201000': {'checked': 1},
 'ATSE60301000': {'checked': 1},
 'ATSE60201000': {'checked': 1},
 'ATSE70101000': {'checked': 1},
 'ATSE70301000': {'checked': 1},
 'ATSE70201000': {'checked': 1},
 'ATSE20601000': {'checked'

In [28]:
sites['FISW_524']['data']['7_scene_dt']

2015-07-02    NaN
2016-07-05    NaN
2016-08-24    NaN
2016-10-19    NaN
2017-07-18    NaN
2017-08-29    NaN
2017-10-18    NaN
Name: 7_scene_dt, dtype: object

In [39]:
#k:[v[i] for i,x in enumerate(v) if data['b'][i] >= 20] for k,v in data.items()}

{k1:{k2: v2 for k2, v2 in v1.items() if {k2: v2} == {'data': data['7_scene_tf']}} for k1, v1 in sites.items()}

TypeError: list indices must be integers or slices, not str