In [None]:
import ee
# Sign in to Google Earth Engine and initialise the client before any other
# `ee.*` call in this notebook; Authenticate() must precede Initialize().
# NOTE(review): Authenticate() presumably starts an interactive sign-in flow
# when no cached credentials exist - confirm before running unattended.
ee.Authenticate()
ee.Initialize()
# Record the client library version in the cell output for reproducibility.
print('GEE version:', ee.__version__)

In [5]:
import os

import folium
import numpy as np
import pandas as pd
from matplotlib.pyplot import cm

In [3]:
class GEE:
    """Google Earth Engine helper bound to one point of interest and date range.

    The instance stores a buffered point (``self.roi``) used to sample image
    collections, and offers three operations:

    * ``get_satdata``    - build a filtered ``ee.ImageCollection`` whose images
      carry per-band regional statistics as properties;
    * ``get_df_satdata`` - pull those properties into a time-indexed DataFrame;
    * ``map_layers``     - render tile layers, image overlays and sensor
      markers into a folium map and save it as HTML.
    """

    # Initial centre of the folium map as [lat, lon].
    # NOTE(review): presumably the centre of the Netherlands - confirm.
    _MAP_CENTER = [52.1326, 5.2913]
    # Extent every image overlay is stretched to:
    # [[lat_min, lon_min], [lat_max, lon_max]].
    _OVERLAY_BOUNDS = [[50.243, 2.395], [54.068, 8.042]]
    # Value stored when a regional statistic is missing; anything below
    # _MISSING_THRESHOLD is turned into NaN when the DataFrame is built.
    _MISSING_SENTINEL = -9999
    _MISSING_THRESHOLD = -9000

    def __init__(self, dateStart, dateEnd, lat, lon, buffer_size=10):
        """Store the query window and build the region of interest.

        Args:
            dateStart: Start of the date range handed to ``filterDate``.
            dateEnd: End of the date range handed to ``filterDate``.
            lat: Latitude of the point of interest.
            lon: Longitude of the point of interest.
            buffer_size: Distance passed to ``ee.Geometry.buffer`` around the
                point; twice this value is also used as the ``reduceRegion``
                scale in ``get_satdata``.
        """
        self.dateStart = dateStart
        self.dateEnd = dateEnd
        self.lat = lat
        self.lon = lon
        self.buffer_size = buffer_size
        # Earth Engine points are [lon, lat], not [lat, lon].
        self.roi = ee.Geometry.Point([self.lon, self.lat]).buffer(self.buffer_size)

        # Country outline of the Netherlands painted into an image
        # (kept for use as an optional map layer).
        dataset = ee.FeatureCollection("FAO/GAUL/2015/level0")\
            .filter(ee.Filter.eq('ADM0_NAME', 'Netherlands'))
        self.NL = ee.Image().float().paint(dataset, 'ADM0_CODE')

    @staticmethod
    def _sensor_popup_html(name, number, lat, lon, components):
        """Return the HTML body of an air-quality sensor popup.

        ``components`` is a ``|``-separated string; it is shown comma-separated.
        """
        component_list = ', '.join(str(components).split('|'))
        return (
            '<h6>Air quality sensor</h6>'
            f'<h5><b>{name}</b></h5>'
            '<table style="width:100%">'
            f'<tr><td><b>number</b></td><td>{number}</td></tr>'
            f'<tr><td><b>lat, lon</b></td><td>{lat}, {lon}</td></tr>'
            f'<tr><td><b>components</b></td><td>{component_list}</td></tr>'
            '</table>'
        )

    def map_layers(self, mapids, info=None, title=None, file_out='plots/', zoom=12,
                   dataframe=None, image_locale=None, *, popups=False):
        """Build a folium map, save it to ``f'{file_out}gee.html'`` and return it.

        Args:
            mapids: Iterable of ``(mapid, layer_title)`` pairs, where ``mapid``
                is a mapping whose ``'tile_fetcher'`` entry exposes
                ``url_format``.
            info: Attribution text passed to every tile layer.
            title: Unused; kept so existing callers keep working.
            file_out: Path prefix of the output file (``gee.html`` is appended
                verbatim, so include a trailing separator for a directory).
            zoom: Initial zoom level of the map.
            dataframe: Optional DataFrame with ``lat`` and ``lon`` columns, one
                row per sensor. When ``popups`` is true it must also provide
                ``number`` and ``components`` columns.
            image_locale: Optional image path or iterable of image paths to
                draw as overlays.
            popups: When true, attach an HTML popup to each sensor marker.
                Defaults to false, which matches the previous output.

        Returns:
            The ``folium.Map`` (the saved file holds the map wrapped in an
            IFrame).
        """
        m = folium.Map(location=self._MAP_CENTER, zoom_start=zoom,
                       control_scale=True, attributionControl=False)

        for mapid, layer_title in mapids or ():
            folium.TileLayer(tiles=mapid['tile_fetcher'].url_format, attr=info,
                             overlay=True, name=layer_title).add_to(m)

        # A bare string would otherwise be iterated character by character.
        if isinstance(image_locale, str):
            image_locale = [image_locale]
        for image in image_locale or ():
            # Layer name: file name without extension, '_' shown as ':'.
            layer_name = os.path.splitext(os.path.basename(image))[0].replace('_', ':')
            folium.raster_layers.ImageOverlay(
                name=layer_name,
                image=image,
                bounds=self._OVERLAY_BOUNDS,
                opacity=0.9,
                interactive=True,
                cross_origin=False,
                zindex=100,
                overlay=True,
                # Only the air-quality overlay is visible initially.
                show='air quality' in image.lower(),
            ).add_to(m)

        sensors = folium.FeatureGroup(name='Air quality sensors', show=True).add_to(m)
        if dataframe is not None:
            for station, row in dataframe.iterrows():
                popup = None
                if popups:
                    popup = folium.Popup(
                        self._sensor_popup_html(station, row['number'], row['lat'],
                                                row['lon'], row['components']),
                        max_width=350)
                # Attach the marker to the feature group only; also adding it
                # to the map would register the same element twice.
                folium.CircleMarker([row['lat'], row['lon']],
                                    radius=5,
                                    color='black',
                                    weight=3,
                                    fill=True,
                                    fillColor='black',
                                    fillOpacity=0.5,
                                    popup=popup,
                                    tooltip=None).add_to(sensors)

        folium.LayerControl().add_to(m)

        out_name = f'{file_out}gee.html'
        out_dir = os.path.dirname(out_name)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        m.save(out_name)

        # Re-save the page wrapped in a full-size IFrame, overwriting the file.
        with open(out_name, 'r', encoding='utf-8') as handle:
            page = handle.read()
        folium.IFrame(html=page, width='100%', height='100%').save(out_name)

        return m

    def get_satdata(self, sat_url, bands, clip=True):
        """Return an image collection annotated with per-band regional values.

        The collection ``sat_url`` is limited to ``self.roi`` and to
        ``[self.dateStart, self.dateEnd)``. Every image gets a
        ``system:time_start`` band and, for each band in ``bands``, a property
        ``'<band>_mean'``.

        Note: despite the ``_mean`` suffix the statistic is the *median* over
        ``self.roi`` (``ee.Reducer.median``).

        Args:
            sat_url: Earth Engine collection id. Ids containing both ``'S5P'``
                and ``'NO2'`` are additionally cloud-masked.
            bands: Band names to select and summarise.
            clip: When true, clip each image to ``self.roi``.

        Returns:
            ``ee.ImageCollection``
        """
        def get_timestamps(image):
            # Add a band holding the image's 'system:time_start' property.
            stamped = image.addBands(image.metadata('system:time_start'))
            return stamped.clip(self.roi) if clip else stamped

        def to_metadata(image):
            stat_names = [band + '_mean' for band in bands]
            stats = image.select(bands, stat_names).reduceRegion(
                ee.Reducer.median(), self.roi, self.buffer_size * 2)
            for stat_name in stat_names:
                value = stats.get(stat_name)
                # Substitute the sentinel so every image carries the property.
                # NOTE(review): ee.Algorithms.If presumably treats 0 as false,
                # so a median of exactly 0 would also become the sentinel -
                # confirm against the Earth Engine documentation.
                image = image.set(
                    stat_name, ee.Algorithms.If(value, value, self._MISSING_SENTINEL))
            return image

        def remove_high_clouds(image):
            # Keep only pixels whose cloud fraction is at most 0.3.
            low_clouds = image.select('cloud_fraction').lte(0.3)
            return image.updateMask(low_clouds)

        # Use the instance's own date window, not notebook-level globals.
        collection = ee.ImageCollection(sat_url)\
            .filterBounds(self.roi)\
            .filterDate(self.dateStart, self.dateEnd)
        if 'S5P' in sat_url and 'NO2' in sat_url:
            # Mask before select(): 'cloud_fraction' need not be in `bands`.
            collection = collection.map(remove_high_clouds)
        return collection.select(bands).map(get_timestamps).map(to_metadata)

    @staticmethod
    def _tidy_sat_frame(columns):
        """Turn raw column lists into a clean, hourly-indexed DataFrame.

        Args:
            columns: Mapping with a ``'datetime'`` list of timestamp strings
                plus one equally long list of values per output column.

        Returns:
            DataFrame indexed by ``datetime`` rounded to the nearest hour, with
            sentinel values replaced by NaN and only the first row kept for
            each duplicated timestamp.
        """
        frame = pd.DataFrame(columns)
        frame['datetime'] = pd.to_datetime(frame['datetime']).dt.round('1h')
        frame = frame.set_index('datetime')
        # Values below the threshold are "missing" markers, not measurements.
        frame = frame.where(frame >= GEE._MISSING_THRESHOLD, np.nan)
        return frame.loc[~frame.index.duplicated(keep='first')]

    def get_df_satdata(self, sat_data, bands, aliases):
        """Download the per-image statistics of ``sat_data`` as a DataFrame.

        Args:
            sat_data: Collection produced by ``get_satdata``.
            bands: Band names whose ``'<band>_mean'`` properties are read.
            aliases: Output column names, paired positionally with ``bands``.

        Returns:
            DataFrame indexed by acquisition time (rounded to the hour), one
            column per alias; see ``_tidy_sat_frame`` for the cleaning rules.
        """
        timestamps = ee.List(sat_data.aggregate_array('system:time_start')).map(
            lambda time_start: ee.Date(time_start).format('YYYY-MM-dd HH:mm:ss')).getInfo()
        columns = {'datetime': timestamps}
        for band, alias in zip(bands, aliases):
            columns[alias] = ee.List(sat_data.aggregate_array(band + '_mean')).getInfo()
        return self._tidy_sat_frame(columns)

In [None]:
# if SAT:
#     from PIL import Image, ImageMath
#     from matplotlib.pyplot import cm
#     from matplotlib.colors import ListedColormap
#     import os
    
#     for image in os.listdir('images/gee/tif_in'):

#         image_in = os.path.join('images/gee/tif_in', image)
#         image_out = os.path.join('images/gee/png_out', image[:-4] + '.png')

#         options_list = [
#             '-ot Byte',
#             '-of PNG',
#             '-b 1',
#             '-scale'
#         ]           

#         options_string = " ".join(options_list)

#         gdal.Translate(
#             image_out,
#             image_in,
#             options=options_string
#         )

#         im = Image.open(image_out)
        
#         if 'air quality' in image.lower():
#             data = np.array(im)
#             data_flip = data[::-1]
#             im = Image.fromarray(data_flip)
        
# #         data = np.array(im)
# #         data_alt = np.where(data == 0, -100, data)
# #         print(data_alt)
# #         im = Image.fromarray(data_alt)
        
# #         print(np.array(im))
#         custom_colormap = Visualizer().get_colormap([c3, c2, c1])

#         cmap = ListedColormap(custom_colormap)
        
#         # get colormap
#         ncolors = 256
#         color_array = cmap(range(ncolors))

#         # change alpha values
#         color_array[:,-1] = np.linspace(0.0, 1.0, ncolors) ** 0.000001

#         cmap = ListedColormap(color_array)

#         plt.imsave(image_out, im, cmap=cmap)
        
        

#         # im.save(image_out, cmap=cm.Reds)

In [None]:
# if SAT:    
#     dateStart, dateEnd = '2020-03-13', '2020-03-14'

#     images_S5P_NO2 = S5P_NO2.toList(S5P_NO2.size())
#     images_CFSV2 = CFSV2.toList(CFSV2.size())
#     images_G025 = G025.toList(G025.size()) 

#     gee = GEE(dateStart, dateEnd, lat, lon, buffer_size=10)

#     info = 'Satellite & forecasting data'
#     mapids = []

#     roi = ee.Geometry.Point([lon, lat]).buffer(10)

#     geometry = ee.Geometry.Polygon([[[2.395, 54.068], [2.395, 50.243],
#                                      [8.042, 50.243], [8.042, 54.068]]], None, False)
    
#     def getPercentiles(image):
#         percsLow = image.reduceRegion(ee.Reducer.percentile([1]), scale=5000, 
#                             geometry=geometry, maxPixels=1e9).getInfo()
#         percsHigh = image.reduceRegion(ee.Reducer.percentile([99]), scale=5000, 
#                             geometry=geometry, maxPixels=1e9).getInfo()
#         return percsLow, percsHigh
    
#     def normalize(image, origMins, origMaxs):
#         bandnames = image.bandNames().getInfo()
#         normalized = ee.Image()
#         for i in bandnames:
#             limiter = i
#             limit = image.select(limiter)\
#                 .expression(f'(b("{limiter}") - {origMins[limiter]})' \
#                 f' / ({origMaxs[limiter]} - {origMins[limiter]})').rename(limiter)
#             limit = limit.where(limit.gt(1), 1)
#             limit = limit.where(limit.lt(0), 0)
#             normalized = normalized.addBands(limit)
#         return normalized.select(bandnames)
    
#     def produce_image(image_list, bands, aliases, view_time):
#         for n, i in enumerate(image_list.getInfo()):
#             image = ee.Image(image_list.get(n)).clip(geometry)
#             timestamp = ee.Date(image.getInfo()['properties']['system:time_start']).format('YYYY-MM-dd HH:mm:ss').getInfo()

#     #         df_sat_data['datetime'] = [pd.Timestamp(date).round('1h') for date in df_sat_data['datetime']]

#             if timestamp == view_time:
#                 min_image, max_image = getPercentiles(image.select(bands))
#                 image_normalized = normalize(image.select(bands), min_image, max_image)
            
#                 for n, band in enumerate(bands):
#                     title = f'{aliases[n][:-4]} {timestamp}'

# #                     task = ee.batch.Export.image(image_normalized.select(band), title, {'scale': 2000, 'region': geometry})
# #                     task.start()
                    
# #                     while task.active():
# #                         print('Exporting image {}.'.format(title))
# #                         time.sleep(5)
# #                     print('Image {} exported.'.format(title))
                    
# #                     band_viz = {'bands': [band], 'min': 0, 'max': 1, 'palette': [c3, c2, c1]}
# #                     mapids.append([image_normalized.getMapId(band_viz), title])
    
#     produce_image(images_G025, bands_G025, aliases_G025, '2020-03-13 12:00:00')        
#     produce_image(images_CFSV2, bands_CFSV2, aliases_CFSV2, '2020-03-13 12:00:00')
#     produce_image(images_S5P_NO2, bands_S5P_NO2, aliases_S5P_NO2, '2020-03-13 12:59:49')

#     png_images = sorted(os.path.join('images/gee/png_out', image) for image in os.listdir('images/gee/png_out') if image[-3:] == 'png')
# gee.map_layers(mapids=mapids, info=info, title=title, zoom=7, dataframe=df_aq_stations, image_locale=png_images) if SAT else None # image_locale='images/gee/test_S5P3.png'

In [None]:
# if SAT:
    
#     info = 'Satellite & forecasting data'
#     mapids = []    
#     gee = GEE(dateStart, dateEnd, lat, lon, buffer_size=10)

#     png_images = sorted(os.path.join('images/gee/png_out', image) for image in os.listdir('images/gee/png_out') if image[-3:] == 'png')
# gee.map_layers(mapids=mapids, info=info, title=title, zoom=7, dataframe=df_aq_stations, image_locale=png_images) if SAT else None # image_locale='images/gee/test_S5P3.png'

In [None]:
# if SAT:
#     dateStart, dateEnd = START_DATE, END_DATE
#     dateStart, dateEnd = '2020-03-01', END_DATE
#     # https://developers.google.com/earth-engine/datasets/catalog/ECMWF_CAMS_NRT
#     bands_CAMS = ['total_aerosol_optical_depth_at_550nm_surface', 'particulate_matter_d_less_than_25_um_surface']
#     aliases_CAMS = ['Aerosols CAMS raw', 'Particulate matter CAMS raw']
#     LINK, COLNAME, ALIAS = 'ECMWF/CAMS/NRT', bands_CAMS, aliases_CAMS
#     CAMS = gee.get_satdata(LINK, COLNAME)
#     df_CAMS = gee.get_df_satdata(CAMS, COLNAME, ALIAS)
#     collection_gee += [df_CAMS]

In [None]:
#         # https://developers.google.com/earth-engine/datasets/catalog/NOAA_CFSV2_FOR6H
#     bands_CFSV2 = ['u-component_of_wind_height_above_ground', 'v-component_of_wind_height_above_ground']
#     aliases_CFSV2 = ['Wind U-component CFSV2 raw', 'Wind V-component CFSV2 raw']
#     LINK, COLNAME, ALIAS = 'NOAA/CFSV2/FOR6H', bands_CFSV2, aliases_CFSV2
#     CFSV2 = gee.get_satdata(LINK, COLNAME, clip=True)
#     df_CFSV2 = gee.get_df_satdata(CFSV2, COLNAME, ALIAS)
# #     images_CFSV2 = CFSV2.toList(CFSV2.size())
#     collection_gee += [df_CFSV2]

#     # https://developers.google.com/earth-engine/datasets/catalog/NASA_GLDAS_V021_NOAH_G025_T3H
#     base_data_G025 = [('Wind_f_inst', 'Wind speed G025 raw', 'Wind speed', 2),
#                       ('Tair_f_inst', 'Air temperature G025 raw', 'Temperature', 1),
#                       ('Psurf_f_inst', 'Pressure G025 raw', 'Air pressure', 0),
#                       ('Qair_f_inst', 'Specific humidity G025 raw', 'Dew point temp', 0),
#                       ('CanopInt_inst', 'Plant canopy surface water G025 raw', 'Precipitation', 1),
#                       ('PotEvap_tavg', 'Potential evaporation rate G025 raw', 'Moisture', 0),
#                       ('Qle_tavg', 'Latent heat net flux G025 raw', 'Road traffic intensity', 1),
#                       ('Rainf_tavg', 'Rain precipitation rate G025 raw', 'Precipitation', 0)]
    
#     bands_G025 = [band for band, alias, similar, shift in base_data_G025]
#     aliases_G025 = [alias for band, alias, similar, shift in base_data_G025]
#     LINK, COLNAME, ALIAS = 'NASA/GLDAS/V021/NOAH/G025/T3H', bands_G025, aliases_G025
#     G025 = gee.get_satdata(LINK, COLNAME, clip=True)
#     df_G025 = gee.get_df_satdata(G025, COLNAME, ALIAS)
# #     images_G025 = G025.toList(G025.size())
#     collection_gee += [df_G025]

#     # https://developers.google.com/earth-engine/datasets/catalog/ECMWF_CAMS_NRT
#     bands_CAMS = ['total_aerosol_optical_depth_at_550nm_surface', 'particulate_matter_d_less_than_25_um_surface']
#     aliases_CAMS = ['Aerosols CAMS raw', 'Particulate matter CAMS raw']
#     LINK, COLNAME, ALIAS = 'ECMWF/CAMS/NRT', bands_CAMS, aliases_CAMS
#     CAMS = gee.get_satdata(LINK, COLNAME)
#     df_CAMS = gee.get_df_satdata(CAMS, COLNAME, ALIAS)
#     df_with_sat += [df_CAMS]

In [None]:
# if SAT:
#     df_with_sat = pd.concat([df_all_data, df_S5P_NO2], axis=1)
# #     df_with_sat['Rain precipitation rate G025 raw'] = df_with_sat['Rain precipitation rate G025 raw'].fillna(0)

# #     for band, alias, similar, shift in base_data_G025: 
# #         df_with_sat[alias] = df_with_sat[alias].shift(periods=shift)
        
# #     df_with_sat[['Wind U-component CFSV2 raw', 'Wind V-component CFSV2 raw']] = df_with_sat[
# #         ['Wind U-component CFSV2 raw', 'Wind V-component CFSV2 raw']].shift(periods=9)

# # #     tester_sat = df_with_sat[['Wind U-component CFSV2', 'Wind V-component CFSV2']].dropna()
# #     df_with_sat['Wind speed CFSV2 raw'] = np.sqrt(df_with_sat['Wind U-component CFSV2 raw'] ** 2 + df_with_sat['Wind V-component CFSV2 raw'] ** 2)
# #     df_with_sat['Wind direction degrees CFSV2 raw'] = 180 / np.pi * np.arctan2(-df_with_sat['Wind U-component CFSV2 raw'], -df_with_sat['Wind V-component CFSV2 raw'])
# #     df_with_sat['Wind direction cos CFSV2 raw'] = np.cos(np.radians(df_with_sat['Wind direction degrees CFSV2 raw']))
# #     df_with_sat['Wind direction sin CFSV2 raw'] = np.sin(np.radians(df_with_sat['Wind direction degrees CFSV2 raw']))   
# #     df_with_sat.drop(['Wind direction degrees CFSV2 raw'], axis=1, inplace=True)
    
#     for i in [(pollutant_name_sat + ' raw', center_sensor, -12),]:
# #               ('Wind speed G025 raw', 'Wind speed', 0),
# #              ('Air temperature G025 raw', 'Temperature', 0),
# #              ('Pressure G025 raw', 'Air pressure', 0), ('Specific humidity G025 raw', 'Dew point temp', 0),
# #              ('Plant canopy surface water G025 raw', 'Precipitation', 0),
# #               ('Potential evaporation rate G025 raw', 'Moisture', 0),
# #               ('Latent heat net flux G025 raw', 'Road traffic intensity', 0),
# # #               ('Aerosols CAMS raw', 'Horizontal view', 0), ('Particulate matter CAMS raw', 'Horizontal view', 0),
# # #               ('Wind U-component CFSV2 raw', 'Wind direction cos', 0), ('Wind V-component CFSV2 raw', 'Wind direction sin', 0)
# #              ]:
#         var1, var2, adjuster = i
#         df_with_sat_align = df_with_sat[[var1, var2]].dropna()
#         model = LinearRegression().fit(np.array(df_with_sat_align[var1]).reshape(-1, 1), df_with_sat_align[var2])

#         # Find the slope and intercept from the model
#         slope = model.coef_[0] # Takes the first element of the array
#         intercept = model.intercept_
#         df_with_sat[var1] = slope * df_with_sat[var1] + intercept + adjuster
    
#     cols_raw = aliases_S5P_NO2
# #     + aliases_CFSV2 + aliases_G025 + ['Wind speed CFSV2 raw', 'Wind direction cos CFSV2 raw', 'Wind direction sin CFSV2 raw']
# #         ['Aerosols CAMS raw', 'Particulate matter CAMS raw']
    
#     for col in cols_raw:
#         df_with_sat[f'{col[:-4]}'] = df_with_sat[col].interpolate()

#     cols = [col[:-4] for col in cols_raw]
#     for _ in range(3):
#         df_with_sat = roller(df_with_sat, [cols[0]], 12)
#         df_with_sat = roller(df_with_sat, cols[1:], 3)            

#     for col in cols:
#         s = pd.Series(df_with_sat[col].to_list()).interpolate(method='polynomial', order=2)
#         df_with_sat[col] = [i for i in s]
    
#     ranger = -168
#     date_range = to_datetime(df_with_sat.index)
#     x_range = (date_range[ranger], date_range[-1])
    
#     x = to_datetime(df_with_sat.index[ranger:]) # .iloc[-predict_days:]
#     y0 = df_with_sat[pollutant_name_sat + ' raw'][ranger:]
#     y1 = df_with_sat[pollutant_name_sat][ranger:]

#     more = [
#         (y0, f'{pollutant} S5P raw', 'lightgray'),
#         (y1, f'{pollutant} S5P preprocessed', 'black')]

#     layout = Visualizer(f'Air quality {pollutant} S5P', x=(x, 'Time'), y=(y, col + ' trend analysis'),
#                         x_range=x_range, x_axis_type='datetime', title=f'Air quality: {pollutant} S5P',
#                         tools='pan,xwheel_zoom,box_zoom,reset', active_scroll='xwheel_zoom',
#                        visualize=VIS, file_out=plot_location).time_series(now_out=True, more=more)
        
#     df_with_sat.drop(cols_raw, axis=1, inplace=True)
#     outlier_filter(df_with_sat, [pollutant_name_sat])
   
#     readable_cols = df_with_sat.columns.to_list()
    
#     df_all_data = df_with_sat.copy()