### Jupyter Notebook with a collection of all functions for processing STILT results

### Import this notebook with 
### %run '~/ICOSstations/STILT_modules_v1.3.ipynb'

#### The notebook contains modules to 
- import all required tools and libraries

- get list of ICOS class 1 and class 2 stations from Carbon Portal
    - `get_station_class()`


- store and update STILT station information in a dictionary 
    - `create_STILT_dictionary()`


- read STILT station information
    - `read_STILT_dictionary()`
    
    
- list available footprints and store them in a dictionary
    - `available_STILT_dictionary()`


- plot table with availability of STILT results
    - `plot_available_STILT()`
    
    
- read slot-specific csv files from stiltweb (new directory structure)
    - `read_stilt_timeseries(station,date_range)`
    
    
- read and aggregate footprints in netcdf format (new directory structure) 
    - `read_aggreg_footprints(station, date_range, timeselect='all')`


- read annual anthropogenic emissions EDGARv4.3_BP2015 
    - `read_emissions(filename)` 
    
    
- plot maps (emissions or footprints)
    - `plot_maps(field, lon, lat, title='', label='', unit='', linlog='linear', station=[''], zoom='', vmin=None, vmax=None, colors='GnBu', pngfile='')`


- plot time series (example only - needs to be adjusted)
    - `plot_stilt_timeseries(station,df,obs=None,meteo=None,title2='',linestyle = '.',pngfile='',add_tracer=[])`
    
    
- convert station longitude and latitude (slon, slat) to indices of STILT model grid (ix,jy)
    - `lonlat_2_ixjy(slon,slat,mlon,mlat)`
    
    

#### Outdated modules to
- read yearly csv files (old directory structure)

- read footprints in netcdf format (old directory structure)

- add old STILT id to stations directory




### Import tools and libraries

In [1]:
# import required libraries
#%pylab inline
import netCDF4 as cdf
import numpy as np
import datetime as dt
import os
import fnmatch
import requests
import pickle
import pandas as pd
import matplotlib.pyplot as p
import matplotlib.colors as mcolors
from cartopy import config
import cartopy.crs as ccrs
from cartopy.feature import NaturalEarthFeature, LAND, COASTLINE
import cartopy.feature as cfeature

#UPDATE
#from mpl_toolkits.basemap import Basemap
from IPython.core.display import display, HTML 
display(HTML("<style>.container { width:100% !important; }</style>"))
import warnings
warnings.filterwarnings('ignore')

# define colors
# Named hex colour strings ('#rrggbb') made available as notebook-level
# constants, so plotting code can refer to them by name.
orange='#ff8c00'
lime='#00ff00'
aqua='#00ffff'
brown='#663300'
lightgray="#C0C0C0"
gray="#808080"

### Get list of ICOS class 1 and class 2 stations from Carbon Portal

In [2]:
def get_station_class():
    """Fetch the list of ICOS class 1 and class 2 stations from the Carbon Portal.

    Sends a SPARQL query to ``https://meta.icos-cp.eu/sparql`` that selects
    station id, station class, country and long name for all entries of type
    ``st:AS`` whose class is "1" or "2", ordered by class and station id.

    Returns
    -------
    pandas.DataFrame
        One row per result binding. The columns are the variable names
        reported in the response head (``stationId``, ``stationClass``,
        ``country``, ``longName``). A variable missing from a binding is
        stored as ``None``.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with an HTTP error status.
    """
    url = 'https://meta.icos-cp.eu/sparql'

    query = """
    prefix st: <http://meta.icos-cp.eu/ontologies/stationentry/>
    select distinct ?stationId ?stationClass ?country ?longName
    from <http://meta.icos-cp.eu/resources/stationentry/>
    where{
      ?s a st:AS .
      ?s st:hasShortName ?stationId .
      ?s st:hasStationClass ?stationClass .
      ?s st:hasCountry ?country .
      ?s st:hasLongName ?longName .
      filter (?stationClass = "1" || ?stationClass = "2")
    }
    ORDER BY ?stationClass ?stationId 
    """
    r = requests.get(url, params = {'format': 'json', 'query': query})
    # Fail with a clear HTTP error instead of trying to decode an error page as JSON.
    r.raise_for_status()
    data = r.json()

    # Convert the result into a table: one list per binding, with the values
    # in the same order as the variables announced in the response head.
    cols = data['head']['vars']
    datatable = [[row.get(c, {}).get('value') for c in cols]
                 for row in data['results']['bindings']]

    return pd.DataFrame(datatable, columns=cols)

### Store all STILT station information in a dictionary
Dictionary contains information on
- STILT station id
- Station coordinates (latitude, longitude)
- Altitude of tracer release in STILT simulation
- STILT location identifier
- Station name - if available

In [3]:
def create_STILT_dictionary():
    """Collect information on all STILT stations and store it in a dictionary.

    For every entry below ``/data/stiltweb/stations/`` the target of the
    symbolic link is read and latitude, longitude and altitude are parsed from
    fixed character positions at the end of the link target (presumably of
    the form ``<lat>[N|S]x<lon>[E|W]x<alt>`` -- inferred from the slice
    positions, confirm against the stiltweb directory layout).

    The station name is then added from the stiltweb ``stationinfo`` CSV and
    the ICOS station class from ``get_station_class()``.

    Side effects
    ------------
    Writes the dictionary to ``stationsDict.pickle`` in the current working
    directory.

    Returns
    -------
    dict
        ``{station_id: {...}}``. For entries that are resolvable symbolic
        links the inner dictionary has the keys ``lat``, ``lon``, ``alt`` and
        ``locIdent``. Every entry gets ``name`` (``''`` if unknown) and
        ``stationClass`` (``numpy.nan`` if the station is not an ICOS class 1
        or class 2 station).
    """
    # get all ICOS station IDs by listing subdirectories in stiltweb
    # extract location from filename of link

    #UPDATE
    pathStations='/data/stiltweb/stations/'
    #pathStations='/opt/stiltdata/fsicos2/stiltweb/stations/'
    allStations = os.listdir(pathStations)

    # empty dictionary
    stations = {}

    # fill dictionary with ICOS station id, latitude, longitude and altitude
    for ist in sorted(set(allStations)):
        stations[ist] = {}
        link = pathStations+ist
        # os.readlink only works on symbolic links; anything else (plain
        # directory, file, broken link) keeps an entry without coordinates.
        if os.path.exists(link) and os.path.islink(link):
            # get filename of link (original stiltweb directory structure)
            # and extract location information
            loc_ident = os.readlink(link)

            # builtin float/int: the np.float / np.int aliases were removed from NumPy
            clon = loc_ident[-13:-6]
            lon = float(clon[:-1])
            if clon[-1:] == 'W':
                lon = -lon
            clat = loc_ident[-20:-14]
            lat = float(clat[:-1])
            if clat[-1:] == 'S':
                lat = -lat
            alt = int(loc_ident[-5:])

            stations[ist]['lat']=lat
            stations[ist]['lon']=lon
            stations[ist]['alt']=alt
            stations[ist]['locIdent']=os.path.split(loc_ident)[-1]

    # add information on station name (and new STILT station id) from stations.csv file used in stiltweb
    url="https://stilt.icos-cp.eu/viewer/stationinfo"
    df = pd.read_csv(url)

    for ist in sorted(stations):
        # First non-missing name wins; duplicate rows no longer raise.
        stationName = df.loc[df['STILT id'] == ist, 'STILT name'].dropna()
        stations[ist]['name'] = stationName.iloc[0] if len(stationName) > 0 else ''

    # Get list of ICOS class 1 and class 2 stations from Carbon Portal
    df_datatable = get_station_class()
    icos_classes = list(zip(df_datatable['stationId'], df_datatable['stationClass']))

    # add information if ICOS class 1 or class 2 site
    # The ICOS id is matched as a substring of the STILT id; when several ICOS
    # ids match, the last one in the table wins (as before).
    for ist in sorted(stations):
        stations[ist]['stationClass'] = np.nan
        for istICOS, stationClass in icos_classes:
            if istICOS and istICOS in ist:
                stations[ist]['stationClass'] = stationClass

    # write dictionary to pickle file for further use
    with open("stationsDict.pickle", "wb") as pickle_file:
        pickle.dump(stations, pickle_file)

    return stations

### Read dictionary with all stations
Dictionary contains information on
- STILT station id
- Station coordinates (latitude, longitude)
- Altitude of tracer release in STILT simulation
- STILT location identifier
- Station name - if available

In [4]:
def read_STILT_dictionary():
    """Load the STILT station dictionary from ``stationsDict.pickle`` and print it.

    The pickle file is read from the current working directory. Every station
    id is printed in sorted order, followed by one ``key : value`` line for
    each item stored for that station.

    Returns
    -------
    The object stored in the pickle file (the station dictionary).
    """
    stations = pd.read_pickle('stationsDict.pickle')

    # print dictionary
    for station_id in sorted(stations):
        print ('station:', station_id)
        for key, value in stations[station_id].items():
            print (key,':', value)

    return stations

### List available footprints and store them in a dictionary

In [5]:
def available_STILT_dictionary():
    """Collect the availability of STILT footprints from the stiltweb directory tree.

    The directory ``/data/stiltweb/stations/`` is expected to contain one
    sub-directory (or link to a directory) per station, with one
    sub-directory per year and, below that, one entry per month.

    Returns
    -------
    dict
        ``{station_id: info}`` where ``info`` contains

        - ``'years'``: list of year directory names,
        - one key per year holding ``{'months': [...], 'nmonths': int}``,
        - ``'stationClass'``: ICOS class from ``get_station_class()`` or
          ``numpy.nan`` if the station is not an ICOS class 1 or 2 station.
    """
    # get all ICOS station IDs by listing subdirectories in stiltweb
    # extract availability from directory structure

    #new:
    pathStations='/data/stiltweb/stations/'
    #pathStations='/opt/stiltdata/fsicos2/stiltweb/stations/'

    # empty dictionary
    available = {}

    # fill dictionary with station name, years and months for each year
    for ist in sorted(set(os.listdir(pathStations))):
        station_dir = os.path.join(pathStations, ist)
        # Skip broken links and plain files: os.listdir would fail on them.
        if not os.path.isdir(station_dir):
            continue
        years = [yy for yy in os.listdir(station_dir)
                 if os.path.isdir(os.path.join(station_dir, yy))]
        available[ist] = {'years': years}
        for yy in sorted(years):
            months = os.listdir(os.path.join(station_dir, yy))
            available[ist][yy] = {'months': months, 'nmonths': len(months)}

    # Get list of ICOS class 1 and class 2 stations from Carbon Portal
    df_datatable = get_station_class()
    icos_classes = list(zip(df_datatable['stationId'], df_datatable['stationClass']))

    # add information if ICOS class 1 or class 2 site
    # The ICOS id is matched as a substring of the STILT id; when several ICOS
    # ids match, the last one in the table wins (as before).
    for ist in sorted(available):
        available[ist]['stationClass'] = np.nan
        for istICOS, stationClass in icos_classes:
            if istICOS and istICOS in ist:
                available[ist]['stationClass'] = stationClass

    return available

### Plot STILT footprint availability

In [6]:
def plot_available_STILT(pngfile='', startyear=2006, endyear=2018):
    """Plot an overview of the available STILT footprints per station and year.

    Calls ``available_STILT_dictionary()`` and draws one row per station and
    one marker per year. The marker size is proportional to the square root
    of the number of months for which footprints are available in that year.
    ICOS class 1 stations are drawn as red diamonds, class 2 stations as blue
    triangles, all other stations in black.

    Parameters
    ----------
    pngfile : str, optional
        If not empty, the figure is saved as
        ``plots/<pngfile>_<YYYYMMDD>.png`` (directory created if missing).
    startyear, endyear : int, optional
        First and last year shown on the x axis (defaults 2006 and 2018).
    """
    print ('run available_STILT_dictionary()')
    available = available_STILT_dictionary()

    # Plot availability
    # Each dot in the figure below represents one year.
    # The size of the dot is proportional to the number of months per year for which footprints are available.
    yy = np.arange(startyear, endyear + 1)
    dy = 0.5

    # (scatter keywords, marker size factor, text keywords) per ICOS station class
    class_styles = {
        '1': ({'c': 'r', 'marker': 'D'}, 30, {'color': 'r'}),
        '2': ({'c': 'b', 'marker': '^'}, 40, {'color': 'b'}),
    }
    default_style = ({'c': 'k'}, 40, {})

    fig = p.figure(figsize=(15, 32))
    for i, ist in enumerate(sorted(available, reverse=True)):
        # available number of months per year (0 if the year is missing)
        nm = np.asarray([available[ist][str(year)]['nmonths'] if str(year) in available[ist] else 0
                         for year in yy])
        scatter_kw, size_factor, text_kw = class_styles.get(available[ist]['stationClass'], default_style)
        p.scatter(yy, np.full(yy.shape, i+dy), s=size_factor*np.sqrt(nm), **scatter_kw)
        p.text(startyear-2+0.2, i+dy/2, ist, fontsize=14, **text_kw)

    p.xticks(np.arange(startyear-2, endyear+2, 1.0))
    p.xlim(startyear-2, endyear+1)
    p.yticks(np.arange(0, len(available), 1.0), ())
    p.ylim(0, len(available))
    p.grid(axis='y')
    p.tick_params(labeltop=True,labelsize=14)
    p.title('Available STILT footprints (size proportional to number of months per year)\n\n\n', fontsize=18)
    p.figtext(0.4, 0.9, 'ICOS class 1 stations in red', color='r', fontsize=16, ha ='right')
    p.figtext(0.6, 0.9, 'ICOS class 2 stations in blue', color='b', fontsize=16, ha ='left')

    # Save while the figure is still open; it is only closed afterwards.
    if len(pngfile)>0:
        plotdir='plots'
        os.makedirs(plotdir, exist_ok=True)
        fig.savefig(plotdir+'/'+pngfile+'_'+dt.datetime.now().strftime('%Y%m%d')+'.png',dpi=100)
    p.show()
    p.close(fig)

### Convert station longitude and latitude to STILT grid indices

In [7]:
# function to convert station longitude and latitude (slat, slon) to indices of STILT model grid (ix,jy)
def lonlat_2_ixjy(slon,slat,mlon,mlat):
    """Return the model grid indices (ix, jy) closest to a station position.

    slon, slat: longitude and latitude of the station
    mlon, mlat: 1-dim. longitude and latitude of the model grid

    The index is the position of the smallest absolute difference between
    the grid coordinate and the station coordinate, separately for longitude
    and latitude.
    """
    lon_distance = np.abs(mlon - slon)
    lat_distance = np.abs(mlat - slat)
    return lon_distance.argmin(), lat_distance.argmin()

### Read annual mean EDGAR emissions

In [8]:
# function to read annual mean EDGAR emissions
def read_emissions(filename):
    """Read annual mean anthropogenic (EDGAR) emissions from a netCDF file.

    The file is looked up in ``/opt/stiltdata/RINGO/Emissions/``. The
    coordinates stored in the file refer to the lower left corner of each
    grid cell; half a grid spacing is added so that the returned coordinates
    refer to the cell centres.

    Parameters
    ----------
    filename : str
        Name of the netCDF file, e.g. ``'EDGARv4.3_BP2015.timemean.co2.nc'``.

    Returns
    -------
    emis : array
        Content of the variable ``emission`` (3-dimensional). According to
        the original note the variable name is misleading: these are total
        CO2 emissions.
    emis_lon, emis_lat : array
        Longitude and latitude shifted by half a grid spacing.
    """
    path_edgar = '/opt/stiltdata/RINGO/Emissions/'

    # The context manager closes the file again, also if a variable is missing.
    with cdf.Dataset(path_edgar+filename) as f:
        emis=f.variables['emission'][:,:,:] # name not correct, these are total CO2 emissions
        lon_ll=f.variables['lon'][:]
        lat_ll=f.variables['lat'][:]

    # grid spacing, taken from the second and third coordinate value
    dlon=np.abs(lon_ll[2]-lon_ll[1])
    dlat=np.abs(lat_ll[2]-lat_ll[1])
    emis_lon=lon_ll+0.5*dlon
    emis_lat=lat_ll+0.5*dlat

    return emis, emis_lon, emis_lat


### Plot maps (emissions or footprints) --- outdated basemap version

In [9]:
# function to plot maps (show station location if station is provided and zoom in second plot if zoom is provided)
def plot_maps_basemap(field, lon, lat, title='', label='', unit='', linlog='linear', station=[''], zoom='', 
              vmin=None, vmax=None, colors='GnBu',pngfile=''):
    """Plot maps (emissions or footprints) -- outdated Basemap version.

    Shows the first slice ``field[0,:,:]`` on the full grid. Station locations
    are marked if ``station`` is provided, and a second, zoomed panel around
    the station ``zoom`` is added if ``zoom`` is provided.

    NOTE: this function uses ``Basemap``, whose import is commented out in the
    import cell of this notebook; it only runs if ``Basemap`` is made
    available in the namespace by other means.

    Station coordinates are read from a dictionary ``stations`` that is not a
    parameter; it must exist in the notebook namespace (keys ``'lon'`` and
    ``'lat'`` per station id are used).

    Parameters:
        field: 3-dim. array; only index 0 of the first axis is plotted
        lon, lat: 1-dim. longitude and latitude of the grid
        title: title of each panel
        label, unit: text of the colorbar label
        linlog: 'linear' plots the values, anything else plots log10 of them
        station: list of station ids to mark (default [''] = none)
        zoom: station id to zoom on ('' = no zoom panel)
        vmin, vmax: color scale limits passed to imshow
        colors: name of the matplotlib colormap
        pngfile: if not empty, the figure is saved as plots/<pngfile>.png
    """    

    #https://matplotlib.org/users/colormapnorms.html#custom-normalization-two-linear-ranges
    #http://chris35wills.github.io/matplotlib_diverging_colorbar/
    # Normalization with two linear ranges around a midpoint. It is only
    # referenced in a commented-out argument below, i.e. currently unused.
    class MidpointNormalize(mcolors.Normalize):
        def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
            self.midpoint = midpoint
            mcolors.Normalize.__init__(self, vmin, vmax, clip)

        def __call__(self, value, clip=None):
            # I'm ignoring masked values and all kinds of edge cases to make a
            # simple example...
            x, y = [self.vmin, self.midpoint, self.vmax], [0, 0.5, 1]
            return np.ma.masked_array(np.interp(value, x, y))

    #print (np.shape(field))
    
    # warn if the first axis holds more than one 2-dim. field
    if np.shape(field)[0] > 1:
        print ('More than one field: ',np.shape(field)[0],' Only the first will be plotted!!!')
        
    fig = p.figure(figsize=(15,8))

    # left panel: full grid
    ax = fig.add_subplot(1,2,1)
    m = Basemap(projection='cyl', llcrnrlat=lat.min(), urcrnrlat=lat.max(), 
                llcrnrlon=lon.min(), urcrnrlon=lon.max(), resolution='l',)
    m.drawcoastlines(linewidth=0.3)
    m.drawmapboundary(fill_color='none',linewidth=0.3)
    m.drawcountries(linewidth=0.3)
    
    #cmap = p.get_cmap('Blues')
    #cmap = p.get_cmap('GnBu')
    cmap = p.get_cmap(colors)
    if linlog == 'linear':
        im = m.imshow(field[0,:,:],interpolation='none',origin='lower',vmin=vmin,vmax=vmax,cmap=cmap)#,norm=MidpointNormalize(midpoint=0.,vmin=vmin,vmax=vmax))
        cbar=m.colorbar(im,location='bottom',pad='5%')
        cbar.set_label(label+'  '+unit)
    else:
        # log10 is applied to the values; vmin/vmax then refer to the log10 values
        im = m.imshow(np.log10(field)[0,:,:],interpolation='none',origin='lower',vmin=vmin,vmax=vmax,cmap=cmap)
        cbar=m.colorbar(im,location='bottom',pad='5%')
        cbar.set_label(label+'  log$_{10}$ '+unit)
    p.title(title)
    # min/max annotation always shows the untransformed values, also in log mode
    ax.text(0.01, -0.25, 'min: %.5f' % np.min(field[0,:,:]), horizontalalignment='left',transform=ax.transAxes)
    ax.text(0.99, -0.25, 'max: %.5f' % np.max(field[0,:,:]), horizontalalignment='right',transform=ax.transAxes)
    
    # only the first list element is checked to decide whether stations were provided
    if station[0] != '':
        for ist in station:
            #show station location if station is provided
            m.plot(stations[ist]['lon'],stations[ist]['lat'],'m+',ms=8)

    #zoom   
    if zoom != '':
        #grid cell index of station 
        ix,jy = lonlat_2_ixjy(stations[zoom]['lon'],stations[zoom]['lat'],lon,lat)
        #print (stations[zoom]['lon'],stations[zoom]['lat'],ix,jy)

        # define zoom area 
        # +-35 cells in longitude and +-42 cells in latitude around the station,
        # clipped to 0..400 and 0..480 (hard-coded; presumably the size of the
        # STILT grid -- confirm)
        i1 = np.max([ix-35,0])
        i2 = np.min([ix+35,400])
        j1 = np.max([jy-42,0])
        j2 = np.min([jy+42,480])

        #print (i1,i2,j1,j2)

        # right panel: zoom area
        ax = fig.add_subplot(1,2,2)
        m = Basemap(projection='cyl', llcrnrlat=lat[j1:j2].min(), urcrnrlat=lat[j1:j2].max(),
                    llcrnrlon=lon[i1:i2].min(), urcrnrlon=lon[i1:i2].max(), resolution='i',)
        m.drawcoastlines(linewidth=0.3)
        m.drawmapboundary(fill_color='none',linewidth=0.3)
        m.drawcountries(linewidth=0.3)
    
        if linlog == 'linear':
            im = m.imshow(field[0,j1:j2,i1:i2],interpolation='none',origin='lower',vmin=vmin,vmax=vmax,cmap=cmap)
            cbar=m.colorbar(im,location='bottom',pad='5%')
            cbar.set_label(label+'  '+unit)
        else:
            im = m.imshow(np.log10(field)[0,j1:j2,i1:i2],interpolation='none',origin='lower',vmin=vmin,vmax=vmax,cmap=cmap)
            cbar=m.colorbar(im,location='bottom',pad='5%')
            cbar.set_label(label+'  log$_{10}$ '+unit)
        # NOTE(review): with the default station=[''] this set contains '' and
        # the lookup stations[''] is attempted -- looks unintended, confirm
        for ist in list(set([zoom] + station)):
            m.plot(stations[ist]['lon'],stations[ist]['lat'],'m+',ms=8)
        p.title(title)
        ax.text(0.01, -0.25, 'min: %.5f' % np.min(field[0,j1:j2,i1:i2]), horizontalalignment='left',transform=ax.transAxes)
        ax.text(0.99, -0.25, 'max: %.5f' % np.max(field[0,j1:j2,i1:i2]), horizontalalignment='right',transform=ax.transAxes)
    #p.tight_layout()
    p.show()
    # optionally save the figure below the directory 'plots' (created if missing)
    if len(pngfile)>0:
        plotdir='plots'
        if not os.path.exists(plotdir):
            os.mkdir(plotdir)
        fig.savefig(plotdir+'/'+pngfile+'.png',dpi=100)
    p.close()
    

In [10]:
# function to plot maps (show station location if station is provided and zoom in second plot if zoom is provided)
def plot_maps(field, lon, lat, title='', label='', unit='', linlog='linear', station=[''], zoom='', 
              vmin=None, vmax=None, colors='GnBu',pngfile=''):
    """Plot maps (emissions or footprints) with cartopy.

    The left panel shows the first slice ``field[0,:,:]`` on the full grid.
    If ``zoom`` is provided, a right panel shows a cut-out of +-35 grid cells
    in longitude and +-42 grid cells in latitude around that station.

    Station coordinates are read from a dictionary ``stations`` that is not a
    parameter; it must exist in the notebook namespace (keys ``'lon'`` and
    ``'lat'`` per station id are used) whenever ``station`` or ``zoom`` is
    provided.

    Parameters
    ----------
    field : array
        3-dim. array; only index 0 of the first axis is plotted.
    lon, lat : array
        1-dim. longitude and latitude of the grid.
    title : str
        Title of each panel.
    label, unit : str
        Text of the colorbar label.
    linlog : str
        ``'linear'`` plots the values, anything else plots log10 of the
        values (``vmin``/``vmax`` then refer to the log10 values).
    station : list of str
        Station ids to mark with a cross (default ``['']`` = none).
    zoom : str
        Station id to zoom on (``''`` = no zoom panel).
    vmin, vmax : float or None
        Color scale limits passed to ``imshow``.
    colors : str
        Name of the matplotlib colormap.
    pngfile : str
        If not empty, the figure is saved as ``plots/<pngfile>.png``.
    """
    mcolor='m'

    # Set scale for features from Natural Earth ('110m', '50m' or '10m')
    NEscale = '50m'

    # Create a feature for Countries at 1:50m from Natural Earth
    countries = cfeature.NaturalEarthFeature(
        category='cultural',
        name='admin_0_countries',
        scale=NEscale,
        facecolor='none')

    cmap = p.get_cmap(colors)

    # As before only the first list element decides whether stations were
    # provided; an empty list is now treated as "no stations" as well.
    has_stations = len(station) > 0 and station[0] != ''

    def _draw_panel(position, data, panel_lon, panel_lat, marked_ids):
        # Draw one map panel: image, colorbar, station markers, title and min/max text.
        ax = p.subplot(1, 2, position, projection=ccrs.PlateCarree())
        img_extent = (panel_lon.min(), panel_lon.max(), panel_lat.min(), panel_lat.max())
        ax.set_extent(list(img_extent),crs=ccrs.PlateCarree())
        ax.add_feature(countries, edgecolor='black', linewidth=0.3)

        if linlog == 'linear':
            shown = data
            cbar_label = label+'  '+unit
        else:
            shown = np.log10(data)
            cbar_label = label+'  log$_{10}$ '+unit
        im = ax.imshow(shown,interpolation='none',origin='lower', extent=img_extent,cmap=cmap,vmin=vmin,vmax=vmax)
        cbar=p.colorbar(im,ax=ax,orientation='horizontal',pad=0.03,fraction=0.055,extend='both')
        cbar.set_label(cbar_label)

        if marked_ids:
            station_lon = [stations[ist]['lon'] for ist in marked_ids]
            station_lat = [stations[ist]['lat'] for ist in marked_ids]
            ax.plot(station_lon,station_lat,'+',color=mcolor,ms=10,markeredgewidth=1,transform=ccrs.PlateCarree())

        ax.set_title(title)
        # min/max annotation always shows the untransformed values, also in log mode
        ax.text(0.01, -0.25, 'min: %.5f' % np.min(data), horizontalalignment='left',transform=ax.transAxes)
        ax.text(0.99, -0.25, 'max: %.5f' % np.max(data), horizontalalignment='right',transform=ax.transAxes)

    if np.shape(field)[0] > 1:
        print ('More than one field: ',np.shape(field)[0],' Only the first will be plotted!!!')

    fig = p.figure(figsize=(15,15))

    # left panel: full grid
    _draw_panel(1, field[0,:,:], lon, lat, list(station) if has_stations else [])

    #zoom
    if zoom != '':
        #grid cell index of station
        ix,jy = lonlat_2_ixjy(stations[zoom]['lon'],stations[zoom]['lat'],lon,lat)

        # define zoom area, clipped to the actual grid size
        # (previously hard-coded to 400 x 480 grid cells)
        i1 = max(ix-35, 0)
        i2 = min(ix+35, len(lon))
        j1 = max(jy-42, 0)
        j2 = min(jy+42, len(lat))

        # The zoom station itself is only marked when stations were provided
        # (unchanged behaviour).
        zoom_marked = list(set([zoom] + list(station))) if has_stations else []

        # right panel: zoom area
        _draw_panel(2, field[0,j1:j2,i1:i2], lon[i1:i2], lat[j1:j2], zoom_marked)

    #p.tight_layout()
    p.show()
    if len(pngfile)>0:
        plotdir='plots'
        os.makedirs(plotdir, exist_ok=True)
        fig.savefig(plotdir+'/'+pngfile+'.png',dpi=100)
    p.close()
    

### Read and aggregate STILT footprints

In [11]:
#function to read and aggregate footprints for given time range
def read_aggreg_footprints(station, date_range, timeselect='all'):
    """Read the STILT footprints of a station for a list of dates and average them.

    For each date the file
    ``/data/stiltweb/stations/<station>/<YYYY>/<MM>/<YYYY>x<MM>x<DD>x<HH>/foot``
    is read if it exists; dates without a file are skipped silently.

    Parameters
    ----------
    station : str
        STILT station id (name of the station directory).
    date_range : iterable of datetime-like
        Dates with the attributes ``year``, ``month``, ``day`` and ``hour``.
    timeselect : str, optional
        Not used by this function.

    Returns
    -------
    nfp : int
        Number of footprint files found.
    fp : array or list
        Mean of the variable ``foot`` over all files found, or an empty list
        if no file was found.
    lon, lat : array or None
        Variables ``lon`` and ``lat`` of the first file found, or ``None`` if
        no file was found (previously this case raised an UnboundLocalError).
    title : str
        Always ``'not used'``.
    """
    # path to footprint files in new stiltweb directory structure
    pathFP='/data/stiltweb/stations/'

    fp=[]
    lon=None
    lat=None
    nfp=0
    for date in date_range:
        slot = '{}x{:02d}x{:02d}x{:02d}'.format(date.year, date.month, date.day, date.hour)
        filename = pathFP+station+'/'+str(date.year)+'/'+str(date.month).zfill(2)+'/'+slot+'/foot'
        if not os.path.isfile(filename):
            continue
        # The context manager closes the file also if reading a variable fails.
        with cdf.Dataset(filename) as f_fp:
            foot = f_fp.variables['foot'][:,:,:]
            if nfp == 0:
                # the grid is taken from the first file only
                fp = foot
                lon = f_fp.variables['lon'][:]
                lat = f_fp.variables['lat'][:]
            else:
                fp = fp + foot
        nfp += 1

    if nfp > 0:
        fp = fp/nfp
    else:
        print ('no footprints found')

    title = 'not used'

    return nfp, fp, lon, lat, title

### Read STILT time series (new format)

In [12]:
#updated --> take the timeselect list and returns the "correct" dataframe
#otherwise - not correct hours!
# function to read STILT concentration time series (new format of STILT results)
def read_stilt_timeseries_upd(station,date_range,timeselect_list):
    """Read STILT concentration time series and keep only the selected hours.

    The time series is read with ``read_stilt_timeseries(station, date_range)``
    (same request and same columns); afterwards only the rows whose time
    stamp has an hour contained in ``timeselect_list`` are kept.

    Parameters
    ----------
    station : str
        STILT station id.
    date_range : sequence of datetime-like
        Dates for which STILT results are requested.
    timeselect_list : list of int
        Hours of the day (0-23) to keep.

    Returns
    -------
    pandas.DataFrame
        The filtered time series. If no STILT results are available, the
        empty placeholder frame of ``read_stilt_timeseries`` is returned
        unchanged (previously this case raised a KeyError).
    """
    df = read_stilt_timeseries(station, date_range)

    # The placeholder frame for "no results" has no datetime index and
    # therefore nothing to filter.
    if not isinstance(df.index, pd.DatetimeIndex):
        return df

    return df[df.index.hour.isin(timeselect_list)]

In [13]:
# function to read STILT concentration time series (new format of STILT results)
def read_stilt_timeseries(station,date_range):
    """Read STILT concentration time series (new format of STILT results).

    Only dates for which a result directory
    ``/data/stiltweb/stations/<station>/<YYYY>/<MM>/<YYYY>x<MM>x<DD>x<HH>/``
    exists are considered. The time series between the first and the last of
    these dates is requested from the stiltweb service
    (``https://stilt.icos-cp.eu/viewer/stiltresult``).

    Parameters
    ----------
    station : str
        STILT station id.
    date_range : sequence of datetime-like
        Dates with the attributes ``year``, ``month``, ``day``, ``hour`` and
        the method ``strftime``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``date`` (converted from the column ``isodate``, given in
        seconds). All requested columns are converted to float; the columns
        ``name`` (station id), ``model`` (``'STILT'``) and ``wind.speed``
        (from ``wind.u`` and ``wind.v``) are added.
        If no result directory exists, or the service answers with an HTTP
        error, an empty frame with the single column ``'A'`` is returned.
    """
    url = 'https://stilt.icos-cp.eu/viewer/stiltresult'
    headers = {'Content-Type': 'application/json', 'Accept-Charset': 'UTF-8'}
    pathFP='/data/stiltweb/stations/'
    columns = ['isodate','co2.stilt','co2.fuel','co2.bio','co2.bio.gee','co2.bio.resp','co2.fuel.coal','co2.fuel.oil',
               'co2.fuel.gas','co2.fuel.bio','co2.energy','co2.transport','co2.industry',
               'co2.others','co2.cement','co2.background',
               'co.stilt','co.fuel','co.bio','co.fuel.coal','co.fuel.oil',
               'co.fuel.gas','co.fuel.bio','co.energy','co.transport','co.industry',
               'co.others','co.cement','co.background',
               'rn','rn.era','rn.noah','wind.dir','wind.u','wind.v','latstart','lonstart']

    # check if STILT results exist
    new_range=[]
    for date in date_range:
        slot = '{}x{:02d}x{:02d}x{:02d}'.format(date.year, date.month, date.day, date.hour)
        if os.path.exists(pathFP+station+'/'+str(date.year)+'/'+str(date.month).zfill(2)+'/'+slot+'/'):
            new_range.append(date)

    if len(new_range) == 0:
        return pd.DataFrame({'A' : []})

    # Let requests build the JSON body from a dictionary instead of
    # concatenating (and later eval-ing) a hand-written JSON string.
    payload = {'columns': columns,
               'fromDate': new_range[0].strftime('%Y-%m-%d'),
               'toDate': new_range[-1].strftime('%Y-%m-%d'),
               'stationId': station}
    response = requests.post(url, headers=headers, json=payload)
    if not response.ok:
        # Previously a server error left the result undefined (UnboundLocalError).
        print ('STILT time series request failed with HTTP status', response.status_code)
        return pd.DataFrame({'A' : []})

    df = pd.DataFrame(response.json(), columns=columns)
    # np.nan: the alias np.NaN was removed in NumPy 2.0
    df = df.replace('null',np.nan)
    df = df.astype(float)
    df['date'] = pd.to_datetime(df['isodate'], unit='s')
    df = df.set_index('date')
    df['name'] = station
    df['model'] = 'STILT'
    df['wind.speed']=np.sqrt((df['wind.u']**2)+(df['wind.v']**2))
    return df

### Read STILT time series with all components

In [14]:
# function to read STILT concentration time series (new format of STILT results)
def read_stilt_raw_timeseries(station,date_range):
    """Read STILT raw time series (individual components) from stiltweb.

    Only dates for which a slot directory
    ``/opt/stiltdata/fsicos2/stiltweb/slots/<locIdent>/<YYYY>/<MM>/<YYYY>x<MM>x<DD>x<HH>/``
    exists are considered, where ``<locIdent>`` is taken from
    ``stations[station]['locIdent']``. The dictionary ``stations`` is not a
    parameter; it must exist in the notebook namespace. The time series
    between the first and the last of these dates is requested from
    ``https://stilt.icos-cp.eu/viewer/stiltrawresult``.

    Parameters
    ----------
    station : str
        STILT station id.
    date_range : sequence of datetime-like
        Dates with the attributes ``year``, ``month``, ``day``, ``hour`` and
        the method ``strftime``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``date`` (converted from the column ``isodate``, given in
        seconds). The requested columns are converted to float; the columns
        ``name`` (station id) and ``model`` (``'STILT'``) are added.
        If no slot directory exists, or the service answers with an HTTP
        error, an empty frame with the single column ``'A'`` is returned.
    """
    url = 'https://stilt.icos-cp.eu/viewer/stiltrawresult'
    headers = {'Content-Type': 'application/json', 'Accept-Charset': 'UTF-8'}
    columns = ['isodate','co2.1a1bcr.coal_hard','co2.1a1bcr.coal_peat','co2.1a1bcr.gas_der']

    # check if STILT results exist
    new_range=[]
    for zDate in date_range:
        slot = '{}x{:02d}x{:02d}x{:02d}'.format(zDate.year, zDate.month, zDate.day, zDate.hour)
        if os.path.exists('/opt/stiltdata/fsicos2/stiltweb/slots/'+stations[station]['locIdent']+'/'
                          +str(zDate.year)+'/'+str(zDate.month).zfill(2)+'/'+slot+'/'):
            new_range.append(zDate)

    if len(new_range) == 0:
        return pd.DataFrame({'A' : []})

    # Let requests build the JSON body from a dictionary instead of
    # concatenating (and later eval-ing) a hand-written JSON string.
    payload = {'columns': columns,
               'fromDate': new_range[0].strftime('%Y-%m-%d'),
               'toDate': new_range[-1].strftime('%Y-%m-%d'),
               'stationId': station}
    print(payload)
    response = requests.post(url, headers=headers, json=payload)
    if not response.ok:
        # Previously a server error left the result undefined (UnboundLocalError).
        print ('STILT raw time series request failed with HTTP status', response.status_code)
        return pd.DataFrame({'A' : []})

    df = pd.DataFrame(response.json(), columns=columns)
    # np.nan: the alias np.NaN was removed in NumPy 2.0
    df = df.replace('null',np.nan)
    df = df.astype(float)
    df['date'] = pd.to_datetime(df['isodate'], unit='s')
    df = df.set_index('date')
    df['name'] = station
    df['model'] = 'STILT'
    return df

### Plot STILT time series (basic example)

In [15]:
# function to plot STILT time series (new format of STILT results)
def plot_stilt_timeseries(station,df,obs=None,meteo=None,title2='',linestyle = '.',pngfile='',add_tracer=[]):
    """Plot STILT CO2 time series, optionally with CO, radon and observations.

    Panels (stacked in a 6-row layout):
      1. ``co2.stilt`` and ``co2.background``, plus observed CO2 if ``obs`` is given
      2. ``co2.fuel`` and ``co2.bio``
      3. ``co.stilt`` (only if that column exists in ``df`` and 'co' was requested)
      4. ``rn`` (only if that column exists in ``df`` and 'rn' was requested)

    The x-axis limits are taken from the module-level variables ``start_date``
    and ``end_date``, which must be defined before this function is called.

    Parameters
    ----------
    station : str
        Not used by this function.
    df : pandas.DataFrame
        STILT results; must provide ``co2.stilt``, ``co2.background``,
        ``co2.fuel``, ``co2.bio``, ``latstart``, ``lonstart``, ``name`` and ``model``.
    obs : pandas.DataFrame, optional
        Observations. If it has a ``DateTime`` column, CO2 is read from ``CO2``
        and plotted against ``DateTime``; otherwise ``co2`` is plotted against
        the index. ``co`` and ``rn`` are plotted against the index if present.
    meteo : optional
        Not used by this function.
    title2 : str
        Text appended to the panel titles.
    linestyle : str
        Matplotlib format string used for all curves.
    pngfile : str
        If not empty, the figure is saved as ``plots/<pngfile>.png``.
    add_tracer : list of str
        Additional tracers to plot ('co', 'rn'), compared case-insensitively.
    """
    def first(values):
        # first element by position; works for a pandas Series with any index
        # (plain [0] on a date-indexed Series is a deprecated positional lookup)
        # as well as for a plain sequence
        return values.iloc[0] if hasattr(values, 'iloc') else values[0]

    # requested tracers in lower case; CO2 is always part of the list
    tracer = [x.lower() for x in add_tracer + ['co2']]
    model = first(df.model)
    # raw strings keep the LaTeX backslash without an invalid escape sequence
    title = (first(df.name)+'  '+str(first(df['latstart']))+r'$^\circ$N'+'  '
             +str(first(df['lonstart']))+r'$^\circ$E'+'    '+title2)

    fig = p.figure(figsize=(15,15.5))

    # panel 1: modelled CO2 and background, optionally observed CO2
    ax = fig.add_subplot(6,1,1)
    p.plot(df.index,df['co2.stilt'],linestyle,color='b',label='STILT co2.stilt')
    p.plot(df.index,df['co2.background'],linestyle,color='c',label='STILT co2.background')
    if obs is not None:
        if 'DateTime' in obs:
            p.plot(obs.DateTime,obs['CO2'],linestyle,color='k',label='observation')
        else:
            p.plot(obs.index,obs['co2'].astype(np.float32),linestyle,color='k',label='observation')
    p.title(title)
    ax.set_xlim(start_date,end_date)
    ax.set_ylabel('CO$_2$  [ppm]')
    ax.grid(axis='x')
    ax.legend(loc='upper right')

    # panel 2: fossil fuel and biospheric CO2 components
    ax = fig.add_subplot(6,1,2)
    p.plot(df.index,df['co2.fuel'],linestyle,color='r',label=model+' co2.fuel')
    p.plot(df.index,df['co2.bio'],linestyle,color='g',label=model+' co2.bio')
    ax.set_xlim(start_date,end_date)
    #ax.set_ylim(-50,200)
    ax.set_ylabel('CO$_2$ components  [ppm]')
    ax.grid(axis='x')
    ax.legend(loc='upper right')

    # panel 3: CO, only if modelled and requested
    if ('co.stilt' in df) and ('co' in tracer):
        ax = fig.add_subplot(6,1,3)
        p.plot(df.index,df['co.stilt'],linestyle,color='m',label='STILT co.stilt')
        if obs is not None and 'co' in obs:
            # read the same column that was just tested for
            p.plot(obs.index,obs['co'],linestyle,color='k',label='observation')
        p.title(title)
        ax.set_xlim(start_date,end_date)
        ax.set_ylabel('CO  [ppb]')
        ax.grid(axis='x')
        ax.legend(loc='upper right')

    # panel 4: radon, only if modelled and requested
    if ('rn' in df) and ('rn' in tracer):
        ax = fig.add_subplot(6,1,4)
        p.plot(df.index,df['rn'],linestyle,color='y',label='STILT rn')
        if obs is not None and 'rn' in obs:
            p.plot(obs.index,obs['rn'],linestyle,color='k',label='observation')
        p.title(title)
        ax.set_xlim(start_date,end_date)
        ax.set_ylabel('222RN  [???]')
        ax.grid(axis='x')
        ax.legend(loc='upper right')

    p.tight_layout()
    p.show()
    if len(pngfile)>0:
        plotdir='plots'
        if not os.path.exists(plotdir):
            os.mkdir(plotdir)
        fig.savefig(plotdir+'/'+pngfile+'.png',dpi=100)
    p.close()


### Plot STILT time series together with ICOS data

In [16]:
# function to plot ICOS (or other measurement) data together with STILT time series (new format of STILT results)
def plot_icos_stilt_timeseries(station,df,obs=None,meteo=None,title2='',linestyle='.',pngfile='',add_tracer=[]):
    """Plot measurement data together with STILT time series.

    Thin wrapper: every argument is handed on unchanged to
    ``plot_stilt_timeseries``, which accepts the observation data frame via ``obs``.
    """
    passthrough = dict(
        obs=obs,
        meteo=meteo,
        title2=title2,
        linestyle=linestyle,
        pngfile=pngfile,
        add_tracer=add_tracer,
    )
    plot_stilt_timeseries(station, df, **passthrough)

### Outdated modules

#### Read old STILT time series (in Results_1A4 directory)

In [17]:
# function to read STILT time series
def read_stilt_ts(station,locIdent,year):
    """Read one year of old-format STILT results from the Results_1A4 directory.

    Parameters
    ----------
    station : str
        Station name; selects the sub-directory and is stored in the ``'name'`` column.
    locIdent : str
        Location identifier that is part of the file name.
    year : int or str
        Year that is part of the file name.

    Returns
    -------
    pandas.DataFrame
        Content of the whitespace-separated file, indexed by ``date`` (built
        from the ``year``, ``month``, ``day`` and ``hour`` columns) with the
        additional columns ``'name'`` and ``'model'`` (``'oldSTILT'``).
        If the file does not exist, an empty frame with the single column ``'A'``.
    """
    filename='/opt/stiltdata/Results_1A4/'+station+'/stiltresult'+str(year)+'x'+locIdent+'.csv'
    if not os.path.isfile(filename):
        return pd.DataFrame({'A' : []})
    #print (filename)
    # sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True
    df = pd.read_csv(filename, sep=r'\s+')
    # item assignment creates real columns; attribute assignment (df.date = ...)
    # only sets a Python attribute, so set_index('date') could not find the column
    df['date'] = pd.to_datetime(df[['year', 'month', 'day', 'hour']])
    df['name'] = station
    df['model'] = 'oldSTILT'
    #df['wind.speed']=np.sqrt((df['wind.u']**2)+(df['wind.v']**2))
    #print (df.columns)
    df.set_index(['date'],inplace=True)
    return df

#### Read very old STILT time series, emission sector 1A4 missing (in Results directory)

In [18]:
# function to read STILT time series
def read_stilt_ts_old(station,locIdent,year):
    """Read one year of very old STILT results from the Results directory.

    Parameters
    ----------
    station : str
        Station name; selects the sub-directory and is stored in the ``'name'`` column.
    locIdent : str
        Not used by this function (kept for a signature parallel to ``read_stilt_ts``).
    year : int or str
        Year that is part of the file name.

    Returns
    -------
    pandas.DataFrame
        Content of the whitespace-separated file, indexed by ``date`` (built
        from the ``year``, ``month``, ``day`` and ``hour`` columns) with the
        additional columns ``'name'`` and ``'model'`` (``'oldSTILT'``).
        If the file does not exist, an empty frame with the single column ``'A'``.
    """
    filename='/opt/stiltdata/Results/'+station+'/stiltresults'+str(year)+'.csv'
    if not os.path.isfile(filename):
        return pd.DataFrame({'A' : []})
    #print (filename)
    # sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True
    df = pd.read_csv(filename, sep=r'\s+')
    # item assignment creates real columns; attribute assignment (df.date = ...)
    # only sets a Python attribute, so set_index('date') could not find the column
    df['date'] = pd.to_datetime(df[['year', 'month', 'day', 'hour']])
    df['name'] = station
    df['model'] = 'oldSTILT'
    #df['wind.speed']=np.sqrt((df['wind.u']**2)+(df['wind.v']**2))
    #print (df.columns)
    df.set_index(['date'],inplace=True)
    return df

In [19]:
def add_old_STILTid(stations):
    """Add the entry ``'old id'`` to every station of the ``stations`` dictionary.

    The mapping is read from ``stations_oldSTILTids_20181106.csv`` (semicolon
    separated, columns ``'new STILT id'`` and ``'old STILT id'``) in the current
    working directory. Stations without a non-missing old id in that table get
    their own key as ``'old id'``.

    The dictionary is modified in place, printed station by station, and returned.
    """
    lookup = pd.read_csv('stations_oldSTILTids_20181106.csv', delimiter=';')

    for station_id in sorted(stations):
        is_match = lookup['new STILT id'] == station_id
        candidates = lookup.loc[is_match, 'old STILT id']
        # count() only counts non-missing entries
        has_old_id = candidates.count() > 0
        stations[station_id]['old id'] = candidates.item() if has_old_id else station_id

    # print dictionary
    for station_id in sorted(stations):
        print('station:', station_id)
        for key, value in stations[station_id].items():
            print(key, ':', value)

    return stations

In [20]:
# list all defined functions
#func = %who_ls function
#print ("\033[1m" + "Functions defined for handling STILT output:" + "\033[0;0m")
#for ff in func:
    #print (ff)

In [21]:
# list all loaded modules
#modu = %who_ls module
#print ("\033[1m" + "Modules loaded:" + "\033[0;0m")
#for mm in modu:
 #   print (mm)
#print ("\n")