# Data Inspection / Analysis
### Input: DataArray with lat, lon, time dimensions

    1) Create a Map instance: m = Map()
    2) m.plot(xar)


### Output: plots in the Jupyter notebook of
- mean
- std
- spatial covariance
- histogram distribution

In [1]:
import xarray as xr
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.io.shapereader import Reader
from cartopy.feature import ShapelyFeature

import seaborn as sns

In [2]:
# Open every NetCDF file matching the precipitation glob as one combined
# xarray Dataset (the repr below shows the variables backed by dask arrays).
xar = xr.open_mfdataset('../data/usa/dynamic/*precip*.nc')

In [3]:
# Show the Dataset repr (dimensions, coordinates, data variables, attributes).
xar

<xarray.Dataset>
Dimensions:    (latitude: 101, longitude: 221, time: 10228)
Coordinates:
  * longitude  (longitude) float32 -125.0 -124.75 -124.5 ... -70.5 -70.25 -70.0
  * latitude   (latitude) float32 50.0 49.75 49.5 49.25 ... 25.5 25.25 25.0
  * time       (time) datetime64[ns] 1980-01-01T23:00:00 ... 2017-12-31T23:00:00
Data variables:
    lsp        (time, latitude, longitude) float32 dask.array<shape=(10228, 101, 221), chunksize=(31, 101, 221)>
    cp         (time, latitude, longitude) float32 dask.array<shape=(10228, 101, 221), chunksize=(31, 101, 221)>
Attributes:
    CDI:          Climate Data Interface version 1.6.4rc4 (http://code.zmaw.d...
    Conventions:  CF-1.6
    history:      Tue May 28 07:31:19 2019: cdo -b 32 daysum /home/srvx11/leh...
    CDO:          Climate Data Operators version 1.6.4rc7 (http://code.zmaw.d...

# Name of the reduction applied along the time axis; it is looked up with
# getattr on each DataArray, so it must name a DataArray method that accepts
# `dim=` (e.g. 'mean' or 'std').
method = 'std'

# One time-reduced field per data variable, then merged back into a single
# Dataset so the fields can be looked up by variable name.
stats = [getattr(xar[var], method)(dim='time') for var in xar]
stats2 = xr.merge(stats)

for v in stats2:
    fig, ax = plt.subplots(figsize=(15, 10))
    # `stats` is a plain list and cannot be indexed by variable name; the
    # per-variable lookup has to go through the merged Dataset.
    stats2[v].plot.pcolormesh(ax=ax)
    # Set the title after plotting: xarray's 2D plot routine sets its own
    # axes title and would otherwise replace this one.
    ax.set_title('variable: ' + v)

## Projections

- ccrs.LambertConformal(central_longitude=-95, central_latitude=45)
- ccrs.Orthographic(-110, 35)
- ccrs.PlateCarree()

In [None]:
class Map(object):
    """Draw an xarray field on a cartopy map with geographic context layers.

    Parameters
    ----------
    figure_kws : dict, optional
        Keyword arguments forwarded to ``plt.figure`` each time ``plot`` is
        called (e.g. ``dict(figsize=(15, 10))``). Defaults to no arguments.
    projection : cartopy CRS, optional (keyword)
        Projection of the map axes. Defaults to a Lambert conformal
        projection with central longitude -110 and central latitude 45.
    transform : cartopy CRS, optional (keyword)
        Coordinate system of the data and points being drawn. Defaults to
        ``ccrs.PlateCarree()``.
    **kwargs
        Remaining options, stored on ``self.kwargs``. Recognised keys:

        - ``drainage_basins`` (bool, default True): overlay the drainage
          basin shapefile. The historical misspelling ``drainage_baisins``
          is still honoured when the correctly spelt key is absent.
        - ``states_provinces`` (bool, default False): overlay first-order
          administrative boundary lines.

    Attributes
    ----------
    ax
        Axes created by the most recent ``plot`` call, or ``None`` if
        ``plot`` has not been called yet.
    """

    def __init__(self, figure_kws=None, **kwargs):
        # Only build the default CRS objects when the caller did not pass
        # one; an explicitly passed value (even None) is kept as given.
        if 'projection' in kwargs:
            self.proj = kwargs.pop('projection')
        else:
            self.proj = ccrs.LambertConformal(central_longitude=-110,
                                              central_latitude=45)
        if 'transform' in kwargs:
            self.transform = kwargs.pop('transform')
        else:
            self.transform = ccrs.PlateCarree()

        self.kwargs = kwargs
        # A fresh dict per instance: a mutable default argument would be
        # shared by every Map created without figure_kws.
        self.fig_kws = {} if figure_kws is None else figure_kws
        self.ax = None

    def plot(self, xar):
        """Create a new figure and draw ``xar`` on a map with context layers.

        ``xar`` must provide a ``plot`` method accepting ``transform`` and
        ``ax`` keyword arguments (e.g. a 2D ``xarray.DataArray``). The axes
        are stored on ``self.ax`` for later use by ``plot_point``.
        """
        fig = plt.figure(**self.fig_kws)
        ax = fig.add_subplot(1, 1, 1, projection=self.proj)

        countries = cfeature.NaturalEarthFeature(
            category='cultural',
            name='admin_0_boundary_lines_land',
            scale='50m',
            facecolor='none')
        rivers = cfeature.NaturalEarthFeature(
            category='physical',
            name='rivers_lake_centerlines',
            scale='50m',
            edgecolor='blue',
            facecolor='none')

        ax.add_feature(countries, edgecolor='grey')
        ax.coastlines('50m')
        ax.add_feature(rivers, edgecolor='blue')

        # Opt-in layer; off by default so the default map looks as before.
        if self.kwargs.get('states_provinces', False):
            states_provinces = cfeature.NaturalEarthFeature(
                category='cultural',
                name='admin_1_states_provinces_lines',
                scale='50m',
                facecolor='none')
            ax.add_feature(states_provinces, edgecolor='gray')

        # Accept the correctly spelt key, falling back to the legacy
        # misspelt one so existing callers keep working.
        show_basins = self.kwargs.get(
            'drainage_basins', self.kwargs.get('drainage_baisins', True))
        if show_basins:
            reader = Reader(
                "../data/drainage_basins/Major_Basins_of_the_World.shp")
            basins = ShapelyFeature(reader.geometries(), self.transform,
                                    facecolor='none', edgecolor='green')
            ax.add_feature(basins)

        # The axes already carry the projection, so no subplot_kws are
        # passed: xarray does not accept them together with an existing ax.
        xar.plot(transform=self.transform, ax=ax)
        self.ax = ax

    def plot_point(self, lat, lon):
        """Mark a location with a hollow cyan circle on the last plotted map.

        ``lat`` and ``lon`` are interpreted in the ``self.transform``
        coordinate system.

        Raises
        ------
        RuntimeError
            If ``plot`` has not been called yet, so there are no axes.
        """
        ax = getattr(self, 'ax', None)
        if ax is None:
            raise RuntimeError(
                'Map.plot() must be called before Map.plot_point()')
        ax.plot(lon, lat, color='cyan', marker='o',
                markersize=20, mew=4, markerfacecolor='none',
                transform=self.transform)

In [None]:
# Anomalies: subtract each variable's mean over the time dimension.
anom = xar - xar.mean(dim='time')
# Work on the 'lsp' anomalies; `da` and `field` refer to the same array.
# Dividing by field.std('time') was considered but is intentionally not
# applied here.
da = field = anom['lsp']

In [None]:
# Show the repr of the 'lsp' anomaly DataArray.
da

<xarray.DataArray 'lsp' (time: 10228, latitude: 101, longitude: 221)>
dask.array<shape=(10228, 101, 221), dtype=float32, chunksize=(31, 101, 221)>
Coordinates:
  * longitude  (longitude) float32 -125.0 -124.75 -124.5 ... -70.5 -70.25 -70.0
  * latitude   (latitude) float32 50.0 49.75 49.5 49.25 ... 25.5 25.25 25.0
  * time       (time) datetime64[ns] 1980-01-01T23:00:00 ... 2017-12-31T23:00:00

In [None]:
# Reference grid point, selected by coordinate label.
point = {'latitude': 40, 'longitude': -90}
# Dot product of the reference point's anomaly series with the anomaly field;
# the product is summed over the shared time dimension, leaving one value per
# grid cell (a covariance up to normalisation -- it is not divided by the
# number of time steps). `.compute()` evaluates the dask graph right away.
cov = da.sel(point).dot(da).compute()

# does it work until here?

In [None]:
# Quick look at the result on plain (non-map) axes.
cov.plot.pcolormesh()

In [None]:
# Map helper with a 15x10 figure, a Lambert conformal map projection and
# PlateCarree (lon/lat) as the coordinate system of the plotted data.
m = Map(
    transform=ccrs.PlateCarree(),
    projection=ccrs.LambertConformal(central_latitude=45,
                                     central_longitude=-110),
    figure_kws={'figsize': (15, 10)},
)

In [None]:
# Draw the field computed for the reference point on the map.
m.plot(cov)

In [None]:
# Reference point chosen by integer position: index 75 along latitude and
# index 175 along longitude. The anomaly array is bound to `da`; no variable
# named `a` is ever assigned in this notebook.
cov = da[:, 75, 175].dot(da)
m.plot(cov)

In [None]:
# Reference point chosen by integer position: index 15 along latitude and
# index 175 along longitude. The anomaly array is bound to `da`; no variable
# named `a` is ever assigned in this notebook.
cov = da[:, 15, 175].dot(da)
m.plot(cov)

In [None]:
# Reference point selected by coordinate label, then marked on the map.
# The anomaly array is bound to `da`; no variable named `a` is ever assigned
# in this notebook.
point = dict(latitude=35, longitude=-75)
cov = da.loc[point].dot(da)
m.plot(cov)
m.plot_point(lat=point['latitude'], lon=point['longitude'])

In [None]:
# Reference point selected by coordinate label, then marked on the map.
# The anomaly array is bound to `da`; no variable named `a` is ever assigned
# in this notebook.
point = dict(latitude=35, longitude=-112)
cov = da.loc[point].dot(da)
m.plot(cov)
m.plot_point(lat=point['latitude'], lon=point['longitude'])

In [None]:
# Anomaly time series at two grid points, drawn one after the other.
# The anomaly array is bound to `da`; no variable named `a` is ever assigned
# in this notebook.
da.loc[dict(latitude=35, longitude=-112)].plot()
da.loc[dict(latitude=33, longitude=-117)].plot()

# Dataset Variable's Distribution

Open question: is seaborn suitable here, or are there too many values ("big data")?
-> probably better to use the "bokeh" module

In [None]:
# Load every 'lsp' value into memory as one flat NumPy array.
data = np.ravel(xar['lsp'].values)
# Keep only values above the 0.001 threshold before plotting.
data = data[np.greater(data, 0.001)]

# Distribution plot of the remaining values.
sns.distplot(data)