In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import xarray as xr
import os, sys
import dask

# Raise pandas' row-display cap so long frames are rendered without truncation.
pd.options.display.max_rows = 1_000_000

In [38]:


# Fetch xarray's 'rasm' tutorial dataset and load its data into memory.
ds = xr.tutorial.open_dataset('rasm').load()

def parse_datetime(time):
    """Convert an iterable of time stamps into a pandas ``DatetimeIndex``.

    Each element is first rendered with ``str`` and the resulting list of
    strings is handed to ``pd.to_datetime``.

    Parameters
    ----------
    time : iterable
        Time values whose string representation pandas can parse.

    Returns
    -------
    pandas.DatetimeIndex
        One entry per element of ``time``, in the same order.
    """
    as_text = list(map(str, time))
    return pd.to_datetime(as_text)

# Rebuild the time coordinate as a pandas DatetimeIndex via parse_datetime.
ds.coords['time'] = parse_datetime(ds.coords['time'].values)

# Keep only the first 20 positions along each of the x and y dimensions.
ds = ds.isel(x=slice(0, 20), y=slice(0, 20))

ds

<xarray.Dataset>
Dimensions:  (time: 36, x: 20, y: 20)
Coordinates:
  * time     (time) datetime64[ns] 1980-09-16T12:00:00 1980-10-17 ... 1983-08-17
    xc       (y, x) float64 189.2 189.4 189.6 189.7 ... 186.8 186.9 187.1 187.3
    yc       (y, x) float64 16.53 16.78 17.02 17.27 ... 23.72 24.0 24.27 24.54
Dimensions without coordinates: x, y
Data variables:
    Tair     (time, y, x) float64 nan nan nan nan nan ... nan nan nan nan nan
Attributes:
    title:                     /workspace/jhamman/processed/R1002RBRxaaa01a/l...
    institution:               U.W.
    source:                    RACM R1002RBRxaaa01a
    output_frequency:          daily
    output_mode:               averaged
    convention:                CF-1.4
    references:                Based on the initial model of Liang et al., 19...
    comment:                   Output from the Variable Infiltration Capacity...
    nco_openmp_thread_number:  1
    NCO:                       "4.6.0"
    history:                   

In [40]:
# Show the dataset's 'comment' global attribute.
ds.attrs['comment']

'Output from the Variable Infiltration Capacity (VIC) model.'

In [37]:
# Inspect the indexes attached to the dataset.
ds.indexes

time: DatetimeIndex(['1980-09-16 12:00:00', '1980-10-17 00:00:00',
                     '1980-11-16 12:00:00', '1980-12-17 00:00:00',
                     '1981-01-17 00:00:00', '1981-02-15 12:00:00',
                     '1981-03-17 00:00:00', '1981-04-16 12:00:00',
                     '1981-05-17 00:00:00', '1981-06-16 12:00:00',
                     '1981-07-17 00:00:00', '1981-08-17 00:00:00',
                     '1981-09-16 12:00:00', '1981-10-17 00:00:00',
                     '1981-11-16 12:00:00', '1981-12-17 00:00:00',
                     '1982-01-17 00:00:00', '1982-02-15 12:00:00',
                     '1982-03-17 00:00:00', '1982-04-16 12:00:00',
                     '1982-05-17 00:00:00', '1982-06-16 12:00:00',
                     '1982-07-17 00:00:00', '1982-08-17 00:00:00',
                     '1982-09-16 12:00:00', '1982-10-17 00:00:00',
                     '1982-11-16 12:00:00', '1982-12-17 00:00:00',
                     '1983-01-17 00:00:00', '1983-02-15 12:00:

In [3]:
# Directory added to the import path (presumably where the project-local
# ``extreme_classifier`` module lives -- confirm). Set the EXTREME_EVENTS_DIR
# environment variable to override the machine-specific default below.
EXTREME_EVENTS_DIR = os.environ.get(
    'EXTREME_EVENTS_DIR',
    r'C:\Users\Philipe Leal\Dropbox\Profissao\Python\Estatisticas_e_Regressoes\Distribuicoes_Estatisticas\Extremes\extreme_events\extreme_events',
)

# Guard the insert so re-running this cell does not stack duplicate entries.
if EXTREME_EVENTS_DIR not in sys.path:
    sys.path.insert(0, EXTREME_EVENTS_DIR)

In [7]:
from extreme_classifier import Extreme_Classifier

In [8]:
def parse_extremes(x, distribution_type='Positive', b=True):
    """Classify the values of a series with ``Extreme_Classifier``.

    Non-finite entries (NaN, +inf, -inf) are replaced by 0 before the
    classifier is fitted. When nothing but zeros remains after that
    cleaning, no classification is attempted and the input is returned
    unchanged.

    Parameters
    ----------
    x : array-like
        Numeric series to classify.
    distribution_type : str, default 'Positive'
        Forwarded to the ``Extreme_Classifier`` constructor.
    b : bool, default True
        Forwarded as the second positional argument of
        ``Extreme_Classifier.predict``.

    Returns
    -------
    array-like
        ``x`` itself when the cleaned series is entirely zero; otherwise the
        ``codes`` attribute of the classifier's prediction.
    """
    # Single pass that zeroes NaN and both infinities.
    cleaned = np.where(np.isfinite(x), x, 0)

    # Compare element-wise *before* reducing: ``np.all(cleaned) == 0`` would be
    # true as soon as one element is zero, skipping every series that merely
    # contains a zero (or a NaN/inf that was just replaced by zero).
    if np.all(cleaned == 0):
        return x

    classifier = Extreme_Classifier(distribution_type=distribution_type)
    classifier.fit(cleaned)

    return classifier.predict(cleaned, b).codes


In [9]:

def xarray_parse_extremes(ds, dim='time', new_dim_name='classes', kwargs=None):
    """Apply ``parse_extremes`` along one dimension of an xarray object.

    ``parse_extremes`` is run through ``xr.apply_ufunc`` with
    ``vectorize=True``, consuming ``dim`` as the input core dimension and
    producing ``new_dim_name`` as the output core dimension. A variable named
    ``'abc'`` taken from that result is then attached to a copy of ``ds``;
    the dataset passed in is left untouched, so the call can be repeated
    safely.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset whose variables are processed along ``dim``.
    dim : str, default 'time'
        Name of the core dimension handed to ``parse_extremes``.
    new_dim_name : str, default 'classes'
        Name given to the dimension of the classified output.
    kwargs : dict, optional
        Keyword arguments forwarded to ``parse_extremes``. Defaults to
        ``{'b': False, 'distribution_type': 'Positive'}``.

    Returns
    -------
    xarray.Dataset
        A new dataset equal to ``ds`` plus the ``'abc'`` variable.
    """
    # Build the default per call instead of sharing one dict between calls.
    if kwargs is None:
        kwargs = {'b': False, 'distribution_type': 'Positive'}

    filtered = xr.apply_ufunc(parse_extremes,
                              ds,
                              dask='allowed',
                              vectorize=True,
                              input_core_dims=[[dim]],
                              output_core_dims=[[new_dim_name]],
                              kwargs=kwargs,
                              join='outer',
                              dataset_fill_value=np.nan
                              ).compute()

    # NOTE(review): ``filtered[new_dim_name]`` selects the entry named after the
    # new dimension, which looks like a positional index rather than the
    # classified values held in ``filtered``'s data variables -- confirm intent.
    # ``assign`` returns a new dataset, so the caller's ``ds`` is not modified.
    return ds.assign(abc=filtered[new_dim_name])
    

In [10]:
# Apply xarray_parse_extremes to the dataset using its default arguments.
filtered = xarray_parse_extremes(ds)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Tair,xc,yc,abc
classes,time,x,y,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,1980-09-16 12:00:00,0,0,,189.222932,16.534986,0
0,1980-09-16 12:00:00,0,1,,188.968370,16.693973,0
0,1980-09-16 12:00:00,0,2,,188.712343,16.852192,0
0,1980-09-16 12:00:00,0,3,,188.454851,17.009638,0
0,1980-09-16 12:00:00,0,4,,188.195878,17.166281,0
0,1980-09-16 12:00:00,0,5,,187.935411,17.322119,0
0,1980-09-16 12:00:00,0,6,,187.673463,17.477129,0
0,1980-09-16 12:00:00,0,7,,187.410020,17.631298,0
0,1980-09-16 12:00:00,0,8,,187.145112,17.784607,0
0,1980-09-16 12:00:00,0,9,,186.878694,17.937038,0


In [37]:
# Inspect the coordinates of the returned dataset.
filtered.coords

Coordinates:
  * time     (time) datetime64[ns] 1980-09-16T12:00:00 1980-10-17 ... 1983-08-17
    xc       (y, x) float64 189.2 189.4 189.6 189.7 ... 186.8 186.9 187.1 187.3
    yc       (y, x) float64 16.53 16.78 17.02 17.27 ... 23.72 24.0 24.27 24.54

In [None]:
# Preview only the first rows: the long-format frame has one row per
# combination of all dimensions, which is too large to render in full.
filtered.to_dataframe().head(10)