# 1 Import Packages

In [1]:
import xarray as xr
import datetime
import sys
import numpy as np
import pandas as pd

# 2 Load Test Data

In [2]:
src_dir = "../climtools/src"

In [3]:
sys.path.append(src_dir)

In [4]:
import climtools_exceptions

## Register xarray

In [5]:
def anomaly(data, dims, weights = None):
    """
    Calculates the anomaly with respect to the specified dimensions

    Parameters
    ----------
    dims: dictionary of dimensions with respect to which anomalies are calculated
    Return
    ------
    data_anom: xarray dataarray or dataset
        Calculated Anomalieshttps://monitoring.dkrz.de/d/000000008/home?orgId=5&refresh=10s
    """
    
    
    
    return data-data.mean(dim=dims)

In [6]:
@xr.register_dataarray_accessor("preproc")

class preproc:
    def __init__(self, xarray_obj):
        self._obj = xarray_obj

        
    def time_anomaly(self, dim, timeframe):
        """
        Calculates anomalies with respect to a given timeframe
        
        
        """    
        #TODO Check for dtypes of dimensions, but cftime makes problems
        
        grouped = self._obj.groupby(dim+"."+timeframe)
        
        
        return anomaly(grouped, dims=dim)
    
    
    def anomaly(self, dims={"time"}):
        """
        Calculates the anomaly with respect to the specified dimensions

        Parameters
        ----------
        dims: dictionary of dimensions with respect to which anomalies are calculated
        Return
        ------
        data_anom: xarray dataarray or dataset
            Calculated Anomalieshttps://monitoring.dkrz.de/d/000000008/home?orgId=5&refresh=10s
        """
        
        return anomaly(self._obj, dims= dims)

    def normalize(self, dims={"time"}):
        """
        Normalizes  with respect to the specified dimensions

        Parameters
        ----------
        dims: dictionary of dimensions with respect to which anomalies are calculated
        
        Return
        ------
        data_anom: xarray dataarray or dataset
            Calculated Anomalies
        """
        
        return anomaly(self.data, dims=dims)/self.data.std(dim=dims)


    def sellonlatbox(self, lon_min, lon_max, lat_min, lat_max):
        """
        Selects a longitude-latitude box from a dataset
        Parameters:
        -----------
        data: xarray dataset or dataarray
            Input field, must have at least the dimension (lat,lon)
        lon_min: float
            Minimum longitude
        lon_max: float
            Maximum longitude
        lat_min: float
            Minimum latitude
        lat_max: float
            Maximum latitude

        Output:
        -------
        data_cropped: xarray dataset or dataarray
            Cropped Field
        """
        
        
        data_cropped = self.data.where( (self.data.lat<lat_max)&(self.data.lat>lat_min)&(self.data.lon<lon_max)&(self.data.lon>lon_min)  )

        return data_cropped

        
    
    def calculate_weights(self,weight_keyword):
        """
        Calculates the weights of a given array based upon the keyword
        
        """
        
        if weight_keyword == "cosine_lat":
            weights = np.cos(np.deg2rad(self.lat))
            


In [10]:
da_lonlat = xr.DataArray(np.random.normal(0,1, size= (180,360)), dims = ["lat","lon"], coords = {"lat":np.arange(-90,90),"lon":np.arange(-180,180)})

In [11]:
da_lonlat_weighted = da_lonlat.weighted(np.cos(np.deg2rad(da_lonlat.lat)))

In [12]:
da_lonlat_weighted - da_lonlat_weighted.mean(dim=("lon","lat"))

TypeError: unsupported operand type(s) for -: 'DataArrayWeighted' and 'float'

In [7]:
times = pd.date_range("2000-01-01","2010-12-31", name="time",freq="H")

In [8]:
annual_cycle = np.sin(2 * np.pi * (times.dayofyear.values / 365.25 - 0.28))


In [9]:
da = xr.DataArray(annual_cycle, dims = ["time"], coords = {"time":times})

In [34]:
da.coords.

Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-01T01:00:00 ... 2010-12-31

In [29]:
type(da.time[0].values)

numpy.datetime64

In [31]:
da.time.dtype

dtype('<M8[ns]')

In [21]:
da.preproc.time_anomaly(timeframe="season")

In [58]:
da.mean(dim="time")

In [63]:
da.preproc.time_anomaly(timeframe = "Month")

TypeError: time_anomaly() missing 1 required positional argument: 'timeframe'

In [57]:
da.preproc.anomaly(dims="time")

<xarray.DataArray (time: 96409)>
array([-0.98536517, -0.98536517, -0.98536517, ..., -0.97803121,
       -0.97803121, -0.98147232])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-01T01:00:00 ... 2010-12-31


In [80]:
cftime.DatetimeJulian(2015, 3, 31, 0, 0, 0, 0, 0, 90)

NameError: name 'cftime' is not defined

In [103]:
da.groupby("time.month").mean("time")

In [122]:
da_decoded = xr.decode_cf(da.rename("pressure").to_dataset(),use_cftime=True)

In [123]:
da_decoded

In [115]:
da_decoded.time

In [124]:
da_decoded.groupby("time.month")

DatasetGroupBy, grouped over 'month'
12 groups with labels 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12.

In [84]:

da.groupby("time.year").mean(dim="time")

In [17]:
da.to_netcdf("test.nc")

In [74]:
da = xr.load_dataarray("test.nc",use_cftime=True)

In [78]:
da["time"][0]

In [76]:
da.groupby("time.month").mean(dim="time")

In [128]:
import importliba

In [130]:
importlib.reload(xr)

<module 'xarray' from '/home/bjoern/anaconda3/envs/climtools_test/lib/python3.9/site-packages/xarray/__init__.py'>