In [62]:
# Import libraries
import os
import xarray as xr
import pandas as pd
import numpy as np
import pymannkendall as mk
import rasterio
import matplotlib.pyplot as plt
import cartopy

In [2]:
# Data directory
data_dir = 'C:/Users/Holger/EGU2021/SC5.8/data'

In [32]:
# Import monthly precipitation data
precip_fp = os.path.join(data_dir, 'monthly_precip_baltics.nc')
precip_ds = xr.open_dataset(precip_fp)
precip_ds

In [33]:
# Convert the xarray to Pandas dataframe
precip_df = precip_ds.to_dataframe().reset_index()
precip_df

Unnamed: 0,latitude,longitude,time,spatial_ref,precip_sum
0,53.949860,21.04986,2001-01-31,0,
1,53.949860,21.04986,2001-02-28,0,
2,53.949860,21.04986,2001-03-31,0,
3,53.949860,21.04986,2001-04-30,0,
4,53.949860,21.04986,2001-05-31,0,
...,...,...,...,...,...
984955,59.549859,28.14986,2020-08-31,0,
984956,59.549859,28.14986,2020-09-30,0,
984957,59.549859,28.14986,2020-10-31,0,
984958,59.549859,28.14986,2020-11-30,0,


In [34]:
# Drop rows with missing precipitation values
precip_df = precip_df.dropna().reset_index()
precip_df

Unnamed: 0,index,latitude,longitude,time,spatial_ref,precip_sum
0,6000,53.949860,23.54986,2001-01-31,0,32.099998
1,6001,53.949860,23.54986,2001-02-28,0,22.000000
2,6002,53.949860,23.54986,2001-03-31,0,39.799999
3,6003,53.949860,23.54986,2001-04-30,0,32.400002
4,6004,53.949860,23.54986,2001-05-31,0,57.200005
...,...,...,...,...,...,...
611215,980635,59.549859,26.34986,2020-08-31,0,68.700005
611216,980636,59.549859,26.34986,2020-09-30,0,38.700001
611217,980637,59.549859,26.34986,2020-10-31,0,42.400002
611218,980638,59.549859,26.34986,2020-11-30,0,40.200001


In [35]:
# Extract month from the observation date as a new column
precip_df['month'] = precip_df['time'].dt.strftime('%m')
precip_df

Unnamed: 0,index,latitude,longitude,time,spatial_ref,precip_sum,month
0,6000,53.949860,23.54986,2001-01-31,0,32.099998,01
1,6001,53.949860,23.54986,2001-02-28,0,22.000000,02
2,6002,53.949860,23.54986,2001-03-31,0,39.799999,03
3,6003,53.949860,23.54986,2001-04-30,0,32.400002,04
4,6004,53.949860,23.54986,2001-05-31,0,57.200005,05
...,...,...,...,...,...,...,...
611215,980635,59.549859,26.34986,2020-08-31,0,68.700005,08
611216,980636,59.549859,26.34986,2020-09-30,0,38.700001,09
611217,980637,59.549859,26.34986,2020-10-31,0,42.400002,10
611218,980638,59.549859,26.34986,2020-11-30,0,40.200001,11


In [37]:
# Group locations by month and collect observation values to NumPy array
precip_group_df = precip_df.groupby(['latitude', 'longitude', 'month'])['precip_sum'].apply(np.array).reset_index()
precip_group_df

Unnamed: 0,latitude,longitude,month,precip_sum
0,53.949860,23.54986,01,"[32.1, 45.500008, 26.5, 40.999996, 52.4, 11.9,..."
1,53.949860,23.54986,02,"[22.0, 63.7, 18.1, 44.800003, 22.7, 39.5, 31.0..."
2,53.949860,23.54986,03,"[39.8, 26.1, 10.5, 37.2, 35.699997, 13.900001,..."
3,53.949860,23.54986,04,"[32.4, 13.200001, 29.500002, 38.5, 18.199999, ..."
4,53.949860,23.54986,05,"[57.200005, 34.9, 62.8, 58.300003, 77.9, 45.19..."
...,...,...,...,...
30559,59.549859,26.34986,08,"[60.800003, 11.6, 115.799995, 82.80001, 91.399..."
30560,59.549859,26.34986,09,"[43.5, 12.200001, 40.300003, 92.00001, 30.2, 3..."
30561,59.549859,26.34986,10,"[74.7, 36.600002, 58.3, 53.4, 36.5, 100.59999,..."
30562,59.549859,26.34986,11,"[70.200005, 78.100006, 51.100002, 56.1, 51.4, ..."


In [46]:
precip_array = precip_group_df['precip_sum'].to_numpy()
precip_array

array([array([32.1     , 45.500008, 26.5     , 40.999996, 52.4     , 11.9     ,
       81.200005, 49.5     , 36.500004, 26.400002, 27.700003, 51.500004,
       38.500004, 58.4     , 56.2     , 27.499998, 21.499998, 36.3     ,
       45.6     , 39.8     ], dtype=float32),
       array([22.      , 63.7     , 18.1     , 44.800003, 22.7     , 39.5     ,
       31.      , 23.      , 31.300001, 35.2     , 49.2     , 27.199999,
       31.999998, 23.000002,  8.      , 57.200005, 32.600002, 12.3     ,
       16.300001, 43.6     ], dtype=float32),
       array([39.8     , 26.1     , 10.5     , 37.2     , 35.699997, 13.900001,
       23.599998, 58.1     , 45.7     , 25.6     , 16.1     , 22.8     ,
       27.800001, 32.9     , 43.      , 46.200005, 61.300007, 23.9     ,
       40.3     , 19.6     ], dtype=float32),
       ...,
       array([ 74.7     ,  36.600002,  58.3     ,  53.4     ,  36.5     ,
       100.59999 ,  59.699997,  68.5     , 120.7     ,  45.6     ,
        54.000004,  82.200005, 

In [47]:
precip_dict = {}
precip_dict['trend'] = []
precip_dict['slope'] = []
for array in precip_array:
    result = mk.original_test(array)
    precip_dict['trend'].append(result.trend)
    precip_dict['slope'].append(result.slope)

In [49]:
precip_group_df['trend'] = precip_dict['trend']
precip_group_df['slope'] = precip_dict['slope']
display(precip_group_df['trend'].value_counts())
display(precip_group_df['slope'].describe())
display(precip_group_df)

no trend      30384
increasing      142
decreasing       38
Name: trend, dtype: int64

count    30564.000000
mean        -0.168654
std          0.828888
min         -3.290909
25%         -0.705000
50%         -0.085714
75%          0.368914
max          4.058333
Name: slope, dtype: float64

Unnamed: 0,latitude,longitude,month,precip_sum,trend,slope
0,53.949860,23.54986,01,"[32.1, 45.500008, 26.5, 40.999996, 52.4, 11.9,...",no trend,0.113462
1,53.949860,23.54986,02,"[22.0, 63.7, 18.1, 44.800003, 22.7, 39.5, 31.0...",no trend,-0.379048
2,53.949860,23.54986,03,"[39.8, 26.1, 10.5, 37.2, 35.699997, 13.900001,...",no trend,0.433333
3,53.949860,23.54986,04,"[32.4, 13.200001, 29.500002, 38.5, 18.199999, ...",no trend,0.603571
4,53.949860,23.54986,05,"[57.200005, 34.9, 62.8, 58.300003, 77.9, 45.19...",no trend,-0.925000
...,...,...,...,...,...,...
30559,59.549859,26.34986,08,"[60.800003, 11.6, 115.799995, 82.80001, 91.399...",no trend,-0.416667
30560,59.549859,26.34986,09,"[43.5, 12.200001, 40.300003, 92.00001, 30.2, 3...",no trend,0.793750
30561,59.549859,26.34986,10,"[74.7, 36.600002, 58.3, 53.4, 36.5, 100.59999,...",no trend,-0.075000
30562,59.549859,26.34986,11,"[70.200005, 78.100006, 51.100002, 56.1, 51.4, ...",no trend,-1.059722


In [54]:
precip_group_df['trend_numeric'] = precip_group_df['trend'].apply(lambda x: -1 if x == 'decreasing' else (1 if x == 'increasing' else 0))
display(precip_group_df.sample(10))
display(precip_group_df['trend_numeric'].value_counts())

Unnamed: 0,latitude,longitude,month,precip_sum,trend,slope,trend_numeric
5756,55.449859,23.94986,9,"[74.20001, 26.300001, 34.7, 58.500004, 34.7, 7...",no trend,-0.459524,0
5822,55.449859,24.54986,3,"[47.699997, 39.500004, 9.0, 43.700005, 42.8, 2...",no trend,-0.018254,0
8535,55.849859,25.04986,4,"[75.0, 26.2, 39.3, 14.400001, 27.1, 31.100002,...",no trend,0.262857,0
25173,58.449859,24.84986,10,"[96.200005, 33.9, 44.000004, 74.2, 67.7, 136.5...",no trend,-0.207142,0
30395,59.449859,26.54986,12,"[27.900003, 21.7, 61.9, 39.9, 29.200003, 54.2,...",no trend,0.029166,0
17302,56.949859,22.04986,11,"[112.1, 40.0, 48.7, 44.600002, 56.199997, 43.3...",no trend,0.619643,0
17863,56.949859,26.74986,8,"[68.600006, 0.0, 125.40001, 67.3, 102.700005, ...",no trend,-0.74762,0
23682,58.149859,25.64986,7,"[103.899994, 45.4, 95.59999, 90.700005, 49.199...",no trend,-0.696667,0
6687,55.549859,26.44986,4,"[83.5, 13.9, 34.900005, 15.6, 12.300001, 42.19...",no trend,0.575,0
9399,55.949859,25.84986,4,"[83.4, 16.5, 38.8, 11.0, 24.699999, 42.7, 25.5...",no trend,0.644445,0


 0    30384
 1      142
-1       38
Name: trend_numeric, dtype: int64

In [56]:
subset = precip_group_df[precip_group_df['month'] == '09']
subset

Unnamed: 0,latitude,longitude,month,precip_sum,trend,slope,trend_numeric
8,53.949860,23.54986,09,"[103.00001, 16.4, 31.0, 24.600002, 32.9, 51.6,...",no trend,0.259127,0
20,53.949860,23.64986,09,"[99.600006, 17.000002, 31.8, 25.199999, 33.0, ...",no trend,0.310714,0
32,53.949860,23.74986,09,"[99.1, 17.0, 34.2, 25.2, 33.4, 49.8, 32.7, 46....",no trend,0.046591,0
44,53.949860,24.04986,09,"[95.39999, 19.0, 29.2, 26.8, 32.0, 48.200005, ...",no trend,0.378889,0
56,53.949860,24.34986,09,"[89.399994, 19.3, 26.6, 29.1, 29.6, 54.000004,...",no trend,0.411111,0
...,...,...,...,...,...,...,...
30512,59.549859,25.94986,09,"[48.1, 8.0, 49.7, 92.200005, 27.6, 30.800001, ...",no trend,1.448931,0
30524,59.549859,26.04986,09,"[47.899998, 9.400001, 49.7, 92.0, 29.3, 31.800...",no trend,1.413889,0
30536,59.549859,26.14986,09,"[46.4, 10.6, 52.7, 94.4, 28.900002, 32.4, 103....",no trend,0.924167,0
30548,59.549859,26.24986,09,"[45.199997, 12.3, 45.2, 92.4, 32.2, 31.7, 99.1...",no trend,0.973016,0
