In [45]:
## first time series test

# import external packages
import numpy as np
import pandas as pd
import numba
from numba import vectorize
import glob # for file search
import copy
import os # operating system stuff
import re # regex
import fastparquet # fast read/write for large data structures
import sklearn.preprocessing as pre # for data normalisation
from sklearn.metrics import pairwise_distances

import geopandas as gpd
import rasterio as rio
import rasterio.mask
from rasterio.plot import plotting_extent
from shapely.geometry import Polygon
from shapely.geometry.point import Point
import pyproj
from pyproj import CRS
from inpoly import inpoly2 # for fast inpolygon checks
import utm

import matplotlib.pyplot as plt 
import matplotlib.dates as mdates
from matplotlib import cm as mpl_cm
from matplotlib import colors as mcolors 
import matplotlib.image as mplimg

from mpl_toolkits.axes_grid1 import make_axes_locatable # for colorbar scaling
from mpl_toolkits.axes_grid1 import ImageGrid
from matplotlib_scalebar.scalebar import ScaleBar
from matplotlib.gridspec import GridSpec
from matplotlib.ticker import FormatStrFormatter

import seaborn as sns
from matplotlib import rc_file_defaults
rc_file_defaults()
# sns.set(style=None, color_codes=True)

from shapely.geometry import Polygon
from shapely.geometry.point import Point
import datetime

import configparser

from cmcrameri import cm # for scientific colourmaps

###########################
# import main local package
import SPOTSAR_main as sm


In [46]:
INPUT_CSV = '/Users/markbemelmans/Documents/PhD/projects/Merapi2021/CSK/dsc1/h5_file_ts_win13.csv'
# INPUT_CSV = '/Users/markbemelmans/Documents/PhD/projects/Merapi2021/TSX/staring_134/h5_file_ts_win2.csv'

NAN_FRAC_TRESH = 0

In [47]:
# load data
df = pd.read_csv(INPUT_CSV)
print(np.shape(df))

# compute nan_fraction
disp_data = df.iloc[:,2:]
n_nans = np.sum(np.isnan(disp_data),axis=1)
nan_frac = n_nans/np.shape(disp_data)[1]

# plot cumulative distribution for nan_frac

fig, ax = plt.subplots(1,1)

ax.hist(nan_frac, 100, histtype="step",
                               cumulative=True, label="Cumulative histogram")


(323032, 322)


(array([262958., 270919., 273671., 280543., 282132., 284291., 285973.,
        286848., 288213., 288780., 289707., 290535., 290967., 291663.,
        291997., 292609., 293208., 293486., 294110., 295054., 295710.,
        296035., 296702., 297891., 298208., 298842., 299112., 299693.,
        300321., 301237., 301861., 302171., 302750., 303333., 303604.,
        304120., 304395., 304843., 305284., 305523., 305925., 306129.,
        306480., 306653., 306994., 307320., 307488., 307857., 308027.,
        308346., 309376., 310265., 311421., 311606., 311973., 312360.,
        312532., 312898., 313061., 313382., 314220., 314591., 314969.,
        315135., 317498., 317678., 317956., 318186., 318295., 318491.,
        318586., 318777., 318928., 319008., 319154., 319216., 319360.,
        319508., 319587., 319708., 319786., 319927., 319985., 320082.,
        320210., 320253., 320360., 320435., 320572., 320709., 320784.,
        320950., 321035., 321183., 321397., 321508., 321815., 322015.,
      

In [48]:
# remove rows with too many NaNs
df['NaN_frac'] = nan_frac
filt_df = df[df.NaN_frac<=NAN_FRAC_TRESH]
# print(filt_df)
print(np.shape(filt_df))


(253395, 323)


In [49]:
# make design matrix

def get_date_list(df):
    columns = df.columns
    range_names = columns[2::2]
    date1 = [name[14:22] for name in range_names]
    date2 = [name[24:32] for name in range_names]
    return date1, date2

def get_design_mat(date0,date1):
    unique_dates = np.unique(np.union1d(date0,date1))
    print(len(date0))
    print(len(unique_dates))
    A = np.zeros([len(date0),len(unique_dates)-1])
    for dates in enumerate(zip(date0,date1)):
#         print(unique_dates)
#         print(dates[1][0])
        print(np.where(unique_dates==dates[1][0])[0][0])
        A[dates[0],np.where(unique_dates==dates[1][1])[0][0]-1] = 1
        if np.where(unique_dates==dates[1][0])[0][0]!=0:
            A[dates[0],np.where(unique_dates==dates[1][0])[0][0]-1] = -1
    
    return A[:-1,1:]
    


date1, date2 = get_date_list(filt_df)

A = get_design_mat(date1,date2)    
print(A)

161
56
1
1
1
2
2
2
2
3
3
3
4
4
4
5
5
5
6
6
6
7
7
7
8
8
8
9
9
9
10
10
10
11
11
11
12
12
12
13
13
13
14
14
14
15
15
15
16
16
16
17
17
17
18
18
18
19
19
19
20
20
20
21
21
21
22
22
22
23
23
23
24
24
24
25
25
25
26
26
26
27
27
27
28
28
28
29
29
29
30
30
30
31
31
31
32
32
32
33
33
33
34
34
34
35
35
35
36
36
36
37
37
37
38
38
38
39
39
39
40
40
40
41
41
41
42
42
42
43
43
43
44
44
44
45
45
45
46
46
46
47
47
47
48
48
48
49
49
49
50
50
50
51
51
51
52
52
52
53
53
54
0
[[ 1.  0.  0. ...  0.  0.  0.]
 [ 0.  1.  0. ...  0.  0.  0.]
 [ 0.  0.  1. ...  0.  0.  0.]
 ...
 [ 0.  0.  0. ... -1.  1.  0.]
 [ 0.  0.  0. ... -1.  0.  1.]
 [ 0.  0.  0. ...  0. -1.  1.]]


In [50]:
fig, ax = plt.subplots(1,1)

ax.scatter(filt_df['Longitude'],filt_df['Latitude'],s=10,c=filt_df['Range_offset_c20201013_c20201113'])

<matplotlib.collections.PathCollection at 0x7f82449a5d30>

In [51]:
# solve least square problem Ax = B with A as image pair matrix, x, 
# displacement time series and B displacement for each image pair as per the rows of A.
# code block runs at about 3 min per 100.000 points

n_points = np.shape(filt_df)[0]
rdisp_ts = np.empty(shape=(n_points, A.shape[1]+1), dtype=float) # pre-alloc nd-array
print(np.shape(rdisp_ts))
unique_dates = pd.to_datetime(np.unique(np.union1d(date1,date2)))

def run_LS_inversion(df,rdisp_ts, A, r_a = 'r', prog_mod=5000):
    if r_a == 'r':
        disp_data = df.iloc[:,2::2]
    elif r_a == 'a':
        disp_data = df.iloc[:,3::2]
    else:
        print("ERROR: wrong value for r_a, options are 'r' or 'a'.")


    for i, (index,point) in enumerate(disp_data.iterrows()):
        if np.mod(i,prog_mod)==0:
            print('fraction complete:',i/(np.shape(df)[0])) # progress tracker)
        # B = [0] + point[:-1].to_list()
        B = point[:-1].to_list()
        ts = np.linalg.lstsq(A, B, rcond=None)[0]
        rdisp_ts[i,:] = np.insert(ts,0,0)

run_LS_inversion(filt_df, rdisp_ts, A, r_a='r', prog_mod=5000)

# for p in enumerate([:,0]):
#     if np.mod(p[0],5000)==0:
#                 print('fraction complete:',p[0]/(height*width)) # progress tracker
# #                 print(rdisp[p[0],:])
#     if np.any(rdisp[p[0],:]==0):
#         rdisp_ts[p[0],:] = np.nan # inset NaN for no data value 0.0
#     else:
#             B = rdisp[p[0],:]
#             rdisp_ts[p[0],:] = np.linalg.lstsq(A, B, rcond=None)[0]


(253395, 55)
fraction complete: 0.0
fraction complete: 0.019732038911580735
fraction complete: 0.03946407782316147
fraction complete: 0.0591961167347422
fraction complete: 0.07892815564632294
fraction complete: 0.09866019455790367
fraction complete: 0.1183922334694844
fraction complete: 0.13812427238106514
fraction complete: 0.15785631129264588
fraction complete: 0.1775883502042266
fraction complete: 0.19732038911580735
fraction complete: 0.21705242802738808
fraction complete: 0.2367844669389688
fraction complete: 0.2565165058505495
fraction complete: 0.2762485447621303
fraction complete: 0.295980583673711
fraction complete: 0.31571262258529176
fraction complete: 0.33544466149687246
fraction complete: 0.3551767004084532
fraction complete: 0.37490873932003393
fraction complete: 0.3946407782316147
fraction complete: 0.4143728171431954
fraction complete: 0.43410485605477617
fraction complete: 0.4538368949663569
fraction complete: 0.4735689338779376
fraction complete: 0.49330097278951834
f

In [54]:
%matplotlib osx
# quickly check displacement time series
print(np.shape(rdisp_ts))

alpha = 0.1
# fig, ax = plt.subplots(1,1)

# for row in rdisp_ts[::100,:]:
#     ax.plot(row,color=[0,0,0,alpha])

dist = 30/110123.84

# q_lon = 110.442038
# q_lat = -7.537660

# q_lon = 110.444153
# q_lat = -7.533771

q_lon = 110.432438
q_lat = -7.541523

filt1 = np.abs(filt_df.Longitude.to_numpy()-q_lon)<dist
filt2 = np.abs(filt_df.Latitude.to_numpy()-q_lat)<dist
idx = np.argwhere(filt1 & filt2)
print(np.shape(idx))


fig, ax = plt.subplots(1,1)
unique_dates = pd.to_datetime(np.unique(np.union1d(date1,date2)))[1:]
print(unique_dates)
for i in idx:
    # print(i)
    print(rdisp_ts[i,:])
    ax.plot(unique_dates,rdisp_ts[i,:][0],color=[0,0,0,alpha])

percentiles = np.empty(shape=(np.shape(rdisp_ts[0,:][0]),3), dtype=float)

for id in enumerate(percentiles[:,0]):
    if id[0]==0:
        percentiles[id[0],:] = [0,0,0]
    else:
        percentiles[id[0],:] = [np.nanpercentile(df_rdisp_ts.iloc[idx,id[0]-1].to_numpy(),2.5), 
                                np.nanpercentile(df_rdisp_ts.iloc[idx,id[0]-1].to_numpy(),50), 
                                np.nanpercentile(df_rdisp_ts.iloc[idx,id[0]-1].to_numpy(),97.5)]

yerr = np.abs([percentiles[:,0]-percentiles[:,1],percentiles[:,2]-percentiles[:,1]])
ax.errorbar(unique_dates,
            percentiles[:,1],
            yerr = yerr, 
            color='tab:orange',
            zorder=10)

(253395, 55)
(49, 1)
DatetimeIndex(['2020-09-10', '2020-09-19', '2020-09-26', '2020-09-27',
               '2020-10-05', '2020-10-12', '2020-10-13', '2020-11-13',
               '2020-11-14', '2020-11-22', '2020-12-24', '2020-12-31',
               '2021-01-01', '2021-01-09', '2021-01-16', '2021-01-17',
               '2021-01-25', '2021-02-01', '2021-02-02', '2021-02-17',
               '2021-02-18', '2021-03-02', '2021-03-05', '2021-03-06',
               '2021-03-14', '2021-03-21', '2021-03-22', '2021-03-30',
               '2021-04-06', '2021-04-19', '2021-04-22', '2021-04-23',
               '2021-05-01', '2021-05-08', '2021-05-09', '2021-05-21',
               '2021-06-09', '2021-07-11', '2021-07-20', '2021-07-27',
               '2021-07-28', '2021-08-12', '2021-08-13', '2021-08-29',
               '2021-09-10', '2021-09-13', '2021-09-14', '2021-09-22',
               '2021-09-26', '2021-09-30', '2021-10-15', '2021-10-16',
               '2021-10-24', '2021-10-31', '2021-11-01']

: 

In [41]:
# add coordinates back and add column headings
lons = filt_df.Longitude.to_numpy()
lats = filt_df.Latitude.to_numpy()
print(np.shape(lons))
rdisp_ts_lon_lat = np.column_stack((lons,lats,rdisp_ts))

# get list of column names
col_names = ['Longitude','Latitude']
col_names += list(np.unique(np.union1d(date1,date2))[1:]) # don't question it
print(col_names)

# make dataframe
rdisp_ts_df = pd.DataFrame(rdisp_ts_lon_lat,columns=col_names)
print(rdisp_ts_df)


(45858,)
['Longitude', 'Latitude', '20201116', '20201127', '20201208', '20201219', '20210110', '20210201', '20210223', '20210317', '20210408', '20210430', '20210522', '20210602', '20210613', '20211001', '20211023']
        Longitude  Latitude  20201116  20201127  20201208  20201219  20210110  \
0      110.464058 -7.536289       0.0  0.002694  0.078262  0.147044  0.076470   
1      110.463875 -7.536327       0.0  0.004395  0.084953  0.133652  0.075474   
2      110.463799 -7.536384       0.0  0.047095  0.095977  0.142428  0.086976   
3      110.463669 -7.536431       0.0  0.053676  0.078267  0.125058  0.094378   
4      110.463432 -7.536457       0.0  0.041080  0.084951  0.119469  0.091398   
...           ...       ...       ...       ...       ...       ...       ...   
45853  110.424774 -7.539648       0.0  0.023086  0.011086  0.008328  0.020930   
45854  110.424759 -7.539718       0.0  0.018004  0.004000  0.005996  0.019521   
45855  110.424751 -7.539792       0.0  0.022102  0.00757

In [12]:
# show time series displacement maps

%matplotlib osx
plt.close('all')
fig, ax = plt.subplots(4,4)

df = rdisp_ts_df
for i,a in zip(range(np.shape(rdisp_ts_df)[1]-2),ax.ravel()):
    print(i)
    a.hexbin(df.Longitude,df.Latitude,df.iloc[:,2+i],gridsize=1000,cmap=cm.vik,vmin=-5, vmax=5)
    a.set_axis_off()

ax[3,3].set_axis_off()
fig.subplots_adjust(wspace=0,hspace=0)



# ax.hexbin(df.Longitude,df.Latitude,df.iloc[:,2+date_index],gridsize=1000,cmap=cm.vik,vmin=-1, vmax=1)


NameError: name 'rdisp_ts_df' is not defined