In [82]:
## first time series test

# import external packages
import numpy as np
import pandas as pd
import numba
from numba import vectorize
import glob # for file search
import copy
import os # operating system stuff
import re # regex
import fastparquet # fast read/write for large data structures
import sklearn.preprocessing as pre # for data normalisation
from sklearn.metrics import pairwise_distances

import geopandas as gpd
import rasterio as rio
import rasterio.mask
from rasterio.plot import plotting_extent
from shapely.geometry import Polygon
from shapely.geometry.point import Point
import pyproj
from pyproj import CRS
from inpoly import inpoly2 # for fast inpolygon checks
import utm

import matplotlib.pyplot as plt 
import matplotlib.dates as mdates
from matplotlib import cm as mpl_cm
from matplotlib import colors as mcolors 
import matplotlib.image as mplimg

from mpl_toolkits.axes_grid1 import make_axes_locatable # for colorbar scaling
from mpl_toolkits.axes_grid1 import ImageGrid
from matplotlib_scalebar.scalebar import ScaleBar
from matplotlib.gridspec import GridSpec
from matplotlib.ticker import FormatStrFormatter

import seaborn as sns
from matplotlib import rc_file_defaults
rc_file_defaults()
# sns.set(style=None, color_codes=True)

from shapely.geometry import Polygon
from shapely.geometry.point import Point
import datetime

import configparser

from cmcrameri import cm # for scientific colourmaps

###########################
# import main local package
import SPOTSAR_main as sm


In [90]:
import h5py

def extract_date_from_filename(filename):
    """Return the date-pair tag embedded in a file name.

    The tag is the first substring of the form ``x<8 digits>_x<8 digits>``,
    e.g. ``x20201116_x20201127``.

    Parameters
    ----------
    filename : str
        File name (or any string) to search.

    Returns
    -------
    str or None
        The matched tag, or ``None`` when the string contains no such pattern.
    """
    pair_tag = re.search(r'x\d{8}_x\d{8}', filename)
    return pair_tag.group() if pair_tag else None

def process_hdf5_file(file_path,pairname):
    """Read the four ``win_3_*_post`` datasets of one HDF5 file into a flat table.

    Each dataset is loaded fully into memory while the file is open and then
    flattened to 1-D with ``ravel()``, so every row of the result corresponds
    to one element of the stored arrays.

    Parameters
    ----------
    file_path : str
        Path of the HDF5 file, opened read-only.
    pairname : str
        Label appended to the two offset column names.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``Longitude``, ``Latitude``,
        ``Range_offset_<pairname>``, ``Azimuth_offset_<pairname>``.
    """
    # Output column name -> dataset key inside the HDF5 file.
    # (The mixed case of 'Lon' / 'lat' is how the keys are spelled in the file.)
    dataset_keys = {
        'Longitude': 'win_3_Lon_post',
        'Latitude': 'win_3_lat_post',
        f'Range_offset_{pairname}': 'win_3_R_off_post',
        f'Azimuth_offset_{pairname}': 'win_3_A_off_post',
    }

    with h5py.File(file_path, 'r') as h5_file:
        # `[:]` reads the dataset into memory, so nothing references the
        # file once the context manager closes it.
        flat_columns = {
            column: h5_file[key][:].ravel()
            for column, key in dataset_keys.items()
        }

    return pd.DataFrame(flat_columns)


# NOTE(review): absolute, machine-specific paths; consider moving them to a
# single configuration cell (e.g. a DATA_DIR constant) so the notebook can be
# re-run elsewhere.
input_folder = '/Users/markbemelmans/Documents/PhD/projects/Merapi2021/TSX/staring_134/h5_files/'
output_csv = '/Users/markbemelmans/Documents/PhD/projects/Merapi2021/TSX/staring_134/h5_file_ts_win3.csv'

# Keep only regular files whose name carries an 'xYYYYMMDD_xYYYYMMDD' tag.
# Anything else in the folder (e.g. '.DS_Store', sub-directories) would make
# the sort key None, which cannot be compared with str and raises TypeError,
# and would also be handed to h5py as if it were an HDF5 file.
file_list = [
    name for name in os.listdir(input_folder)
    if extract_date_from_filename(name) is not None
    and os.path.isfile(os.path.join(input_folder, name))
]
# Order by pair tag, falling back to the full file name for equal tags.
# This is the same order the previous "sorted() then stable sort by tag" gave.
file_list.sort(key=lambda name: (extract_date_from_filename(name), name))

# One cleaned table per file, in the order of file_list.
dfs = []
# Only referenced by the commented-out CSV export further down this cell.
columns = ['Longitude', 'Latitude']
for file_name in file_list:
    file_path = os.path.join(input_folder, file_name)
    pairname = extract_date_from_filename(file_name)

    df = process_hdf5_file(file_path,pairname)
    # Drop rows with a missing value in any of the four columns.
    df_no_na = df.dropna()
    # Make (Longitude, Latitude) unique so it can serve as a merge key.
    # NOTE(review): keep='first' retains whichever row happens to come first;
    # confirm this is acceptable when repeated coordinates carry different offsets.
    df_no_dupl = df_no_na.drop_duplicates(subset=['Longitude','Latitude'], keep='first')
    # Rows kept vs. rows read, as a quick sanity check per file.
    print(np.shape(df_no_dupl),np.shape(df))
    dfs.append(df_no_dupl)

# final_df = []
# for df_i in dfs:
#     if len(final_df)==0:
#         final_df = df_i
#     else:
#         final_df = final_df.merge(df_i,on=['Longitude','Latitude'],how='outer')
# # final_df = pd.concat(dfs, axis=1)

# final_df.to_csv(output_csv, index=False, columns=columns)

# if __name__ == "__main__":
#     input_folder = '/Users/markbemelmans/Documents/PhD/projects/Merapi2021/TSX/staring_134/h5_files/'
#     output_csv = '/Users/markbemelmans/Documents/PhD/projects/Merapi2021/TSX/staring_134/h5_file_ts_test.csv'
#     main(input_folder, output_csv)


(117149, 4) (128007, 4)
(113835, 4) (126300, 4)
(113578, 4) (128214, 4)
(116717, 4) (125334, 4)
(112854, 4) (124443, 4)
(55345, 4) (65985, 4)
(113949, 4) (122700, 4)
(55729, 4) (65610, 4)
(97300, 4) (117600, 4)
(61022, 4) (69720, 4)
(104268, 4) (123012, 4)
(97728, 4) (117810, 4)
(65557, 4) (67536, 4)
(62174, 4) (64680, 4)
(64511, 4) (67368, 4)
(110857, 4) (117810, 4)
(111845, 4) (122706, 4)
(109154, 4) (122400, 4)
(107575, 4) (117810, 4)
(105107, 4) (117810, 4)
(103171, 4) (117810, 4)
(111669, 4) (122400, 4)
(109613, 4) (122706, 4)
(107153, 4) (121482, 4)
(110095, 4) (122400, 4)
(107618, 4) (121482, 4)
(76079, 4) (84400, 4)
(109945, 4) (121482, 4)
(78437, 4) (86088, 4)
(102322, 4) (116892, 4)
(77112, 4) (83767, 4)
(103661, 4) (116892, 4)
(89505, 4) (119340, 4)
(73502, 4) (80602, 4)
(64151, 4) (82290, 4)
(68489, 4) (86088, 4)
(87461, 4) (116892, 4)
(87006, 4) (116892, 4)
(97501, 4) (119340, 4)


In [91]:
%matplotlib osx
plt.close('all')
fig, ax = plt.subplots(1,1)

df1 = dfs[3]

ax.hexbin(df1.Longitude,df1.Latitude,df1.iloc[:,2],gridsize=1000,cmap=cm.vik,vmin=-1, vmax=1)



<matplotlib.collections.PolyCollection at 0x7fc62a53f610>

In [5]:
# # find duplicated rows

# df = dfs[0]
# columns_to_check = ['Longitude','Latitude']
# duplicate_rows = df.duplicated(subset=columns_to_check, keep=False)


# df_no_duplicates = df.drop_duplicates(subset=columns_to_check, keep='first')
# print("Duplicate Rows:")
# print(df[duplicate_rows])
# print(np.sum(duplicate_rows))

# print(df_no_duplicates)


In [92]:
# test how many matches there are between dfs
%matplotlib osx

print(np.shape(pd.unique(dfs[0]['Longitude'])))
print(np.shape(pd.unique(dfs[0]['Latitude'])))
fig, ax = plt.subplots(1,1)


comb_df = pd.merge(dfs[0], dfs[1], on=['Longitude','Latitude'],how='outer')
comb_df = comb_df.dropna()
for i in range(2,39):
    print(i)
    comb_df = pd.merge(comb_df, dfs[i], on=['Longitude','Latitude'],how='outer')
    # comb_df = comb_df.dropna()
    print(np.shape(comb_df))
    ax.scatter(i,np.shape(comb_df)[0])
    # print(np.sum(comb_df.isna()))
print(np.shape(comb_df))

# small_comb_df = comb_df[::25]
comb_df.to_csv(output_csv, index=False)

(6426,)
(48553,)
2
(119319, 8)
3
(123226, 10)
4
(123280, 12)
5
(123306, 14)
6
(124279, 16)
7
(124283, 18)
8
(124284, 20)
9
(124433, 22)
10
(124472, 24)
11
(124480, 26)
12
(127029, 28)
13
(127073, 30)
14
(127099, 32)
15
(128103, 34)
16
(128160, 36)
17
(128170, 38)
18
(128220, 40)
19
(128232, 42)
20
(128237, 44)
21
(128294, 46)
22
(128318, 48)
23
(128320, 50)
24
(128324, 52)
25
(128324, 54)
26
(128324, 56)
27
(128329, 58)
28
(128329, 60)
29
(128333, 62)
30
(128337, 64)
31
(128366, 66)
32
(128377, 68)
33
(128471, 70)
34
(128481, 72)
35
(128481, 74)
36
(128481, 76)
37
(128483, 78)
38
(128513, 80)
(128513, 80)


In [249]:
# Overlay the coordinates of two individual tables and of the merged table.
# Each series gets a legend entry so the three point clouds can be told apart.
# NOTE(review): comb_df is drawn last, so it may hide the two earlier series.
fig, ax = plt.subplots(1,1)
ax.scatter(dfs[0]['Longitude'],dfs[0]['Latitude'],label='dfs[0]')
ax.scatter(dfs[5]['Longitude'],dfs[5]['Latitude'],label='dfs[5]')
ax.scatter(comb_df['Longitude'],comb_df['Latitude'],label='comb_df')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.legend()

<matplotlib.collections.PathCollection at 0x7fd16e2fa3a0>

In [146]:
# Rows of the first table whose (Longitude, Latitude) pair occurs more than
# once; keep=False flags every member of a repeated group, not only the
# second and later occurrences.
coord_cols = ['Longitude','Latitude']
is_repeated = dfs[0].duplicated(coord_cols, keep=False)
test_df = dfs[0][is_repeated]
print(test_df.shape)
# Show a ten-row window of the flagged rows.
print(test_df[10:20])
# print(np.size(test_df==True))

(50, 4)
       Longitude  Latitude  Range_offset_x20201116_x20201127  \
1479  110.468430 -7.558755                           -0.0440   
1572  110.461197 -7.542143                            0.0560   
1576  110.461601 -7.542523                           -0.0170   
1776  110.468430 -7.558755                           -0.0140   
1869  110.461197 -7.542143                            0.0850   
1873  110.461601 -7.542523                           -0.0360   
3217  110.464180 -7.554617                            0.0795   
3811  110.464180 -7.554617                            0.0940   
4071  110.463280 -7.551690                            0.0215   
4665  110.463280 -7.551690                           -0.0280   

      Azimuth_offset_x20201116_x20201127  
1479                             -0.0150  
1572                             -0.1165  
1576                             -0.0025  
1776                              0.3030  
1869                             -0.0690  
1873                         

In [149]:
# Shape of the merged table restricted to rows with no missing value in any column.
print(comb_df.dropna().shape)

(113019, 6)


      Longitude  Latitude  Range_offset_x20201116_x20201127  \
100  110.462173 -7.543308                            0.0390   
101  110.462311 -7.543409                            0.0445   
102  110.462349 -7.543492                            0.0180   
103  110.462341 -7.543565                            0.0020   
104  110.462357 -7.543643                           -0.0190   
105  110.462372 -7.543719                           -0.0030   
106  110.462410 -7.543802                            0.0055   
107  110.462357 -7.543865                           -0.0030   
108  110.462082 -7.543883                            0.0280   
109  110.462029 -7.543946                            0.0240   
110  110.461983 -7.544010                            0.0410   
111  110.461739 -7.544034                            0.0660   
112  110.461754 -7.544110                            0.0310   
113  110.461853 -7.544205                           -0.0125   
114  110.462036 -7.544317                           -0.

In [54]:
%matplotlib osx
import pandas as pd

# Sample dataframes
df1 = dfs[0][0:4000]
df2 = dfs[1][0:4000]
# df2 = pd.DataFrame({'A': [1, 2], 'B': [3, 5], 'C': [11, 12]})
# df3 = pd.DataFrame({'A': [13, 14], 'B': [15, 16], 'C': [17, 18]})

fig, ax = plt.subplots(1,1)

ax.scatter(df1['Longitude'],df1['Latitude'],s=20)
# ax.scatter(df2['Longitude'],df2['Latitude'],s=5)

# Merge dataframes on columns 'A' and 'B'
# result_df = pd.merge(df1, df2, on=['A', 'B']).merge(df3, on=['A', 'B'])

result_df = df1.merge(df2,on=['Longitude','Latitude'], how='outer')

# Rename columns to C1, C2, C3, etc.
result_df.columns = ['A', 'B', 'r_off1', 'a_off1','r_off2','a_off2']


# print(df1)
# print(df2)
print(result_df[5540:5560])

               A         B  r_off1  a_off1  r_off2  a_off2
5540  110.465759 -7.557611 -0.0255 -0.0540 -0.1090  0.0540
5541  110.465828 -7.557700 -0.0510 -0.1075 -0.0610  0.0480
5542  110.466003 -7.557811 -0.0350 -0.1070 -0.0545  0.1180
5543  110.466080 -7.557901 -0.0760 -0.1920 -0.0470  0.1450
5544  110.466095 -7.557976 -0.0470 -0.2210 -0.0070  0.1425
5545  110.466118 -7.558057 -0.0660 -0.2320  0.0070  0.1585
5546  110.466232 -7.558154 -0.0800 -0.2570  0.0150  0.1685
5547  110.466225 -7.558228 -0.0210 -0.2675  0.0190  0.1740
5548  110.466293 -7.558314  0.0320 -0.0060  0.0280  0.2140
5549  110.466301 -7.558390  0.0420 -0.0170  0.0560  0.2250
5550  110.466377 -7.558480  0.0415 -0.0230 -0.0080  0.2235
5551  110.466492 -7.558579 -0.0010  0.2600 -0.0370  0.2520
5552  110.466583 -7.558671  0.0025  0.3315 -0.0365  0.2480
5553  110.466728 -7.558775 -0.0130  0.2560 -0.0350  0.2360
5554  110.463493 -7.536173 -0.0040 -0.0430  0.0420 -0.0370
5555  110.463379 -7.536225  0.0260 -0.0420  0.0480 -0.05