In [27]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import re
import datetime

In [31]:
def kml2df(kml_filename):
    """Read one reference-ground-track KML file into a table of track points.

    The file is expected to contain a ``LineString_kml`` element whose
    ``<coordinates>`` hold whitespace-separated ``lon,lat[,alt]`` tuples, and a
    ``<name>`` near the first ``<Point id=...>`` that contains
    ``DOY-<ddd> <dd>-<Mon>-<yyyy> <HH>:<MM>:<SS>``.

    Parameters
    ----------
    kml_filename : str or os.PathLike
        Path to the KML file. The file name must contain ``RGT_<number>``.

    Returns
    -------
    pandas.DataFrame
        One row per LineString vertex with columns:
        ``lat``, ``lon`` (floats taken from the coordinate tuples),
        ``rgt`` (int parsed from the file name),
        ``timestamp`` (seconds since the Unix epoch, UTC; the first vertex gets
        the time parsed from the KML and each following vertex is one second
        later), and ``time_str`` (the same instant formatted as
        ``'%a %Y-%b-%d %H:%M:%S'`` in UTC).

    Raises
    ------
    ValueError
        If the RGT number, the LineString coordinates, or the start time cannot
        be located or parsed.
    OSError
        If the file cannot be opened.
    """
    path_str = os.fspath(kml_filename)

    with open(path_str, 'r') as file:
        kml_string = file.read()

    # RGT number = digits following 'RGT_'. Look in the file name first so that
    # an 'RGT_' in a parent directory name cannot shadow it; fall back to the
    # full path to stay compatible with the previous whole-string search.
    rgt_pattern = r'RGT_(\d{1,4})'
    rgt_match = (re.search(rgt_pattern, os.path.basename(path_str))
                 or re.search(rgt_pattern, path_str))
    if rgt_match is None:
        raise ValueError("no 'RGT_<number>' token in file name: %r" % (path_str,))
    rgt = int(rgt_match.group(1))

    # --- track coordinates -------------------------------------------------
    line_start = kml_string.find('LineString_kml')
    line_end = kml_string.find('</LineString>', line_start) if line_start >= 0 else -1
    if line_start < 0 or line_end < 0:
        raise ValueError("no 'LineString_kml' element found in %r" % (path_str,))
    kml_string_line = kml_string[line_start:line_end]

    open_tag = '<coordinates>'
    coords_start = kml_string_line.find(open_tag)
    coords_end = kml_string_line.find('</coordinates>', coords_start) if coords_start >= 0 else -1
    if coords_start < 0 or coords_end < 0:
        raise ValueError("no <coordinates> inside the LineString of %r" % (path_str,))
    coords_text = kml_string_line[coords_start + len(open_tag):coords_end]

    # Parse the numbers explicitly instead of eval()-ing file contents.
    # split() tolerates any whitespace (including none) before </coordinates>,
    # so the last value is never truncated.
    kml_coords_array = np.array(
        [[float(value) for value in vertex.split(',')] for vertex in coords_text.split()],
        dtype=float,
    )
    if kml_coords_array.ndim != 2 or kml_coords_array.shape[1] < 2:
        raise ValueError("could not parse lon,lat tuples from %r" % (path_str,))

    # coordinate tuples are ordered lon, lat[, alt]
    kml_lat = kml_coords_array[:, 1]
    kml_lon = kml_coords_array[:, 0]
    kml_df = pd.DataFrame({'lat': kml_lat, 'lon': kml_lon, 'rgt': rgt})

    # --- first timestamp ---------------------------------------------------
    # The <name> is searched from the first '<Point id=' up to 100 characters
    # past the first '</Point>' (window kept exactly as before).
    # NOTE(review): whether that <name> belongs to the first track vertex
    # depends on the KML layout - confirm against a sample file.
    substr = kml_string[kml_string.find('<Point id='):kml_string.find('</Point>') + 100]
    descr = substr[substr.find('<name>') + len('<name>'):substr.find('</name>')]
    doy_pos = descr.find('DOY-')
    if doy_pos < 0:
        raise ValueError("no 'DOY-' time label found in %r" % (path_str,))
    dt_str = descr[doy_pos:]
    dt_str = dt_str[dt_str.find(' ') + 1:].strip()  # drop the 'DOY-ddd ' prefix

    # Attach UTC explicitly: a naive datetime would be converted using the
    # machine's local time zone by .timestamp().
    utc = datetime.timezone.utc
    dt = datetime.datetime.strptime(dt_str, '%d-%b-%Y %H:%M:%S').replace(tzinfo=utc)
    timestamp_utc = dt.timestamp()

    # consecutive vertices are assigned times one second apart
    kml_df['timestamp'] = timestamp_utc + np.arange(len(kml_df))
    kml_df['time_str'] = [
        datetime.datetime.fromtimestamp(t, tz=utc).strftime('%a %Y-%b-%d %H:%M:%S')
        for t in kml_df.timestamp
    ]

    return kml_df

In [32]:
kml_dir = '/home/jovyan/shared-public/ICESat-2-Hackweek/KMLs'
# kml_dir = '/home/jovyan/shared-public/ICESat-2-Hackweek/ground_tracks'

# Full path of every regular file that sits directly inside kml_dir.
kml_filelist = [
    kml_dir + '/' + name
    for name in os.listdir(kml_dir)
    if os.path.isfile(os.path.join(kml_dir, name))
]

# Report how many KML files are present for each of cycles 1..21.
for cycle in np.arange(1, 22):
    cycle_search = '_cycle%i_' % cycle
    # The character right after "<kml_dir>/" is the first character of the file
    # name, so the second condition skips hidden (dot) files.
    cycle_filelist = sorted(
        path for path in kml_filelist
        if cycle_search in path and path[len(kml_dir) + 1] != '.'
    )
    nfiles = len(cycle_filelist)
    print('cycle %2i: %4i KMLs' % (cycle, nfiles))

cycle  1:    0 KMLs
cycle  2:    0 KMLs
cycle  3:    0 KMLs
cycle  4:    0 KMLs
cycle  5:    0 KMLs
cycle  6:    0 KMLs
cycle  7:    0 KMLs
cycle  8:    0 KMLs
cycle  9:    0 KMLs
cycle 10:    0 KMLs
cycle 11:    0 KMLs
cycle 12:    0 KMLs
cycle 13:    0 KMLs
cycle 14: 1387 KMLs
cycle 15: 1387 KMLs
cycle 16:    1 KMLs
cycle 17:    0 KMLs
cycle 18:    0 KMLs
cycle 19:    0 KMLs
cycle 20: 1387 KMLs
cycle 21: 1387 KMLs


In [41]:
# Read every KML of one cycle and stack the points into a single GeoDataFrame.
cycle = 21
cycle_search = '_cycle%i_' % cycle
# The character right after "<kml_dir>/" is the first character of the file
# name, so the second condition skips hidden (dot) files.
cycle_filelist = sorted(
    f for f in kml_filelist
    if cycle_search in f and f[len(kml_dir) + 1] != '.'
)
nfiles = len(cycle_filelist)
print('cycle %2i: %4i KMLs' % (cycle, nfiles))

df_list = []
startTime = datetime.datetime.now()
# start=1 so the counter shows files completed and the last update reads "N / N"
for i, fn in enumerate(cycle_filelist, start=1):
    df_list.append(kml2df(fn))
    print('---> read %4i / %4i' % (i, nfiles), end='\r')

# NOTE: the index is not reset, so row labels restart at 0 for every file.
df_all = pd.concat(df_list)
print('Time elapsed:', datetime.datetime.now() - startTime)

# point geometry is (x, y) = (lon, lat) in WGS84
gdf = gpd.GeoDataFrame(df_all, geometry=gpd.points_from_xy(df_all.lon, df_all.lat), crs="EPSG:4326")
gdf

cycle 21: 1387 KMLs
Time elapsed: 0:02:17.876483


Unnamed: 0,lat,lon,rgt,timestamp,time_str,geometry
0,0.027959,-0.131847,1,1.695089e+09,Tue 2023-Sep-19 02:03:28,POINT (-0.13185 0.02796)
1,0.091968,-0.138250,1,1.695089e+09,Tue 2023-Sep-19 02:03:29,POINT (-0.13825 0.09197)
2,0.155977,-0.144652,1,1.695089e+09,Tue 2023-Sep-19 02:03:30,POINT (-0.14465 0.15598)
3,0.219986,-0.151055,1,1.695089e+09,Tue 2023-Sep-19 02:03:31,POINT (-0.15105 0.21999)
4,0.283995,-0.157457,1,1.695089e+09,Tue 2023-Sep-19 02:03:32,POINT (-0.15746 0.28400)
...,...,...,...,...,...,...
5651,-0.347574,-0.094110,1387,1.702936e+09,Mon 2023-Dec-18 21:43:11,POINT (-0.09411 -0.34757)
5652,-0.283567,-0.100513,1387,1.702936e+09,Mon 2023-Dec-18 21:43:12,POINT (-0.10051 -0.28357)
5653,-0.219559,-0.106915,1387,1.702936e+09,Mon 2023-Dec-18 21:43:13,POINT (-0.10692 -0.21956)
5654,-0.155551,-0.113318,1387,1.702936e+09,Mon 2023-Dec-18 21:43:14,POINT (-0.11332 -0.15555)


In [42]:
# Save the point GeoDataFrame for this cycle as both a pickle and a CSV.
out_dir = '/home/jovyan/shared-public/ICESat-2-Hackweek/ground_tracks/'
out_fn = 'cycle%02i_1hz_points' % cycle
pkl_path = os.path.join(out_dir, out_fn + '.pkl')
csv_path = os.path.join(out_dir, out_fn + '.csv')

startTime = datetime.datetime.now()
# write pickle first, then CSV, echoing each path once its file is written
for write, target in ((gdf.to_pickle, pkl_path), (gdf.to_csv, csv_path)):
    write(target)
    print(target)
print('Time elapsed:', datetime.datetime.now() - startTime)

/home/jovyan/shared-public/ICESat-2-Hackweek/ground_tracks/cycle21_1hz_points.pkl
/home/jovyan/shared-public/ICESat-2-Hackweek/ground_tracks/cycle21_1hz_points.csv
Time elapsed: 0:04:55.192931


In [6]:
# Display the point GeoDataFrame (a bare last expression gets the rich repr).
# NOTE(review): this cell's execution count (6) is lower than that of the cells
# above it, and its saved output shows lat/lon swapped relative to the output
# of the cell that builds gdf - re-run after "Restart Kernel -> Run All".
gdf

Unnamed: 0,lat,lon,rgt,timestamp,time_str,geometry
0,-0.131847,0.027959,1,1.695089e+09,Tue 2023-Sep-19 02:03:28,POINT (0.02796 -0.13185)
1,-0.138250,0.091968,1,1.695089e+09,Tue 2023-Sep-19 02:03:29,POINT (0.09197 -0.13825)
2,-0.144652,0.155977,1,1.695089e+09,Tue 2023-Sep-19 02:03:30,POINT (0.15598 -0.14465)
3,-0.151055,0.219986,1,1.695089e+09,Tue 2023-Sep-19 02:03:31,POINT (0.21999 -0.15105)
4,-0.157457,0.283995,1,1.695089e+09,Tue 2023-Sep-19 02:03:32,POINT (0.28400 -0.15746)
...,...,...,...,...,...,...
5651,-0.094110,-0.347574,1387,1.702936e+09,Mon 2023-Dec-18 21:43:11,POINT (-0.34757 -0.09411)
5652,-0.100513,-0.283567,1387,1.702936e+09,Mon 2023-Dec-18 21:43:12,POINT (-0.28357 -0.10051)
5653,-0.106915,-0.219559,1387,1.702936e+09,Mon 2023-Dec-18 21:43:13,POINT (-0.21956 -0.10692)
5654,-0.113318,-0.155551,1387,1.702936e+09,Mon 2023-Dec-18 21:43:14,POINT (-0.15555 -0.11332)


In [52]:
# turn it into a geopandas dataframe of linestrings (one row per RGT)
from shapely.geometry import LineString

# One LineString per RGT with vertices in (lon, lat) order, i.e. (x, y).
# Columns are selected with a list: the tuple form ['lat','lon'] after groupby
# is deprecated and fails on current pandas.
linestrings = gdf.groupby('rgt')[['lat', 'lon']].apply(
    lambda track: LineString(list(zip(track.lon.tolist(), track.lat.tolist())))
)

# representative time of each track = mean of its point timestamps
times = gdf.groupby(by='rgt')['timestamp'].mean()

# both objects are indexed by rgt, so the geometry aligns with the times
dflines = gpd.GeoDataFrame(times, geometry=linestrings, crs="EPSG:4326")

# Format in UTC explicitly; fromtimestamp() without tz renders local time.
# assumes `timestamp` holds UTC epoch seconds - TODO confirm against kml2df
dflines['time_str'] = [
    datetime.datetime.fromtimestamp(t, tz=datetime.timezone.utc).strftime('%a %Y-%b-%d %H:%M:%S')
    for t in dflines.timestamp
]
dflines

  linestrings = gdf.groupby(['rgt'])['lat','lon'].apply(lambda x: LineString(list(zip(x.lon.tolist(),x.lat.tolist()))))


Unnamed: 0_level_0,timestamp,geometry,time_str
rgt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1.695092e+09,"LINESTRING (0.02796 -0.13185, 0.09197 -0.13825...",Tue 2023-Sep-19 02:50:35
2,1.695097e+09,"LINESTRING (0.00707 -23.74811, 0.07108 -23.754...",Tue 2023-Sep-19 04:24:53
3,1.695103e+09,"LINESTRING (0.05210 -47.37185, 0.11610 -47.378...",Tue 2023-Sep-19 05:59:10
4,1.695109e+09,"LINESTRING (0.02984 -70.98859, 0.09384 -70.994...",Tue 2023-Sep-19 07:33:28
5,1.695114e+09,"LINESTRING (0.06394 -94.61048, 0.12795 -94.616...",Tue 2023-Sep-19 09:07:45
...,...,...,...
1383,1.702910e+09,"LINESTRING (0.04089 117.96501, 0.10489 117.958...",Mon 2023-Dec-18 14:38:57
1384,1.702916e+09,"LINESTRING (0.01387 94.34678, 0.07788 94.34038...",Mon 2023-Dec-18 16:13:15
1385,1.702922e+09,"LINESTRING (0.04766 70.72270, 0.11167 70.71630...",Mon 2023-Dec-18 17:47:32
1386,1.702927e+09,"LINESTRING (0.01873 47.10595, 0.08274 47.09955...",Mon 2023-Dec-18 19:21:50
