# Preparing metadata for archiving ApRES data

In [1]:
import os
import pandas as pd
import numpy as np
import geopandas as gpd
import glob

## Define the folder where archived data are stored

In [5]:
level_0_path =  "/Users/jkingslake/Documents/data/thwaites_apres/archiving/level_0/"

level_1_path = '/Users/jkingslake/Documents/data/thwaites_apres/archiving/level_1/'

In [6]:
def filter_folders(folder_list, filter_string):
    return [x for x in folder_list if filter_string not in x]

In [7]:
# List contents of the directory and filter for just directories
attended_22_23 = [ f.name for f in os.scandir(level_0_path + '/attended/centerline/single/2022-2023') if f.is_dir() ]
attended_22_23 = filter_folders(attended_22_23, 'netcdf')
attended_23_24 = [ f.name for f in os.scandir(level_0_path + '/attended/centerline/single/2023-2024') if f.is_dir() ]
attended_23_24 = filter_folders(attended_23_24, 'netcdf')
print(f"there are {len(attended_22_23)} waypoint 2022-2023 directories")
print(f"there are {len(attended_23_24)} waypoint 2023-2024 directories")

there are 232 waypoint 2022-2023 directories
there are 95 waypoint 2023-2024 directories


## Load metadata from the excel files

In [8]:
# Load metadata for each year and convert to strings
md_22_23 = pd.read_excel(level_0_path + '/attended/centerline/single/2022-2023/metadata_notes_22-23.xlsx')
md_23_24 = pd.read_excel(level_0_path + '/attended/centerline/single/2023-2024/metadata_notes_23-24.xlsx')
md_waypoints_22_23 = md_22_23.waypoint
md_waypoints_23_24 = md_23_24.waypoint
md_waypoints_22_23 = [str(x) for x in md_waypoints_22_23]
md_waypoints_23_24 = [str(x) for x in md_waypoints_23_24]

In [9]:
md_22_23

Unnamed: 0,waypoint,Antenna Orientation,Tx_Rx_bearing (degrees clockwise from true north),Brunton (degrees),A (dB),G (dB),Time (NZ local),Tx (cm),Rx (cm),Antenna separation (m),Date (NZ local),Date (UTC),Notes,Filename,GPS_ID,longitude (EPSG:4326 - WGS 84) 22-23,latitude (EPSG:4326 - WGS 84) 22-23,elevation (EPSG:4326 - WGS 84) 22-23
0,G1-21-05,HH,261.260000,303.0,"15, 26","-14,-4",,,,7,2022-12-31,,,,6,-107.571318,-76.002191,882.202698
1,G1-21-05,HV,261.260000,303.0,26,-4,,,,7,2022-12-31,,,,6,-107.571318,-76.002191,882.202698
2,G1-21-05,VV,261.260000,303.0,26,-4,,,,7,2022-12-31,,,,6,-107.571318,-76.002191,882.202698
3,G1-21-05,VH,261.260000,303.0,26,-4,,,,7,2022-12-31,,,,6,-107.571318,-76.002191,882.202698
4,G1-22-05,HH,258.251798,300.0,,,,,,7,2022-12-31,,,,TRAV4,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
444,G10-042,HV,,303.0,22,-4,,174,CNR,,2023-01-11,,,,240,-109.004602,-77.595419,1320.478394
445,G10-042,VV,,303.0,22,-4,16:43:00,174,CNR,,2023-01-11,,,,240,-109.004602,-77.595419,1320.478394
446,G10-042,VH,,303.0,22,-4,,174,CNR,,2023-01-11,,,,240,-109.004602,-77.595419,1320.478394
447,G10-10-218,HH,259.750538,305.0,22,-4,17:01:00,184,182,,2023-01-11,,,,241,-109.054292,-77.598552,1318.649536


In [10]:
md_23_24

Unnamed: 0,waypoint,priority,orientation,antennuation_dB,gain_dB,time_NZ_local,date_NZ_local,tx_flag_height_cm,rx_flag_height_cm,notes,...,latitude (EPSG:4326 - WGS 84) 23-24,elevation (EPSG:4326 - WGS 84) 23-24,scientist,waypoint_22-23,GPS_ID_22-23,longitude (EPSG:4326 - WGS 84) 22-23,latitude (EPSG:4326 - WGS 84) 22-23,elevation (EPSG:4326 - WGS 84) 22-23,Tx_22-23_cm,Rx_22-23_cm
0,G1-21-05,2,HH,26,-4,12:50:00,12/26/23,126.0,124.0,Ran 3 bursts at this site: First with given at...,...,-75.99746,,Sierra,G1-21-05,6,-107.571318,-76.002191,882.202698,,
1,G1-22-05,2,HH,30,-4,12:41:00,12/26/23,176.5,107.0,,...,-76.00181,,Sierra,G1-22-05,TRAV4,-107.571960,-76.006190,,,
2,G1-23-05,2,HH,26,-4,12:32:00,12/26/23,120.5,183.0,,...,-76.00628,,Sierra,G1-23-05,7,-107.576420,-76.011062,898.020508,,
3,G1-24-05,2,HH,26,-4,12:23:00,12/26/23,151.0,162.0,,...,-76.01073,,Sierra,G1-24-05,8,-107.579000,-76.015486,892.305969,,
4,G1-25-05,2,HH,26,-4,12:15:00,12/26/23,85.0,91.0,"Rx flag tilted, 80 cm vertical (for ALL tilted...",...,-76.01520,,Sierra,G1-25-05,18,-107.581394,-76.019930,892.733093,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,G4-36-05,1,HH,22,-4,,,44.0,15.0,,...,-76.60682,1122.0,Alex,G4-36-05,124,-107.930936,-76.610121,1127.703857,178.0,152.0
91,G4-40-05,1,HH,22,-4,,,28.0,29.0,,...,,,Alex,G4-40-05,126,-107.941881,-76.627848,1134.697876,177.0,178.0
92,G5-03-05,1,HH,22,-4,,,33.0,42.0,,...,-76.64241,1134.0,Alex,G5-03-05,128,-107.952892,-76.645608,1128.172607,175.0,181.0
93,G5-11-05,1,HH,22,-4,,,32.0,62.0,,...,-76.67796,1133.0,Alex,G5-11-05,133,-107.974953,-76.681060,1140.732544,187.0,216.0


## Copy the metadata excel files from level_0 to level_1 folders

In [15]:
#Copy the metadata excel files from level_0 single folders to polarimetric folders in level 0 and both types of folders in level 1
import shutil
shutil.copy(level_0_path + '/attended/centerline/single/2022-2023/metadata_notes_22-23.xlsx', level_0_path + '/attended/centerline/polarimetric/2022-2023/metadata_notes_22-23.xlsx')
shutil.copy(level_0_path + '/attended/centerline/single/2023-2024/metadata_notes_23-24.xlsx', level_0_path + '/attended/centerline/polarimetric/2023-2024/metadata_notes_23-24.xlsx')

'/Users/jkingslake/Documents/data/thwaites_apres/archiving/level_0//attended/centerline/polarimetric/2023-2024/metadata_notes_23-24.xlsx'

## Copy unattended position csv files from level 0 to level 1 folders
Decided not to do this becaus the positions are already in the netcdfs.

In [14]:
## Copy unattended position csv files from level 0 to level 1 folders
#shutil.copy(level_0_path + '/unattended/Lake1/Lake1_positions.csv', level_1_path + '/Lake1_positions.csv')
#shutil.copy(level_0_path + '/unattended/Lake2/Lake2_positions.csv', level_1_path + '/Lake2_positions.csv')
#shutil.copy(level_0_path + '/unattended/LTG/LTG_positions.csv', level_1_path + '/LTG_positions.csv')

'/Users/jkingslake/Documents/data/thwaites_apres/archiving/level_1//LTG_positions.csv'

## Confirm that the waypoints in the metadata are the same as the directories containing the data
For the centerline attended surveys, the .dat files are each supposed to be in a directory corresponding to their waypoint name. Below we check that this these directory names are all included in the metadata. 

In [16]:
# Check that all the folder names are contained in the metadata. 
print(all([x in md_waypoints_22_23 for x in attended_22_23]))
print(all([x in md_waypoints_23_24 for x in attended_23_24]))


True
True


## Check that the centerline polarimetric folder names are in the metadata

In [10]:
attended_22_23_polar = [ f.name for f in os.scandir(level_0_path + '/attended/centerline/polarimetric/2022-2023') if f.is_dir() ]
attended_22_23_polar = filter_folders(attended_22_23_polar, 'netcdf')
attended_23_24_polar = [ f.name for f in os.scandir(level_0_path + '/attended/centerline/polarimetric/2023-2024') if f.is_dir() ]
attended_23_24_polar = filter_folders(attended_23_24_polar, 'netcdf')
print(f"there are {len(attended_22_23_polar)} waypoint 2022-2023 directories")
print(f"there are {len(attended_23_24_polar)} waypoint 2023-2024 directories")

there are 43 waypoint 2022-2023 directories
there are 4 waypoint 2023-2024 directories


In [11]:
print(all([x in md_waypoints_22_23 for x in attended_22_23_polar]))
print(all([x in md_waypoints_23_24 for x in attended_23_24_polar]))

True
True


## Note on naming conventions and changes made to metadata and file names. 
As described in a file supplied by Elizabeth Case in the original collection of files sent to Jonny Kingslake (/Users/jkingslake/Documents/data/thwaites_apres/original/2022-2023/Polarmetric/NamingConvention.md), the naming convection of some sites have waypoint number at the end of their name, typically 250. Within the waypoint name which include this number, there were difference in how they are referred to between the metadata, the folder names for the single measurements, and the folder names for the polarimetric measurements. 

- The metadata originally just had either the box number and the waypoint number,  e.g., G8-249 or just the  the waypoint number, e.g, 249. 
- The folder names for the single measurements originally had the box number, the waypoint number, and an additional number (described in NamingConvention.md), e.g., G9-01-248
- The folder names for the polarimetirc data had the box number and the the waypoint number,  e.g., G8-249.

In the archived versions, I have changed the waypoint names in the metadata and in the folder names of the polarimetric data to be the same as the folder names from the single data, e.g., G9-01-248 wherever possible. In cases where a polarimetric waypoint is not also in the single-measurement waypoints, I have changed the metadata to be the same as the polarimetric folder names. 

## Create a hybrid position record
There are three sources of waypoint positions:
1. The 22-23 metadata (source 1)
2. The 23-24 metadata, but it is noted that this is from the previous season (source 2)
3. The 23-24 metadata, recorded by the operators in 23-24 (source 3)
Note that in some cases waypoints are listed more than once with the same or different positions. 

The cell below combines these using the following logic:
For each waypoint we look first and source 1, then source 2, then finally source 3/ 
1. If any of the locations listed for a waypoint in source 1 has longitude, latitude and elevation present, we use them. If no we move onto step 2.
2. If source 2 has an entry for this waypoint we move on to step 3 (if not we give up and put in NaNs for the position).
3. If any of the locations listed for a waypoint in source 2 has longitude, latitude and elevation present, we use them. If not we move onto step 4.
4. If source 3 has an entry for this waypoint (which it will because isin the same dataframe as source 2), we move onto step 5
5. Is any of the locations listed in source 3 has a latitude present, we use the longitude, latitude and elevations from that entry. 

In [12]:
md_22_23.set_index('waypoint', inplace=True)
md_23_24.set_index('waypoint', inplace=True)

In [13]:
md_22_23.insert(md_22_23.shape[1], 'latitude (EPSG:4326 - WGS 84) combined', np.nan)
md_22_23.insert(md_22_23.shape[1], 'longitude (EPSG:4326 - WGS 84) combined', np.nan)
md_22_23.insert(md_22_23.shape[1], 'elevation (EPSG:4326 - WGS 84) combined', np.nan)
md_22_23.insert(md_22_23.shape[1], 'combined location source', np.nan)#

In [14]:
def remove_nan_from_list(input_list):
    """Remove NaN values from a list."""
    return [c for c in input_list.tolist() if  ~np.isnan(c)]

def remove_nans_from_positions(lat, lon, elev):
    if not isinstance(lat, float):
        lats_noNaNs = remove_nan_from_list(lat)
        lons_noNaNs = remove_nan_from_list(lon)
        elevs_noNaNs = remove_nan_from_list(elev)
    else:
        lats_noNaNs = [lat]
        lons_noNaNs = [lon]
        elevs_noNaNs = [elev]
    return lats_noNaNs, lons_noNaNs, elevs_noNaNs



# combine the latitude data into one list, using values from 2022-2023 where available, but replacing any nans with values from 2023-2024 metadata when needed
skip = False
for waypoint in md_waypoints_22_23:
    #print(type(waypoint))
    try:
        waypoint = int(waypoint)
    except ValueError: # continue with the rest of the iteration
        pass

    lat_22_23 = md_22_23.loc[waypoint, 'latitude (EPSG:4326 - WGS 84) 22-23']
    lon_22_23 = md_22_23.loc[waypoint, 'longitude (EPSG:4326 - WGS 84) 22-23']
    elev_22_23 = md_22_23.loc[waypoint, 'elevation (EPSG:4326 - WGS 84) 22-23']
    #print(lat_22_23)
    #len(lat_22_23)
    if  (~np.isnan(lat_22_23)).any() and (~np.isnan(lon_22_23)).any() and (~np.isnan(elev_22_23)).any():

        lats_noNaNs, lons_noNaNs, elevs_noNaNs = remove_nans_from_positions(lat_22_23, lon_22_23, elev_22_23)
        
        md_22_23.loc[waypoint, 'latitude (EPSG:4326 - WGS 84) combined'] = lats_noNaNs[0]
        md_22_23.loc[waypoint, 'longitude (EPSG:4326 - WGS 84) combined'] = lons_noNaNs[0]
        md_22_23.loc[waypoint, 'elevation (EPSG:4326 - WGS 84) combined'] = elevs_noNaNs[0]
        md_22_23.loc[waypoint, 'combined location source'] = '22-23 metadata'
        #print(f"using 22-23 metadata for {waypoint}")
    else:
        print(f"position data for {waypoint} in 2022-2023 metadata is nans")

        try:
            lat_23_24_from22_23 = md_23_24.loc[waypoint, 'latitude (EPSG:4326 - WGS 84) 22-23']
            lon_23_24_from22_23 = md_23_24.loc[waypoint, 'longitude (EPSG:4326 - WGS 84) 22-23']
            elev_23_24_from22_23 = md_23_24.loc[waypoint, 'elevation (EPSG:4326 - WGS 84) 22-23']
        except KeyError:
            skip_23_24 = True
            print(f"no entry for {waypoint} in 2023-2024 metadata")
            continue

        if   (~np.isnan(lat_23_24_from22_23)).any() and (~np.isnan(lon_23_24_from22_23)).any() and (~np.isnan(elev_23_24_from22_23)).any():
                
            lats_noNaNs, lons_noNaNs, elevs_noNaNs = remove_nans_from_positions(lat_23_24_from22_23, 
                                                                                lon_23_24_from22_23, 
                                                                                elev_23_24_from22_23)

            md_22_23.loc[waypoint, 'latitude (EPSG:4326 - WGS 84) combined'] = lats_noNaNs[0]
            md_22_23.loc[waypoint, 'longitude (EPSG:4326 - WGS 84) combined'] = lons_noNaNs[0]
            md_22_23.loc[waypoint, 'elevation (EPSG:4326 - WGS 84) combined'] = elevs_noNaNs[0]
            md_22_23.loc[waypoint, 'combined location source'] = '23-24 metadata, recorded in 22-23'
            print(f"using 23-24 metadata for {waypoint} recorded in 22-23 season")
        else:
            print(f"no position data for {waypoint} in 2023-2024 (recorded in 22-23), trying to find it in 2023-2024 (recorded in 23-24)")


            lat_23_24 = md_23_24.loc[waypoint, 'latitude (EPSG:4326 - WGS 84) 23-24']
            lon_23_24 = md_23_24.loc[waypoint, 'longitude (EPSG:4326 - WGS 84) 23-24']
            elev_23_24 = md_23_24.loc[waypoint, 'elevation (EPSG:4326 - WGS 84) 23-24']

            if  (~np.isnan(lat_23_24)).any():
                lats_noNaNs, lons_noNaNs, elevs_noNaNs = remove_nans_from_positions(lat_23_24,
                                                                                    lon_23_24,
                                                                                    elev_23_24)
                
                md_22_23.loc[waypoint, 'latitude (EPSG:4326 - WGS 84) combined'] = lats_noNaNs[0]
                md_22_23.loc[waypoint, 'longitude (EPSG:4326 - WGS 84) combined'] = lons_noNaNs[0]
                md_22_23.loc[waypoint, 'elevation (EPSG:4326 - WGS 84) combined'] = elevs_noNaNs[0]
                md_22_23.loc[waypoint, 'combined location source'] = '23-24 metadata recorded in 23-24 season'
                print(f"using 23-24 metadata for {waypoint} recorded in 23-24 season")
            
            else:
                print(f"no lat data found for {waypoint} ")



out = md_22_23.loc[:, ['latitude (EPSG:4326 - WGS 84) combined',
                      'longitude (EPSG:4326 - WGS 84) combined',
                      'elevation (EPSG:4326 - WGS 84) combined',
                      'Tx_Rx_bearing (degrees clockwise from true north)',
                      'combined location source']]
out.index.name = 'waypoint'
out

position data for G1-22-05 in 2022-2023 metadata is nans
no position data for G1-22-05 in 2023-2024 (recorded in 22-23), trying to find it in 2023-2024 (recorded in 23-24)
using 23-24 metadata for G1-22-05 recorded in 23-24 season
position data for G1-30-05 in 2022-2023 metadata is nans
no position data for G1-30-05 in 2023-2024 (recorded in 22-23), trying to find it in 2023-2024 (recorded in 23-24)
using 23-24 metadata for G1-30-05 recorded in 23-24 season
position data for G1-28-05 in 2022-2023 metadata is nans
no position data for G1-28-05 in 2023-2024 (recorded in 22-23), trying to find it in 2023-2024 (recorded in 23-24)
using 23-24 metadata for G1-28-05 recorded in 23-24 season
position data for G3-23-05 in 2022-2023 metadata is nans
no position data for G3-23-05 in 2023-2024 (recorded in 22-23), trying to find it in 2023-2024 (recorded in 23-24)
using 23-24 metadata for G3-23-05 recorded in 23-24 season
position data for G7-04-05 in 2022-2023 metadata is nans
no entry for G7-04-

  md_22_23.loc[waypoint, 'combined location source'] = '22-23 metadata'


Unnamed: 0_level_0,latitude (EPSG:4326 - WGS 84) combined,longitude (EPSG:4326 - WGS 84) combined,elevation (EPSG:4326 - WGS 84) combined,Tx_Rx_bearing (degrees clockwise from true north),combined location source
waypoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
G1-21-05,-76.002191,-107.571318,882.202698,261.260000,22-23 metadata
G1-21-05,-76.002191,-107.571318,882.202698,261.260000,22-23 metadata
G1-21-05,-76.002191,-107.571318,882.202698,261.260000,22-23 metadata
G1-21-05,-76.002191,-107.571318,882.202698,261.260000,22-23 metadata
G1-22-05,-76.001810,-107.573510,,258.251798,23-24 metadata recorded in 23-24 season
...,...,...,...,...,...
G10-042,-77.595419,-109.004602,1320.478394,,22-23 metadata
G10-042,-77.595419,-109.004602,1320.478394,,22-23 metadata
G10-042,-77.595419,-109.004602,1320.478394,,22-23 metadata
G10-10-218,-77.598552,-109.054292,1318.649536,259.750538,22-23 metadata


## Create season-specific positions csvs

In [15]:
out_22_23 = md_22_23.loc[:, ['longitude (EPSG:4326 - WGS 84) 22-23',
                'latitude (EPSG:4326 - WGS 84) 22-23',
                'elevation (EPSG:4326 - WGS 84) 22-23',
                'Tx_Rx_bearing (degrees clockwise from true north)']]

# correct the magnetic bearing
#declination = 0  # degrees
#out_22_23['Brunton (degrees)'] = (out_22_23['Brunton (degrees)'] - declination)

out_23_24 = md_23_24.loc[:, ['longitude (EPSG:4326 - WGS 84) 23-24',
                'latitude (EPSG:4326 - WGS 84) 23-24',
                'elevation (EPSG:4326 - WGS 84) 23-24',]]
#with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
#    display(out_23_24)
# rename columns to long, lat, elev
out_22_23.columns = ['lon (EPSG:4326 - WGS 84)',
                     'lat (EPSG:4326 - WGS 84)',
                     'elevation (m; EPSG:4326 - WGS 84)',
                     'Tx_Rx_bearing (degrees clockwise from true north)']
out_23_24.columns = ['lon (EPSG:4326 - WGS 84)',
                     'lat (EPSG:4326 - WGS 84)',
                     'elevation (m; EPSG:4326 - WGS 84)']

# remove duplicates
out_22_23 = out_22_23.loc[~out_22_23.index.duplicated(keep="first"),:]
out_23_24 = out_23_24.loc[~out_23_24.index.duplicated(keep="first"),:]

# remove 'G3-12-05-2', 'G3-20-05-2' from 23-24 because they were visited twice and that is too complex to include int eh netcdf .
out_23_24 = out_23_24.drop(index=['G3-12-05-2', 'G3-20-05-2'])

# add a unique number for each waypoint (based on the first year because that has more waypoints).
out_22_23.insert(0, 'waypoint_number', range(1, 1 + len(out_22_23)))
out_23_24.insert(0, 'waypoint_number', out_22_23.loc[out_23_24.index,'waypoint_number'])

# sort by the waypoint number
out_22_23 = out_22_23.sort_values('waypoint_number')
out_23_24 = out_23_24.sort_values('waypoint_number')

# add bearing info to 23-24, based on 22-23
out_23_24.insert(4, 'Tx_Rx_bearing (degrees clockwise from true north)', out_22_23.loc[out_23_24.index,'Tx_Rx_bearing (degrees clockwise from true north)'])


out_23_24#.loc['G8-07-262',:]  

Unnamed: 0_level_0,waypoint_number,lon (EPSG:4326 - WGS 84),lat (EPSG:4326 - WGS 84),elevation (m; EPSG:4326 - WGS 84),Tx_Rx_bearing (degrees clockwise from true north)
waypoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
G1-21-05,1,-107.57095,-75.99746,,261.260000
G1-22-05,2,-107.57351,-76.00181,,258.251798
G1-23-05,3,-107.57617,-76.00628,,263.243596
G1-24-05,4,-107.57902,-76.01073,,263.235394
G1-29-05,5,-107.59096,-76.03301,,263.194385
...,...,...,...,...,...
G4-36-05,123,-107.92979,-76.60682,1122.0,262.050846
G4-40-05,125,,,,
G5-03-05,127,-107.95187,-76.64241,1134.0,261.968692
G5-11-05,131,-107.97378,-76.67796,1133.0,260.886538


In [16]:
out_22_23

Unnamed: 0_level_0,waypoint_number,lon (EPSG:4326 - WGS 84),lat (EPSG:4326 - WGS 84),elevation (m; EPSG:4326 - WGS 84),Tx_Rx_bearing (degrees clockwise from true north)
waypoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
G1-21-05,1,-107.571318,-76.002191,882.202698,261.260000
G1-22-05,2,,,,258.251798
G1-23-05,3,-107.576420,-76.011062,898.020508,263.243596
G1-24-05,4,-107.579000,-76.015486,892.305969,263.235394
G1-29-05,5,-107.591433,-76.037646,894.773804,263.194385
...,...,...,...,...,...
G10-08-220,242,-109.037602,-77.580705,1346.282715,259.791615
G10-09-219,243,-109.045977,-77.589632,1340.057617,259.771077
G10-042,244,-109.004602,-77.595419,1320.478394,
G10-10-218,245,-109.054292,-77.598552,1318.649536,259.750538


## Write season-specific positions csvs

In [17]:
# convert to geodataframe, add crs
geom =gpd.points_from_xy(x=out_22_23['lon (EPSG:4326 - WGS 84)'], y=out_22_23['lat (EPSG:4326 - WGS 84)'])
out_22_23 = gpd.GeoDataFrame(out_22_23, geometry = geom)
out_22_23 = out_22_23.set_crs('EPSG:4326')

# save
out_22_23.to_csv(level_0_path + '/attended/centerline/single/2022-2023/positions.csv')
out_22_23.to_csv(level_0_path + '/attended/centerline/polarimetric/2022-2023/positions.csv')
out_22_23.to_csv(level_1_path + '/attended/centerline/single/2022-2023/positions.csv')
out_22_23.to_csv(level_1_path + '/attended/centerline/polarimetric/2022-2023/positions.csv')


In [18]:
# convert to geodataframe, add crs
geom =gpd.points_from_xy(x=out_23_24['lon (EPSG:4326 - WGS 84)'], y=out_23_24['lat (EPSG:4326 - WGS 84)'])
out_23_24 = gpd.GeoDataFrame(out_23_24, geometry = geom)
out_23_24 = out_23_24.set_crs('EPSG:4326')

# save
out_23_24.to_csv(level_0_path + '/attended/centerline/single/2023-2024/positions.csv')
out_23_24.to_csv(level_0_path + '/attended/centerline/polarimetric/2023-2024/positions.csv')
out_23_24.to_csv(level_1_path + '/attended/centerline/single/2023-2024/positions.csv')
out_23_24.to_csv(level_1_path + '/attended/centerline/polarimetric/2023-2024/positions.csv')

## Write hybrid position file
(created in a previous section above)

In [19]:
hybrid = md_22_23.loc[:, ['latitude (EPSG:4326 - WGS 84) combined',
                      'longitude (EPSG:4326 - WGS 84) combined',
                      'elevation (EPSG:4326 - WGS 84) combined',
                      'Tx_Rx_bearing (degrees clockwise from true north)',
                      'combined location source']]
hybrid.index.name = 'waypoint'
geom =gpd.points_from_xy(x=hybrid['longitude (EPSG:4326 - WGS 84) combined'], y=hybrid['latitude (EPSG:4326 - WGS 84) combined'])
hybrid = gpd.GeoDataFrame(hybrid, geometry = geom)
hybrid = hybrid.set_crs('EPSG:4326')  
hybrid = hybrid.loc[~out.index.duplicated(keep="first"),:]


# rename columns to long, lat, elev
hybrid.rename(columns={'latitude (EPSG:4326 - WGS 84) combined': 'lat (EPSG:4326 - WGS 84)',
                             'longitude (EPSG:4326 - WGS 84) combined': 'lon (EPSG:4326 - WGS 84)',
                             'elevation (EPSG:4326 - WGS 84) combined': 'elevation (m; EPSG:4326 - WGS 84)',
                             'combined location source': 'source'}, inplace=True)

# add a unique number for each waypoint (based on the first year because that has more waypoints).
hybrid.insert(0, 'waypoint_number', out_22_23.loc[hybrid.index,'waypoint_number'])

#hybrid.to_file(archived_data_path + '/attended/centerline/positions_hybrid.shp', driver='ESRI Shapefile')
hybrid.to_csv(level_0_path + '/attended/centerline/positions_hybrid.csv')
hybrid.to_csv(level_1_path + '/attended/centerline/positions_hybrid.csv')
hybrid

Unnamed: 0_level_0,waypoint_number,lat (EPSG:4326 - WGS 84),lon (EPSG:4326 - WGS 84),elevation (m; EPSG:4326 - WGS 84),Tx_Rx_bearing (degrees clockwise from true north),source,geometry
waypoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
G1-21-05,1,-76.002191,-107.571318,882.202698,261.260000,22-23 metadata,POINT (-107.57132 -76.00219)
G1-22-05,2,-76.001810,-107.573510,,258.251798,23-24 metadata recorded in 23-24 season,POINT (-107.57351 -76.00181)
G1-23-05,3,-76.011062,-107.576420,898.020508,263.243596,22-23 metadata,POINT (-107.57642 -76.01106)
G1-24-05,4,-76.015486,-107.579000,892.305969,263.235394,22-23 metadata,POINT (-107.579 -76.01549)
G1-29-05,5,-76.037646,-107.591433,894.773804,263.194385,22-23 metadata,POINT (-107.59143 -76.03765)
...,...,...,...,...,...,...,...
G10-08-220,242,-77.580705,-109.037602,1346.282715,259.791615,22-23 metadata,POINT (-109.0376 -77.5807)
G10-09-219,243,-77.589632,-109.045977,1340.057617,259.771077,22-23 metadata,POINT (-109.04598 -77.58963)
G10-042,244,-77.595419,-109.004602,1320.478394,,22-23 metadata,POINT (-109.0046 -77.59542)
G10-10-218,245,-77.598552,-109.054292,1318.649536,259.750538,22-23 metadata,POINT (-109.05429 -77.59855)


## Combine position data from acrossline polarimetric csvs
The polarimetric data from the across line are stored in four directories in `/Users/jkingslake/Documents/data/thwaites_apres/archiving/attended/acrossline/polarimetric/GHOST24_Polarimetric_pRES_OZ`. In each one there is a csv containing the position of the measurement. 

Below we collate these into one csv.

In [20]:
# use glob to find csvs in that dir
acrossline_polarimetric_csvs = glob.glob(level_0_path + '/attended/acrossline/polarimetric//PpRES_*/*.csv')


In [21]:
polarimetric_acrossline_positions = pd.concat((pd.read_csv(f) for f in acrossline_polarimetric_csvs))

# rename latitude and longituse to lat and lon
polarimetric_acrossline_positions.rename(columns={'Latitude': 'lat (EPSG:4326 - WGS 84)',
                                            'Longitude': 'lon (EPSG:4326 - WGS 84)'}, inplace=True)

# insert a column of nans called elevation
polarimetric_acrossline_positions.insert(3, 'elevation (m; EPSG:4326 - WGS 84)', np.nan)

# rename Stations -- > waypoints and set as index
polarimetric_acrossline_positions = polarimetric_acrossline_positions.rename(columns={'Station': 'waypoint'})
polarimetric_acrossline_positions.set_index('waypoint', inplace=True)

# add waypoint number
polarimetric_acrossline_positions.insert(0, 'waypoint_number', [x[-1] for x in polarimetric_acrossline_positions.index])

# swap the positions of lat and lon
cols = polarimetric_acrossline_positions.columns.tolist()
cols = [cols[0], cols[2], cols[1]] + cols[3:]   
polarimetric_acrossline_positions = polarimetric_acrossline_positions[cols]

# add bearing info (from pdf Ole supplied with the data)
polarimetric_acrossline_positions.insert(4, 'Tx_Rx_bearing (degrees clockwise from true north)', 0.0)

polarimetric_acrossline_positions

Unnamed: 0_level_0,waypoint_number,lon (EPSG:4326 - WGS 84),lat (EPSG:4326 - WGS 84),elevation (m; EPSG:4326 - WGS 84),Tx_Rx_bearing (degrees clockwise from true north)
waypoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
PpRES_20240551_001,1,-107.389709,-76.457764,,0.0
PpRES_20240551_003,3,-104.822354,-76.472572,,0.0
PpRES_20240551_004,4,-106.977237,-76.487418,,0.0
PpRES_20240551_002,2,-106.578286,-76.467489,,0.0


In [22]:
# convert to geodataframe, add crs
geom =gpd.points_from_xy(x=polarimetric_acrossline_positions['lon (EPSG:4326 - WGS 84)'], y=polarimetric_acrossline_positions['lat (EPSG:4326 - WGS 84)'])
polarimetric_acrossline_positions = gpd.GeoDataFrame(polarimetric_acrossline_positions, geometry = geom)

polarimetric_acrossline_positions

Unnamed: 0_level_0,waypoint_number,lon (EPSG:4326 - WGS 84),lat (EPSG:4326 - WGS 84),elevation (m; EPSG:4326 - WGS 84),Tx_Rx_bearing (degrees clockwise from true north),geometry
waypoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
PpRES_20240551_001,1,-107.389709,-76.457764,,0.0,POINT (-107.38971 -76.45776)
PpRES_20240551_003,3,-104.822354,-76.472572,,0.0,POINT (-104.82235 -76.47257)
PpRES_20240551_004,4,-106.977237,-76.487418,,0.0,POINT (-106.97724 -76.48742)
PpRES_20240551_002,2,-106.578286,-76.467489,,0.0,POINT (-106.57829 -76.46749)


In [23]:
polarimetric_acrossline_positions.to_csv(level_0_path + '/attended/acrossline/polarimetric/positions.csv')
polarimetric_acrossline_positions.to_csv(level_1_path + '/attended/acrossline/polarimetric/positions.csv')


## Prepare bearings csv for adding to the 22-23 metadata excel spreadsheet

In [24]:
b = pd.read_excel('../../../../../data/thwaites_apres/original/bearings.xlsx')
b.set_index('waypoint', inplace=True)
b

Unnamed: 0_level_0,priority,orientation,magnetic_bearing_E,true_bearing,antennuation_dB,gain_dB,time_NZ_local,date_NZ_local,tx_flag_height_cm,rx_flag_height_cm,...,ycoord_23-24,zcoord_23-24,scientist,waypoint_22-23,GPS_ID_22-23,xcoord_22-23,ycoord_22-23,zcoord_22-23,Tx_22-23_cm,Rx_22-23_cm
waypoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
G1-21-05,2,HH,314,261.260000,26,-4,,,,,...,,,,G1-21-05,6,-107.571318,-76.002191,882.202698,,
G1-22-05,2,HH,311,258.251798,30,-4,,,,,...,,,,G1-22-05,TRAV4,-107.571960,-76.006190,,,
G1-23-05,2,HH,316,263.243596,26,-4,,,,,...,,,,G1-23-05,7,-107.576420,-76.011062,898.020508,,
G1-24-05,2,HH,316,263.235394,26,-4,,,,,...,,,,G1-24-05,8,-107.579000,-76.015486,892.305969,,
G1-25-05,2,HH,315,262.227192,26,-4,,,,,...,,,,G1-25-05,18,-107.581394,-76.019930,892.733093,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
G10-07-221,4,HH,315,258.812154,22,-4,,,,,...,,,,221,237,-109.029264,-77.571790,1357.663574,183,121.0
G10-08-220,4,HH,316,259.791615,22,-4,,,,,...,,,,220,238,-109.037602,-77.580705,1346.282715,116,143.0
G10-09-219,4,HH,316,259.771077,22,-4,,,,,...,,,,219,239,-109.045977,-77.589632,1340.057617,,
G10-10-218,4,HH,316,259.750538,22,-4,,,,,...,,,,218,241,-109.054292,-77.598552,1318.649536,184,182.0


In [25]:
bearings_for_md22_23 = md_22_23.copy()

for index, row in bearings_for_md22_23.iterrows():
    try:
        bearings_for_md22_23.loc[index, 'true_bearing'] = b.loc[index, 'true_bearing']
    except KeyError:
        bearings_for_md22_23.loc[index, 'true_bearing'] = np.nan
        pass
bearings_for_md22_23.true_bearing.to_csv('../../../../../data/thwaites_apres/original/bearings_for_md22_23.csv')