**Reading in the data file**
1. In the file browser on the left, create a folder called `files`.
2. Upload the .gpx files whose depths you want to correct into this folder.

In [None]:
!pip install gpxpy
import os
import gpxpy
import re
import pandas as pd

def extract_data(gpx_file_path):
    """Extract position, depth and time for every point in a GPX file.

    Each ``<trkpt>``, ``<wpt>`` and ``<rtept>`` element is parsed on its own,
    so the latitude, longitude, depth and time found at a given list position
    always belong to the same point. Coordinates that appear outside point
    elements (for example the ``minlat``/``maxlat`` attributes of
    ``<bounds>``) and the file-level ``<time>`` stamp are ignored.

    Args:
        gpx_file_path: Path of the .gpx file to read.

    Returns:
        dict with the keys 'Latitude', 'Longitude', 'Depth' and 'Time'. Every
        value is a list with one entry per point, in file order. Entries are
        the strings found in the file; 'Depth' and 'Time' hold None for
        points that do not carry that value. All four lists are empty when
        the file contains no points.
    """
    # Signed number with an optional fractional part, so "37" is accepted
    # as well as "37.0".
    number = r'-?\d+(?:\.\d+)?'

    # Group 1: element name, group 2: attribute text, group 3: element body
    # (None for a self-closing point such as <wpt lat=".." lon=".."/>).
    point_pattern = re.compile(
        r'<(trkpt|wpt|rtept)\b([^>]*?)(?:/>|>(.*?)</\1\s*>)', re.DOTALL)
    lat_pattern = re.compile(r'\blat\s*=\s*["\'](' + number + r')["\']')
    lon_pattern = re.compile(r'\blon\s*=\s*["\'](' + number + r')["\']')
    depth_pattern = re.compile(r'depth>\s*(' + number + r')')
    time_pattern = re.compile(r'time>(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})')

    data = {
        'Latitude': [],
        'Longitude': [],
        'Depth': [],
        'Time': []
    }

    # Only ASCII digits and tag names are extracted, so undecodable bytes in
    # free-text fields can be replaced instead of aborting the read.
    with open(gpx_file_path, 'r', encoding='utf-8', errors='replace') as gpx_file:
        gpx_content = gpx_file.read()

    for point in point_pattern.finditer(gpx_content):
        attributes = point.group(2)
        body = point.group(3) or ''

        lat_match = lat_pattern.search(attributes)
        lon_match = lon_pattern.search(attributes)
        if lat_match is None or lon_match is None:
            # A point without a readable position cannot be placed in a zone.
            continue

        depth_match = depth_pattern.search(body)
        time_match = time_pattern.search(body)

        data['Latitude'].append(lat_match.group(1))
        data['Longitude'].append(lon_match.group(1))
        data['Depth'].append(depth_match.group(1) if depth_match else None)
        data['Time'].append(time_match.group(1) if time_match else None)

    return data
# If the folder is named something other than files, change the name here
folder_path = './files/'

# Offset subtracted from every recorded depth.
# NOTE(review): presumably a transducer/draft offset in the same units as the
# recorded depths - confirm the value and the units.
DEPTH_OFFSET = 1.5

# Sorted so the files are processed in the same order on every run
# (os.listdir returns names in arbitrary order).
gpx_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.gpx'))

columns_to_convert = ['Latitude', 'Longitude', 'Depth']
frames = []

for gpx_file in gpx_files:
    gpx_file_path = os.path.join(folder_path, gpx_file)
    extracted_data = extract_data(gpx_file_path)

    df = pd.DataFrame(extracted_data)
    df[columns_to_convert] = df[columns_to_convert].astype(float)
    # Points without a time stamp inherit the most recent earlier one.
    df['Time'] = df['Time'].ffill()
    # Drop zero depths; .copy() makes the adjustment below act on an
    # independent frame instead of a slice of the unfiltered one.
    df = df[df['Depth'] != 0.0].copy()
    df['Depth'] -= DEPTH_OFFSET

    if not df.empty:
        frames.append(df)

# Concatenate once instead of appending inside the loop: DataFrame.append no
# longer exists in pandas 2.x and copied the whole frame on every iteration.
combined_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
# uncomment the line below to show the data frame
# print(combined_df)




  combined_df = combined_df.append(df, ignore_index=True)


**Shapefile**
1. In the file browser on the left, upload the shapefile. Every component file must be named `shape` followed by its own extension (for example `shape.shp`, `shape.shx`, `shape.dbf`).
2. If the shapefile has a different name, change the name in the code below where indicated.

In [None]:

import zipfile
import geopandas as gpd
# change the name of the shape file here if needed
shapefile_path = 'shape.shp'

# Read the shapefile; the zone lookup further down works on this `gdf`.
gdf = gpd.read_file(shapefile_path)


**Finding Zones**
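1. Run the code below to find the zone associated with each point. If the first run fails because `zone_index` is missing, run the cell a second time.
2. The last line of the code prints the zone found for each point; comment it out if you do not want to see the zones.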

In [None]:
import geopandas as gpd
from shapely.geometry import Point
from shapely.geometry import Polygon
from shapely import wkt

# Number the zones BEFORE creating the reprojected copy, so the copy carries
# the 'zone_index' column and the cell works on its first run.
gdf['zone_index'] = gdf.index + 1
gdf_wgs84 = gdf.to_crs("EPSG:4326")

# GPX coordinates are WGS84 longitude/latitude, so the points are declared as
# EPSG:4326 (the CRS of gdf_wgs84) rather than the shapefile's native CRS.
# combined_df holds the points of every uploaded file, not only the last one.
geometry = [
    Point(lon, lat)
    for lon, lat in zip(combined_df['Longitude'], combined_df['Latitude'])
]
gdf_points = gpd.GeoDataFrame(geometry=geometry, crs="EPSG:4326")

# `predicate` is the current name of the former `op` argument.
# Points outside every zone keep NaN in the zone columns (left join).
joined = gpd.sjoin(gdf_points, gdf_wgs84, how='left', predicate='within')

# comment out the line below to hide the zones
print(joined[['geometry', 'zone_index']])


                     geometry  zone_index
0      POINT (-75.977 37.455)      1147.0
1      POINT (-75.974 37.456)      1147.0
2      POINT (-75.944 37.559)      1164.0
3      POINT (-75.937 37.554)      1164.0
4      POINT (-75.921 37.551)      1154.0
...                       ...         ...
20418  POINT (-75.806 37.511)         NaN
20419  POINT (-75.806 37.510)         NaN
20420  POINT (-75.806 37.510)         NaN
20421  POINT (-75.807 37.510)         NaN
20422  POINT (-75.806 37.509)         NaN

[20423 rows x 2 columns]


  if (await self.run_code(code, result,  async_=asy)):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:3857
Right CRS: EPSG:4326

  joined = gpd.sjoin(gdf_points, gdf_wgs84, how='left', op='within')


**Adding Station Information**
1. In the file browser on the left, upload the CSV file that contains the information for each zone.
2. If the file is not named NorthAtlantic_5.csv, change the file name in the code below.




In [None]:

import pandas as pd

# rename the file below
file_path = "NorthAtlantic_5.csv"

NorthAtlantic = pd.read_csv(file_path)

# Attach the zone information from the CSV to every point that fell inside a
# zone; points without a zone are dropped by the inner merge.
merged_df = pd.merge(joined, NorthAtlantic, left_on='zone_index', right_on='OBJECTID', how='inner')

merged_df['Latitude'] = merged_df['geometry'].y
merged_df['Longitude'] = merged_df['geometry'].x

merged_df = merged_df.drop(columns=['geometry'])

# Keep one zone record per coordinate pair. Without this, k survey points that
# share the same coordinates would come out of the merge below as k*k rows.
# If a point lies in several overlapping zones, the first match is kept.
merged_df = merged_df.drop_duplicates(subset=['Latitude', 'Longitude'])

# combined_df holds the points of every uploaded file, not only the last one.
final_df = pd.merge(combined_df, merged_df, on=['Latitude', 'Longitude'], how='inner')

final_df['Time'] = pd.to_datetime(final_df['Time'])
# Missing or non-numeric corrections count as no correction.
final_df['ATCorr'] = pd.to_numeric(final_df['ATCorr'], errors='coerce').fillna(0)

# Subtract the 'ATCorr' offset (interpreted as minutes) from 'Time'.
# NOTE(review): the original comment said "add" while the code subtracts;
# the code's behaviour is kept - confirm the sign convention of ATCorr.
final_df['new_time'] = final_df['Time'] - pd.to_timedelta(final_df['ATCorr'], unit='m')

# Only 'new_time' is needed from here on, so the original 'Time' and 'ATCorr' columns are dropped
final_df = final_df.drop(['Time', 'ATCorr'], axis=1)
# comment out the line below to hide the merged DataFrame
print(final_df)


        Latitude  Longitude  Depth  index_right ControlStn_x    RR ControlS_1  \
0      37.455373 -75.977406  357.5       1146.0      8636580  1.59       None   
1      37.456211 -75.974469  230.5       1146.0      8636580  1.59       None   
2      37.558602 -75.943757   23.5       1163.0      8636580  1.42       None   
3      37.554225 -75.937235  211.5       1163.0      8636580  1.42       None   
4      37.551207 -75.920913  193.5       1153.0      8636580  1.51       None   
...          ...        ...    ...          ...          ...   ...        ...   
15339  37.504261 -75.791875   33.5        132.0      8631044  1.00       None   
15340  37.505585 -75.793879   33.5        132.0      8631044  1.00       None   
15341  37.506215 -75.796043   33.5        132.0      8631044  1.00       None   
15342  37.507066 -75.798415   31.5        132.0      8631044  1.00       None   
15343  37.507460 -75.800984   33.5        132.0      8631044  1.00       None   

       ATCorr2  RR2 DataPro

**Making API Calls**
Uncomment the `print` lines in the code below to show whether each lookup reused cached data or made a new request to the NOAA API.

In [None]:
import requests
import pandas as pd
from json.decoder import JSONDecodeError
from collections import namedtuple

# Endpoint that make_api_call sends its GET requests to.
api_url = "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter?"
# Query parameters shared by every request; make_api_call supplies the
# per-row 'begin_date' and 'station' values.
params = {'datum': 'MSL', 'units': 'english', 'product': 'water_level', 'time_zone': 'lst_ldt',
          'interval': 'h', 'bin': '0', 'format': 'json', 'range': '1'}
# Record type with the fields 'Time', 'ControlStn_y' and 'Data'.
ApiCache = namedtuple('ApiCache', ['Time', 'ControlStn_y', 'Data'])
# Module-level dictionary in which make_api_call keeps previously fetched data.
api_cache = {}

def make_api_call(row):
    """Return the water level reported by the API for one row of final_df.

    The request uses the shared ``params`` plus the row's control station and
    corrected time. Successful lookups are remembered in ``api_cache`` under
    the key ``(station, begin_date)``, so rows that need exactly the same
    request do not contact the server again.

    Args:
        row: A row of final_df; must provide 'new_time' and 'ControlStn_y'.

    Returns:
        The 'v' field of the first record in the response (as delivered by
        the API), or None when the row has no usable time, the request
        fails, or the response does not contain any data.
    """
    datetime_str = row['new_time']
    date_time = pd.to_datetime(datetime_str, format='%Y-%m-%dT%H:%M:%S')
    station = row['ControlStn_y']

    if pd.isna(date_time):
        # No time stamp -> nothing to look up (NaT cannot be formatted).
        return None

    begin_date = date_time.strftime('%Y%m%d %H:%M')

    # The key contains the full date and time, so equal day/hour values from
    # different months or years are never confused with each other.
    cache_key = (station, begin_date)
    if cache_key in api_cache:

        # print(f"Using previously collected data for Station {station}")

        return api_cache[cache_key]

    # Build a per-call copy instead of modifying the shared params dict.
    request_params = dict(params, begin_date=begin_date, station=station)

    try:

        # The timeout keeps one stalled connection from blocking the whole run.
        response = requests.get(api_url, params=request_params, timeout=30)
        response.raise_for_status()
        api_data = response.json()

        # print(f"API response for Station {station}, Time {date_time}: {api_data}")

        water_level = api_data['data'][0]['v']

    except JSONDecodeError as e:
        print(f"JSONDecodeError for Station {station}, Time {date_time}: {e}")
    except requests.RequestException as e:
        print(f"RequestException for Station {station}, Time {date_time}: {e}")
    except (KeyError, IndexError, TypeError):
        # The server answered with valid JSON that carries no data records
        # (for example an error message).
        print(f"No water level data for Station {station}, Time {date_time}: {api_data}")
    else:
        api_cache[cache_key] = water_level
        return water_level

    return None

# Call make_api_call once per row (axis=1) and store the result as 'WaterLevel'.
final_df['WaterLevel'] = final_df.apply(make_api_call, axis=1)




**Final Dataframe**

Uncomment the `print` line in the code below to see the final dataframe.
Run the last two cells to apply the corrections and write the result to a CSV file.

In [None]:

# Corrected depth = recorded depth - water level scaled by the zone's range ratio.
# to_numeric(..., errors='coerce') turns missing or empty water-level values
# into NaN instead of raising, so rows without a reading get a NaN result.
final_df['CorrectedDepth'] = final_df['Depth'] - (
    pd.to_numeric(final_df['WaterLevel'], errors='coerce') * final_df['RangeRatio'].astype(float)
)
# print(final_df)

        Latitude  Longitude  Depth                 Time  index_right  \
0      37.455373 -75.977406  357.5  2009-12-02T15:23:50       1146.0   
1      37.456211 -75.974469  230.5  2009-12-02T15:47:33       1146.0   
2      37.558602 -75.943757   23.5  2009-12-03T13:43:22       1163.0   
3      37.554225 -75.937235  211.5  2009-12-03T14:02:11       1163.0   
4      37.551207 -75.920913  193.5  2009-12-03T15:15:44       1153.0   
...          ...        ...    ...                  ...          ...   
15339  37.504261 -75.791875   33.5  2010-02-02T18:32:33        132.0   
15340  37.505585 -75.793879   33.5  2010-02-02T18:32:33        132.0   
15341  37.506215 -75.796043   33.5  2010-02-02T18:32:33        132.0   
15342  37.507066 -75.798415   31.5  2010-02-02T18:32:33        132.0   
15343  37.507460 -75.800984   33.5  2010-02-02T18:32:33        132.0   

      ControlStn_x  ATCorr    RR ControlS_1  ATCorr2  ...  AvgTimeCorr  \
0          8636580   -48.0  1.59       None      0.0  ...    

In [None]:

# Keep only the position and the corrected depth of every point.
selected_columns = ['Latitude', 'Longitude', 'CorrectedDepth']
new_df = final_df.loc[:, selected_columns]

# Write the selection to output.csv without the index column.
new_df.to_csv('output.csv', index=False)
