In [4]:
from data_loader import GliderDataLoader, GulfStreamLoader, MPADataLoader

# Build a glider loader restricted to one real-time file.
# NOTE(review): include_qc=False presumably leaves out quality-control columns — confirm in data_loader.
loader = GliderDataLoader(filenames=['25720901RT.txt'],include_qc=False)

# To inspect all available filenames:
# print(loader.get_available_files())

# Load the data for the file named above.
# NOTE(review): later cells use `df_latest` as a single DataFrame, while the
# commented-out lines below index the result by filename — confirm what
# load_data() returns for a one-file loader.
df_latest = loader.load_data()

# SN209 = df_latest[loader.file_list[1]]

# Show which files the loader is working with (recorded output: ['25720901RT.txt']).
print(loader.file_list)

# # Load specific file(s)
# loader = GliderDataLoader(filenames=["SPL123_RT.txt", "SPL124_RT.txt"])
# dfs = loader.load_data()  # dict of DataFrames
# Load the Gulf Stream data with the loader's default settings.
gs = GulfStreamLoader()
coords = gs.load_data()


['25720901RT.txt']


In [3]:
from data_loader import GliderDataLoader

# Load a second glider file into `df`.
# NOTE(review): the filename is passed here as a bare positional string, whereas
# the first cell passes filenames=[...] as a list — confirm GliderDataLoader
# accepts both forms.
load = GliderDataLoader('25820301RT.txt')
df = load.load_data()

In [None]:
from data_loader import GliderDataLoader, GulfStreamLoader, MPADataLoader, MapDataLoader
from nessie_interpolation_function import create_spatial_interpolation
import datetime

# NOTE(review): rebinds `loader` and `df`, which other cells also assign, so the
# result depends on which cell ran last.
loader = MapDataLoader()
df = loader.load_data()


# Interpolate rhodamine over the last 24 hours with no platform or layer
# restriction; the recorded run reports 686 points before and 412 after the time filter.
fig, metadata = create_spatial_interpolation(
    df, 
    parameter='rhodamine', 
    hours_back=24, 
    platform_filter=None, 
    layer_filter=None, 
    grid_resolution=80, 
    method='linear',
    include_map_overlay=True  # newly added parameter
)

# NOTE: in the recorded run this call failed with
# "ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed";
# install or upgrade nbformat in the kernel environment before re-running.
fig.show()


Platform filter: 686 points after filtering
Layer filter: 686 points after filtering
Time filter: 412 points in last 24 hours


ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [1]:
from data_loader import gomofsdataloader

# Load data through gomofsdataloader with its default settings.
# NOTE(review): rebinds `loader` and `df`, which other cells also assign; run
# order determines what these names hold afterwards.
loader = gomofsdataloader()
df = loader.load_data()


In [5]:
import pandas as pd

def range_slider_marks(df, target_mark_count=10):
    """
    Generate a marks dictionary for a Dash RangeSlider using ~target_mark_count evenly spaced labels.

    Marks start at the smallest 'UnixTimestamp' and are spaced by
    ``(max - min) // target_mark_count`` seconds (never less than one second),
    so the result normally holds target_mark_count + 1 entries and the last
    mark may fall short of the maximum timestamp.

    Parameters:
    ----------
    df : pandas.DataFrame
        Must contain a numeric 'UnixTimestamp' column (seconds since the epoch).
        Row order does not matter; only the column's min and max are used.
    target_mark_count : int
        Desired number of intervals between marks on the slider.

    Returns:
    -------
    dict
        Dictionary of {UnixTimestamp (int): 'MM/DD HH:MM' string}. Empty when
        the frame has no usable timestamps, when all timestamps are equal, or
        when target_mark_count is less than 1.
    """
    unix_min = df["UnixTimestamp"].min()
    unix_max = df["UnixTimestamp"].max()

    # An empty or all-NaN column yields NaN here; int(NaN) would raise.
    if pd.isna(unix_min) or pd.isna(unix_max):
        return {}

    unix_range = unix_max - unix_min
    if unix_range <= 0 or target_mark_count < 1:
        return {}

    # Clamp to one second: a span shorter than target_mark_count would floor to
    # a zero step, which range() rejects.
    interval_seconds = max(1, int(unix_range // target_mark_count))

    return {
        t: pd.to_datetime(t, unit='s').strftime('%m/%d %H:%M')
        for t in range(int(unix_min), int(unix_max) + 1, interval_seconds)
    }


In [6]:
# Build slider marks for the glider data in `df_latest`, requesting about 10 labels.
marks = range_slider_marks(df_latest, 10)

In [6]:
import numpy as np
def get_first_10_pH_average(df_latest, n_rows=5):
    """
    Average pH and chlorophyll over the first rows of every (Station, Cruise) group.

    Parameters
    ----------
    df_latest : pandas.DataFrame
        Must contain 'Station', 'Cruise', 'Lat [°N]' and 'Lon [°E]'. The
        'pHinsitu[Total]' and 'Chl_a[mg/m^3]' columns are averaged when present.
    n_rows : int, default 5
        Number of leading rows (in the frame's existing order) averaged per
        group. The default keeps the historical behaviour, which used 5 rows
        despite the function's name.

    Returns
    -------
    pandas.DataFrame
        One row per (Station, Cruise) pair — the first occurrence in
        `df_latest`, with its original index label — holding the columns
        'Station', 'Cruise', 'Lat [°N]', 'Lon [°E]', 'pHinsitu[Total]' and
        'Chl_a[mg/m^3]'. The last two hold the group means, or NaN when the
        source column is missing or the group key contains NaN.
    """
    key_cols = ['Station', 'Cruise']
    value_cols = ['pHinsitu[Total]', 'Chl_a[mg/m^3]']
    # Guard each value column separately so one missing column does not raise
    # or suppress the other.
    available = [col for col in value_cols if col in df_latest.columns]

    profiles = df_latest.drop_duplicates(subset=key_cols, keep='first').copy()

    if available:
        # First n_rows of every group, then one mean per group and column.
        first_rows = df_latest.groupby(key_cols).head(n_rows)
        group_means = first_rows.groupby(key_cols)[available].mean()
        # Replace the raw first-row values with the group means; joining on the
        # key columns keeps the original index labels of `profiles`.
        profiles = profiles.drop(columns=available).join(group_means, on=key_cols)

    for col in value_cols:
        if col not in profiles.columns:
            profiles[col] = np.nan

    return profiles[['Station', 'Cruise', 'Lat [°N]', 'Lon [°E]', 'pHinsitu[Total]', 'Chl_a[mg/m^3]']]

# One row per (Station, Cruise) with averaged pH and chlorophyll.
# NOTE(review): a later cell redefines get_first_10_pH_average with per-Station
# grouping; this call uses whichever definition was executed last.
df_map = get_first_10_pH_average(df_latest)

In [9]:
# Keep only the rows whose Cruise value, rendered as text, contains "209".
df_map_filter = df_map[df_map["Cruise"].astype(str).str.contains("209")]


In [None]:
def MLDaverage(df_latest):
    """
    Return, for every station, the sample whose depth is closest to zero.

    Rows lacking either 'Depth[m]' or 'Sigma_theta[kg/m^3]' are ignored.
    Despite the name, no averaging takes place: the result is one
    near-surface row per station.

    Parameters
    ----------
    df_latest : pandas.DataFrame
        Must contain 'Station', 'Depth[m]' and 'Sigma_theta[kg/m^3]'.

    Returns
    -------
    pandas.DataFrame
        Columns 'Station', 'Depth[m]', 'Sigma_theta[kg/m^3]' with a fresh
        0..n-1 index, ordered by station.
    """
    wanted_cols = ['Station', 'Depth[m]', 'Sigma_theta[kg/m^3]']

    # Only samples with both a depth and a density are candidates.
    valid = df_latest.dropna(subset=['Depth[m]', 'Sigma_theta[kg/m^3]'])

    # Index label of the shallowest (smallest |depth|) sample in each station.
    distance_from_surface = valid['Depth[m]'].abs()
    nearest_labels = distance_from_surface.groupby(valid['Station']).idxmin()

    return valid.loc[nearest_labels, wanted_cols].reset_index(drop=True)

# Despite the variable name, this holds the near-surface row per station
# returned by MLDaverage, not an average.
df_MLD_average = MLDaverage(df_latest)

In [None]:
import numpy as np
import pandas as pd
def get_surface_to_mld_pH_average(df_latest, sigma_threshold=0.03):
    """
    Average pH between the surface sample and the mixed-layer depth, per station.

    For each station the surface sample is the row whose 'Depth[m]' is closest
    to zero. The mixed-layer depth (MLD) is the shallowest depth at or below
    that sample where 'Sigma_theta[kg/m^3]' reaches the surface value plus
    `sigma_threshold`. Samples above the surface sample (e.g. negative depths)
    are never considered for the MLD; previously such a sample could be picked
    and cause the station to be silently dropped.

    Parameters
    ----------
    df_latest : pandas.DataFrame
        Must contain 'Station', 'Depth[m]' and 'Sigma_theta[kg/m^3]'. Rows
        missing depth or density are ignored.
    sigma_threshold : float, default 0.03
        Density increase over the surface value that defines the MLD.

    Returns
    -------
    pandas.DataFrame
        One row per station for which an MLD was found, with the columns
        'Station', 'Surface_Depth', 'MLD_Depth', 'Surface_Sigma', 'MLD_Sigma'
        (the threshold density, not the measured one), 'Layer_Thickness' and
        'Avg_pHinsitu[Total]'. Empty when 'pHinsitu[Total]' is absent or no
        station reaches the threshold.
    """
    depth_col = 'Depth[m]'
    sigma_col = 'Sigma_theta[kg/m^3]'
    ph_col = 'pHinsitu[Total]'

    df_clean = df_latest.dropna(subset=[depth_col, sigma_col])

    results = []

    # Without a pH column there is nothing to average for any station.
    if ph_col not in df_clean.columns:
        return pd.DataFrame(results)

    for station, station_data in df_clean.groupby('Station'):
        # Surface sample: first row (in input order) with the smallest |depth|.
        surface_pos = station_data[depth_col].abs().to_numpy().argmin()
        surface_depth = station_data[depth_col].iloc[surface_pos]
        surface_sigma = station_data[sigma_col].iloc[surface_pos]
        target_sigma = surface_sigma + sigma_threshold

        # Search from the surface sample downward only.
        at_or_below = station_data[station_data[depth_col] >= surface_depth]
        at_or_below = at_or_below.sort_values(depth_col)

        mld_candidates = at_or_below[at_or_below[sigma_col] >= target_sigma]
        if mld_candidates.empty:
            continue

        mld_depth = mld_candidates[depth_col].iloc[0]
        layer_data = at_or_below[at_or_below[depth_col] <= mld_depth]

        results.append({
            'Station': station,
            'Surface_Depth': surface_depth,
            'MLD_Depth': mld_depth,
            'Surface_Sigma': surface_sigma,
            'MLD_Sigma': target_sigma,
            'Layer_Thickness': mld_depth - surface_depth,
            'Avg_pHinsitu[Total]': layer_data[ph_col].mean()
        })

    return pd.DataFrame(results)

# Build the per-station surface-to-MLD pH table from the glider data in `df_latest`.
df_pH_mld_averages = get_surface_to_mld_pH_average(df_latest)

In [None]:
def get_first_10_pH_average(df_latest, n_rows=5):
    """
    Average pH and chlorophyll over the first rows of every station.

    This definition groups by 'Station' only and replaces any function of the
    same name defined by an earlier cell once this cell is executed.

    Parameters
    ----------
    df_latest : pandas.DataFrame
        Must contain 'Station', 'Lat [°N]' and 'Lon [°E]'. The
        'pHinsitu[Total]' and 'Chl_a[mg/m^3]' columns are averaged when present.
    n_rows : int, default 5
        Number of leading rows (in the frame's existing order) averaged per
        station. The default keeps the historical value of 5 despite the
        function's name.

    Returns
    -------
    pandas.DataFrame
        One row per station — the first occurrence in `df_latest`, with its
        original index label — holding 'Station', 'Lat [°N]', 'Lon [°E]',
        'Avg_pHinsitu[Total]' and 'Avg_Chl_a[mg/m^3]'. An average is NaN when
        its source column is missing or the station value is NaN.
    """
    # Source column -> name of the averaged column in the result. The averages
    # were previously written under the source names, so selecting the 'Avg_*'
    # columns at the end always raised KeyError.
    averaged = {
        'pHinsitu[Total]': 'Avg_pHinsitu[Total]',
        'Chl_a[mg/m^3]': 'Avg_Chl_a[mg/m^3]',
    }
    available = [col for col in averaged if col in df_latest.columns]

    stations = df_latest.drop_duplicates(subset=['Station'], keep='first').copy()
    # Discard any pre-existing 'Avg_*' columns so the join below cannot collide.
    stations = stations.drop(columns=list(averaged.values()), errors='ignore')

    if available:
        # First n_rows of every station, then one mean per station and column.
        first_rows = df_latest.groupby('Station').head(n_rows)
        station_means = first_rows.groupby('Station')[available].mean().rename(columns=averaged)
        stations = stations.join(station_means, on='Station')

    for avg_col in averaged.values():
        if avg_col not in stations.columns:
            stations[avg_col] = np.nan

    return stations[['Station', 'Lat [°N]', 'Lon [°E]', 'Avg_pHinsitu[Total]', 'Avg_Chl_a[mg/m^3]']]

# Build the per-station averages table from the glider data in `df_latest`.
df_averages = get_first_10_pH_average(df_latest)

In [None]:
# Scratch copy of the first step of get_surface_to_mld_pH_average, run at cell
# level so the intermediates can be inspected.
# Keep only rows that have both a depth and a density value.
df_clean = df_latest.dropna(subset=['Depth[m]', 'Sigma_theta[kg/m^3]'])
# Per station, the index label of the row whose depth is closest to zero.
surface_idx = df_clean.groupby('Station')['Depth[m]'].apply(lambda x: (x.abs()).idxmin())
# Surface depth and density for each station, indexed by Station.
df_surface = df_clean.loc[surface_idx, ['Station', 'Depth[m]', 'Sigma_theta[kg/m^3]']].set_index('Station')