# Survey Visualization
This notebook helps a user locate the start and stop times of different survey patterns

## Import Packages

In [None]:
import os
import sys
from pathlib import Path
import json
import pandas as pd
from datetime import datetime, timedelta
import tiledb
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.colors import Normalize
import matplotlib.cm as cm
import matplotlib.dates as mdates

from es_sfgtools.processing.pipeline import DataHandler
from es_sfgtools.utils.archive_pull import list_survey_files

from es_sfgtools.utils.loggers import set_all_logger_levels, GNSSLogger, ProcessLogger
from es_sfgtools.utils.metadata.site import import_site


## Set Survey parameters

In [None]:
# Input survey parameters (edit these to point the notebook at another campaign)
network = "cascadia-gorda"     # network the site belongs to
site = "GCC1"                  # site / station code
campaign_name = "2024_A_1126"  # campaign whose surveys are being located
vessel_type = "SV3"            # selects the processing pipeline: "SV3" or "SV2"

## Load and Inspect Existing Metadata

In [None]:
# Path to the metadata file
metadata_uri = "./site_vessels/GCC1.2025-03-19.json"

# Load and inspect existing metadata
print(f"Loading metadata from {metadata_uri} ... \n Contains the following campaigns and surveys:")
site_meta = import_site(metadata_uri)

# Find the campaign selected by `campaign_name`. Its `start`/`end` are reused
# by the data-handler cell below, so fail loudly here if the campaign is not
# present instead of hitting a NameError later.
for campaign in site_meta.campaigns:
    if campaign.name != campaign_name:
        continue

    start = campaign.start
    end = campaign.end
    print(f"  Campaign: {campaign.name} \n   Start: {start} \n    End: {end}")

    for survey in campaign.surveys:
        print(f"  Survey: {survey.id} \n   Start: {survey.start} \n    End: {survey.end}")

    # Stop at the match so `campaign` stays bound to the selected campaign.
    break
else:
    raise ValueError(f"Campaign {campaign_name} not found in {metadata_uri}")

## Set & create the data directory

In [None]:

# Set data directory path for local environment
directory = './data/sfg'

# Create data directory if it doesn't exist ("~" in `directory` is expanded)
data_dir = Path(os.path.expanduser(directory))
os.makedirs(data_dir, exist_ok=True)

#### USE THE FOLLOWING DEFAULTS UNLESS YOU NEED TO CHANGE THEM ####
# Point the data handler at the selected network/site/campaign, using the
# campaign start/end found in the metadata cell.
data_handler = DataHandler(directory=data_dir)
data_handler.change_working_station(network=network,
                                    station=site,
                                    campaign=campaign_name,
                                    start_date=start.date(),
                                    end_date=end.date())

# Pick the processing pipeline that matches the vessel type
if vessel_type == 'SV3':
    pipeline, config = data_handler.get_pipeline_sv3()
elif vessel_type == 'SV2':
    pipeline, config = data_handler.get_pipeline_sv2()
else:
    raise ValueError(f"Vessel type {vessel_type} not recognized")

# Location of the shot-data TileDB array under the data directory.
# Built with path joins: plain string concatenation dropped the separator
# between `directory` and `network` ("./data/sfgcascadia-gorda/...").
# NOTE(review): assumes the pipeline writes to <data_dir>/<network>/<site>/TileDB
# -- confirm against DataHandler.
shotdata_uri = str(data_dir / network / site / "TileDB" / "shotdata_db.tdb")

## Get the acoustic (DFOP00) files from the archive

In [None]:
# Ask the archive for the file list of this network / site / campaign
remote_filepaths = list_survey_files(
    network=network,
    station=site,
    survey=campaign_name,
)

# Register the remote files with the data handler
data_handler.add_data_remote(remote_filepaths=remote_filepaths)

# Download only the DFOP00 files
# (override=False presumably skips files already present -- TODO confirm)
data_handler.download_data(file_types="dfop00", override=False)

## Read DFOP00 files into shotdata array

In [None]:
# Turn on the override flag for the DFOP00 step, hand the updated config back
# to the pipeline, then run the DFOP00 processing step.
config.dfop00_config.override = True
pipeline.config = config
pipeline.process_dfop00()

In [None]:
def read_df(uri, start: datetime, end: datetime = None, **kwargs) -> pd.DataFrame:
    """
    Read a dataframe from a TileDB array between the start and end dates.

    Args:
        uri: Location of the TileDB array; converted with ``str()`` before opening.
        start (datetime.datetime): The start date.
        end (datetime.datetime, optional): The end date. Defaults to one day
            after ``start``.
        **kwargs: Accepted but currently unused.

    Returns:
        pd.DataFrame: The selected rows, or ``None`` when slicing the array
        raises ``IndexError`` (the error is printed).
    """
    if end is None:
        end = start + timedelta(days=1)

    # Log after the default is resolved so the message shows the real window.
    print(f"Reading dataframe from {uri} for {start} to {end}")

    with tiledb.open(str(uri), mode="r") as array:
        try:
            df = array.df[slice(np.datetime64(start), np.datetime64(end)), :]
        except IndexError as e:
            print(e)
            return None
    return df

# Campaign window as held by the data handler
campaign_start, campaign_end = data_handler.date_range[0], data_handler.date_range[1]

# Load the shot data that falls inside the campaign window
df = read_df(
    shotdata_uri,
    start=campaign_start,
    end=campaign_end,
)

# Display the dataframe as the cell output
df

## Plot waveglider locations

In [None]:
# Begin with an empty survey list; to start from surveys already recorded in
# the metadata file instead, uncomment the two lines below.
surveys = list()

#meta = json.loads(Path(metadata_uri).read_text())
#surveys = meta['campaigns'][0]['surveys']

# Display the current survey list as the cell output
surveys

In [None]:
def plot_en(df, surveys, save_as=None):
    """
    Plot east and north positions against time and shade each survey window.

    Args:
        df: Table providing the ``triggerTime``, ``east0`` and ``north0`` columns.
        surveys: Sequence of dicts with ``'start'``, ``'end'`` and ``'type'``
            keys and an optional ``'notes'`` key; each one is drawn as a
            shaded span on both panels.
        save_as: Optional output path. When given, the figure is saved there.
    """
    fig, axs = plt.subplots(nrows=2, figsize=(16, 10))
    axs[0].set_ylabel("East (m)")
    axs[1].set_ylabel("North (m)")

    axs[0].scatter(df["triggerTime"], df["east0"], alpha=0.25)
    axs[1].scatter(df["triggerTime"], df["north0"], alpha=0.25)

    # One colour per survey, spread across the rainbow colormap
    survey_colors = cm.rainbow(np.linspace(0, 1, len(surveys)))
    for ax in axs:
        for color, survey in zip(survey_colors, surveys):
            label = survey['type'] + " " + survey.get('notes', "")
            ax.axvspan(
                pd.to_datetime(survey['start']),
                pd.to_datetime(survey['end']),
                color=color,
                alpha=0.3,
                label=label,
            )
        # One major tick per day, labelled as month-day
        ax.xaxis.set_major_locator(mdates.DayLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
        # Rotate the labels on every panel; plt.xticks() would only affect
        # the current (last) axes.
        ax.tick_params(axis="x", labelrotation=90)

    # A legend is only meaningful when at least one survey span was drawn
    if len(surveys) > 0:
        axs[0].legend()
    if save_as is not None:
        fig.savefig(save_as)

def plot_wg_position(df, survey_name, survey_type, start, end):
    """
    Plot the east/north positions between ``start`` and ``end``, coloured by elapsed time.

    The figure is saved as ``"{survey_name}_{survey_type}.png"`` and the first
    and last ``triggerTime`` inside the window are printed.

    Args:
        df: Table providing the ``triggerTime``, ``east0`` and ``north0`` columns.
        survey_name: Used in the title and the output file name.
        survey_type: Used in the title and the output file name.
        start: Start of the window (inclusive); must provide ``isoformat()``.
        end: End of the window (inclusive); must provide ``isoformat()``.

    Returns:
        None. When no rows fall inside the window a message is printed and
        nothing is plotted or saved.
    """
    # Build a single mask against `df` so the mask and the frame share an index
    in_window = (df['triggerTime'] >= start) & (df['triggerTime'] <= end)
    temp_df = df[in_window]

    title = f"{survey_name} {survey_type} from {start.isoformat()} to {end.isoformat()}"
    if temp_df.empty:
        # Nothing to plot: min()/iloc[0] below would fail on an empty selection
        print(f"No data to plot for {title}")
        return

    fig, ax = plt.subplots(figsize=(16, 10))
    save_as = f"{survey_name}_{survey_type}.png"
    fig.suptitle(title)
    ax.set_xlabel("East (m)")
    ax.set_ylabel("North (m)")

    # Hours elapsed since the first point in the window, used as the colour value
    colormap_times = temp_df["triggerTime"].apply(lambda x: x.timestamp()).to_numpy()
    colormap_times_scaled = (colormap_times - colormap_times.min()) / 3600

    norm = Normalize(vmin=0, vmax=colormap_times_scaled.max())
    sc = ax.scatter(
        temp_df["east0"],
        temp_df["north0"],
        c=colormap_times_scaled,
        cmap="viridis",
        label="Antenna Position",
        norm=norm,
        alpha=0.25,
    )
    # The colorbar takes its norm from the scatter mappable
    fig.colorbar(sc, ax=ax, label="Time (hr)")
    ax.legend()
    fig.savefig(save_as)
    print(temp_df.triggerTime.iloc[0].isoformat(), temp_df.triggerTime.iloc[-1].isoformat())


In [None]:
# Adjust the offsets below to trim junk data at the beginning or end if needed
start = df.triggerTime.iloc[0] + timedelta(hours=0)
end = df.triggerTime.iloc[-1]

# Select the window with one mask built from `df` itself
temp_df = df[(df['triggerTime'] >= start) & (df['triggerTime'] <= end)]

plot_en(temp_df, surveys)
# Use the campaign name string for the output name; `campaign` is the metadata
# object left over from the metadata-inspection loop.
plot_wg_position(df, f"{site}_{campaign_name}", "all", start, end)

In [None]:
## Update these for the survey you want to plot
# The name is built from the campaign name string; `campaign` is the metadata
# object left over from the metadata-inspection loop.
survey_name = f"{site}_{campaign_name}_4"
survey_type = "fixed"
# NOTE(review): these example timestamps are from 2022 while campaign_name is
# set to '2024_A_1126' -- update them to fall inside the campaign being inspected.
start = datetime.fromisoformat("2022-05-04T22:00:00")
end = datetime.fromisoformat("2022-05-10T12:17:00")

## If you already have a survey in the metadata, you can load it and adjust from there
#start = datetime.fromisoformat(survey['start'])
#end = datetime.fromisoformat(survey['end'])
# start += timedelta(hours=0)
# end -= timedelta(hours=6)

print(start.isoformat(), end.isoformat())

# Candidate survey record, using the same keys plot_en reads
survey = {
    'start': start.isoformat(),
    'end': end.isoformat(),
    'type': survey_type,
    'name': survey_name,
}
plot_en(temp_df, [survey])
plot_wg_position(df, survey_name, survey_type, start, end)

In [None]:
#use library tools to add survey to metadata
# not yet implemented

In [None]:
# Before running this cell, update the json file with the new surveys

# Re-read the metadata file and pick up the surveys of the selected campaign.
# The JSON 'name' field is compared with the `campaign_name` string; `campaign`
# is the metadata object left over from the metadata-inspection loop.
meta = json.loads(Path(metadata_uri).read_text())
for campaign_meta in meta['campaigns']:
    if campaign_meta['name'] == campaign_name:
        surveys = campaign_meta['surveys']

plot_en(temp_df, surveys, save_as=f"{site}_{campaign_name}_surveys.png")

In [None]:
# Plot a single survey picked by its position in `surveys`
survey = surveys[4]

survey_name = f"{site}_{survey['id']}"
survey_type = survey['type']
start = datetime.fromisoformat(survey['start'])
end = datetime.fromisoformat(survey['end'])

plot_wg_position(
    df,
    survey_name,
    survey_type,
    start,
    end,
)
print(start.isoformat(), end.isoformat())

In [None]:
# Display the current value of `campaign`. It is bound by the loop over
# site_meta.campaigns in the metadata cell, so it is a campaign object rather
# than the `campaign_name` string.
campaign

In [None]:
# Produce one position plot per survey in `surveys`; each plot is named from
# the site code and the survey id.
for survey in surveys:
    survey_name = f"{site}_{survey['id']}"
    survey_type = survey['type']
    start = datetime.fromisoformat(survey['start'])
    end = datetime.fromisoformat(survey['end'])
    plot_wg_position(
        df,
        survey_name,
        survey_type,
        start,
        end,
    )