In [4]:
# Establish necessary modules

import paramiko
import functools
import os
from tqdm import tqdm
from urllib.request import urlretrieve
import pandas as pd
import xarray as xr
from glob import glob
import numpy as np
import matplotlib.pyplot as plt

In [6]:
# Establish necessary directories
#
# Every path is derived from the parent of the current working directory:
#   <parent>/Data/navis/navis_101/csv_files
#   <parent>/Data/navis/navis_102/csv_files

parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
data_dir = os.path.join(parent_dir, 'Data')
navis_dir = os.path.join(data_dir, 'navis')
navis_101 = os.path.join(navis_dir, 'navis_101')
navis_102 = os.path.join(navis_dir, 'navis_102')
n101_folder = os.path.join(navis_101, 'csv_files')
n102_folder = os.path.join(navis_102, 'csv_files')

# Create the per-float directories if they are missing. makedirs also creates
# the intermediate levels (Data/ and navis/), and exist_ok=True makes re-running
# the cell safe without a separate check-then-create step.
for _float_dir in (navis_101, navis_102):
    os.makedirs(_float_dir, exist_ok=True)

In [None]:
# Navis float download section
#
# Downloads every '.msg' file found in the remote root directory of each
# float's SFTP account into that float's local directory.

host, port = "rudics.thorium.cls.fr", 22

# Auth
# Passwords are read from the environment so that no secret is stored in the
# notebook. Export the two variables below before running this cell.
# NOTE(review): the passwords that used to be written in this cell have been
# exposed to anyone with access to the notebook and should be rotated.
username_list = ['s_f1101', 's_f1102']
password_env_list = ['NAVIS_101_SFTP_PASSWORD', 'NAVIS_102_SFTP_PASSWORD']
directory_list = [navis_101, navis_102]

missing_env = [name for name in password_env_list if not os.environ.get(name)]
if missing_env:
    raise RuntimeError(
        "Missing SFTP password environment variable(s): " + ", ".join(missing_env)
    )
password_list = [os.environ[name] for name in password_env_list]

for username, password, out_dir in zip(username_list, password_list, directory_list):

    # One transport per account; it is always closed in the finally below.
    transport = paramiko.Transport((host, port))
    try:
        # NOTE(review): hostkey=None means the server's host key is not verified.
        transport.connect(None, username, password)

        # Create an SFTP session
        sftp = paramiko.SFTPClient.from_transport(transport)
        print("Session opened")
        try:
            # List files in the root directory or any specific directory
            directory = '/'  # Change this to your target directory
            files = sftp.listdir(directory)

            # Print the list of files
            print(f"Files in {directory}:")
            for file in files:
                print(file)

            msg_files = [file for file in files if file.endswith('.msg')]
            # Download each .msg file
            for msg_file in tqdm(msg_files):
                local_path = os.path.join(out_dir, msg_file)
                # Remote SFTP paths always use '/', whatever the local OS is.
                remote_path = directory.rstrip('/') + '/' + msg_file

                sftp.get(remote_path, local_path)

            print("Download complete.")
        finally:
            sftp.close()
    finally:
        transport.close()
        print("Session closed")

In [13]:
# Other, WMO, floats
#
# Builds two tables from the locally stored Sprof files:
#   position_df      - one row per profile of every WMO float
#   last_position_df - the row(s) with the most recent JULD of each float
# NOTE(review): this cell only reads the local copies in floats_dir; the URLs
# are used solely to derive the file names. Confirm where the download happens.

wmo_list = [4903532, 1902637]
# Float 1 = test float in the Iceland Basin
float_1_url = 'https://data-argo.ifremer.fr/dac/aoml/4903532/4903532_Sprof.nc'
# Float 2 = test float on Custard with glider next to it
float_2_url = 'https://data-argo.ifremer.fr/dac/coriolis/1902637/1902637_Sprof.nc'

# List the floats
floats_url = [float_1_url, float_2_url]

# Assign the local float directory (joined per component so it works on any OS)
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
floats_dir = os.path.join(parent_dir, 'Data', 'Floats')

# Create floats filenames: last URL component placed inside floats_dir
floats_filenames = [os.path.join(floats_dir, url.rsplit('/', 1)[1]) for url in floats_url]

position_frames = []
last_position_frames = []

for file, wmo in zip(floats_filenames, wmo_list):
    # The context manager closes the dataset that was actually opened, even if
    # reading fails; the table is extracted while the file is still open.
    with xr.open_dataset(file) as dat:
        profiles = dat.rename({'CYCLE_NUMBER': 'PROF_NUM'}).swap_dims({'N_PROF': 'PROF_NUM'})
        temp_df = profiles[['LONGITUDE', 'LATITUDE', 'JULD']].to_dataframe().reset_index()
    temp_df['float'] = wmo
    # Series.max() skips NaT, unlike the builtin max().
    last_temp_df = temp_df[temp_df['JULD'] == temp_df['JULD'].max()]

    last_position_frames.append(last_temp_df)
    position_frames.append(temp_df)

# Concatenate once instead of growing the frames inside the loop.
position_df = pd.concat(position_frames, ignore_index=True)
last_position_df = pd.concat(last_position_frames, ignore_index=True)

# Keep the leading column order the tables had previously.
_lead_cols = ['PROF_NUM', 'LONGITUDE', 'LATITUDE', 'float']
position_df = position_df[_lead_cols + [c for c in position_df.columns if c not in _lead_cols]]
last_position_df = last_position_df[_lead_cols + [c for c in last_position_df.columns if c not in _lead_cols]]

  last_position_df = pd.concat([last_position_df, last_temp_df], ignore_index=True)
  position_df = pd.concat([position_df, temp_df], ignore_index=True)


In [14]:
# Processing all float data into single dataframe

def open_navis(filepath, float_ref, calibration=None):
    """Load one Navis CSV file and add calibrated MCOMS columns.

    Parameters
    ----------
    filepath : str
        Path of a CSV file readable by ``pandas.read_csv``. It must contain the
        columns listed in ``raw_var`` below; any other column is dropped.
    float_ref : object
        Value written to the ``float`` column of every returned row.
    calibration : dict, optional
        Mapping with the keys ``chl_dark``, ``chl_slope``, ``beta_dark``,
        ``beta_slope``, ``fdom_dark`` and ``fdom_slope``. When omitted, the
        module-level variables of the same names are read at call time.

    Returns
    -------
    pandas.DataFrame
        The selected columns under their new names, followed by ``Fchl``,
        ``beta``, ``fdom`` (each computed as ``(raw - dark) * slope``) and
        ``float``.

    Raises
    ------
    KeyError
        If the file lacks one of the ``raw_var`` columns, or ``calibration``
        lacks one of its keys.
    """
    raw_var = ['mtime', 'pnum', 'lat', 'lon', 'pres', 'T', 'C', 'oxy1', 'oxy2', 'mcoms1', 'mcoms2', 'mcoms3']
    new_colnames = ['datetime', 'prof', 'lat', 'lon', 'pres', 'temp', 'conductivity', 'oxy1', 'oxy2', 'mcoms1', 'mcoms2', 'mcoms3']

    if calibration is None:
        # Backward-compatible default: use the notebook-level calibration values.
        calibration = {
            'chl_dark': chl_dark, 'chl_slope': chl_slope,
            'beta_dark': beta_dark, 'beta_slope': beta_slope,
            'fdom_dark': fdom_dark, 'fdom_slope': fdom_slope,
        }

    df = pd.read_csv(filepath)
    # Rename through an explicit old -> new mapping; this yields a new frame,
    # so the assignments below never write into a slice of ``df``.
    df_computed = df[raw_var].rename(columns=dict(zip(raw_var, new_colnames)))

    df_computed['Fchl'] = (df_computed['mcoms1'] - calibration['chl_dark']) * calibration['chl_slope']
    df_computed['beta'] = (df_computed['mcoms2'] - calibration['beta_dark']) * calibration['beta_slope']
    df_computed['fdom'] = (df_computed['mcoms3'] - calibration['fdom_dark']) * calibration['fdom_slope']

    df_computed['float'] = float_ref

    return df_computed

def clean_datetime_column(df, column):
    """Return a copy of ``df`` without rows whose ``column`` is not a timestamp string.

    A value is kept only when it matches ``YYYY/MM/DD HH:MM:SS`` (digits only).
    Missing values count as problematic. The rejected rows are printed so they
    can be inspected.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to filter; it is not modified.
    column : str
        Name of a string column of ``df`` holding the timestamps.

    Returns
    -------
    pandas.DataFrame
        A copy containing only the rows that matched the expected format.
    """
    # Identify rows that match the expected datetime format. na=False makes
    # missing values evaluate to False instead of NaN, which would otherwise
    # break both the negation and the boolean indexing below.
    mask = df[column].str.contains(r'^\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}$', regex=True, na=False)

    # Print the rows that are problematic
    print("\nProblematic entries:")
    print(df[~mask])

    # Filter out problematic rows
    cleaned_df = df[mask].copy()

    return cleaned_df

# Column names of the per-file tables; used for the empty fallback below.
new_colnames = ['datetime', 'prof', 'lat', 'lon', 'pres', 'temp', 'conductivity', 'oxy1', 'oxy2', 'mcoms1', 'mcoms2', 'mcoms3']


def _compile_navis_folder(folder, float_ref):
    """Read every file of ``folder`` with open_navis and stack the results.

    Files are read in sorted name order so the result is reproducible, and
    the tables are concatenated once rather than inside the loop. An empty
    folder yields an empty table with the ``new_colnames`` columns.
    """
    frames = [
        open_navis(os.path.join(folder, name), float_ref)
        for name in tqdm(sorted(os.listdir(folder)))
    ]
    if not frames:
        return pd.DataFrame(columns=new_colnames)
    return pd.concat(frames, ignore_index=True)


# Set the calibration values for N101.
# open_navis reads these module-level names when it is called, so they must be
# assigned before the corresponding folder is compiled.
chl_dark = 51
chl_slope = 0.001553

beta_dark = 92
beta_slope = 0.0000002485

fdom_dark = 52
fdom_slope = 0.01118

compiled_101 = _compile_navis_folder(n101_folder, 'navis101')

# Set the calibration values for N102.
chl_dark = 50
chl_slope = 0.002006

beta_dark = 49
beta_slope = 0.0000003524

fdom_dark = 51
fdom_slope = 0.006619

compiled_102 = _compile_navis_folder(n102_folder, 'navis102')

# Stack both floats; ignore_index gives the merged table unique row labels.
navis_df = pd.concat([compiled_101, compiled_102], ignore_index=True)
navis_df['JULD'] = pd.to_datetime(navis_df['datetime'])
navis_df.to_csv(os.path.join(navis_dir, 'merged_table'))

# One position per (profile, float): the first row of each pair is kept, and
# the columns are renamed to the names used by the WMO float tables.
navis_position = (
    navis_df[['datetime', 'prof', 'float', 'lon', 'lat']]
    .drop_duplicates(subset=['prof', 'float'])
    .rename(columns={
        'datetime': 'JULD',
        'prof': 'PROF_NUM',
        'lon': 'LONGITUDE',
        'lat': 'LATITUDE',
    })
)
navis_position['JULD'] = pd.to_datetime(navis_position['JULD'])

# Independent copy of the position table.
cleaned_navis_position = navis_position.copy()

  compiled_101 = pd.concat([compiled_101, temp_df], ignore_index= True)
100%|██████████| 88/88 [00:00<00:00, 108.33it/s]
  compiled_102 = pd.concat([compiled_102, temp_df], ignore_index= True)
100%|██████████| 93/93 [00:00<00:00, 113.26it/s]


In [15]:
# Combine the WMO float positions with the Navis positions into one table
# (rows renumbered from 0) and write it to Float_positions.csv in floats_dir.

_positions_csv = os.path.join(floats_dir, 'Float_positions.csv')
full_position = pd.concat(
    [position_df, navis_position],
    ignore_index=True,
)
full_position.to_csv(_positions_csv)