In [1]:
# Establish necessary modules

import paramiko
import functools
import os
from tqdm import tqdm
from urllib.request import urlretrieve
import pandas as pd
import xarray as xr
from glob import glob
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Establish necessary directories

parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
data_dir   = os.path.join(parent_dir, 'Data')
navis_dir  = os.path.join(data_dir, 'navis')
navis_101  = os.path.join(navis_dir, 'navis_101')
navis_102  = os.path.join(navis_dir, 'navis_102')
n101_folder = os.path.join(navis_101, 'csv_files')
n102_folder = os.path.join(navis_102, 'csv_files')

# If directories don't exist, create them
if not os.path.exists(navis_dir):
    os.makedirs(navis_dir)
if not os.path.exists(navis_101):
    os.makedirs(navis_101)
if not os.path.exists(navis_102):
    os.makedirs(navis_102)

In [3]:
# Navis float download section

# Open a transport
host,port = "rudics.thorium.cls.fr",22
transport = paramiko.Transport((host,port))

# Auth
username_list = ['s_f1101','s_f1102']
password_list = ["KhuBGFTj","hckrJHGY"]
directory_list = [navis_101, navis_102]

for username, password, out_dir in zip(username_list, password_list, directory_list):
    
    transport = paramiko.Transport((host,port))
    transport.connect(None,username,password)

    # Create an SFTP session
    sftp = paramiko.SFTPClient.from_transport(transport)
    print("Session opened")

    # List files in the root directory or any specific directory
    directory = '/'  # Change this to your target directory
    files = sftp.listdir(directory)

    # Print the list of files
    print(f"Files in {directory}:")
    for file in files:
        print(file)

    msg_files = [file for file in files if file.endswith('.msg')]
    # Download each .msg file
    for msg_file in tqdm(msg_files):
        local_path = os.path.join(out_dir, msg_file)
        remote_path = os.path.join(directory, msg_file)

        sftp.get(remote_path, local_path)

    print("Download complete.")

    sftp.close()

    print("Session closed")

Session opened
Files in /:
1101.077.isus
1101.087.log
1101.061.msg
1101.003.msg
1101.049.msg
1101.029.isus
1101.073.msg
1101.2207250221.log
1101.2207250341.log
1101.2204270739.log
1101.019.msg
backups
1101.007.msg
1101.079.log
1101.036.isus
1101.053.log
1101.065.msg
1101.2204270739
1101.2207250421
1101.061.log
1101.005.msg
1101.034.msg
1101.071.msg
1101.032.msg
1101.2207250055.log
1101.042.log
1101.050.isus
1101.090.log
1101.052.msg
1101.030.isus
1101.000.msg
1101.086.log
1101.088.isus
1101.048.log
1101.089.log
1101.024.msg
1101.049.isus
1101.065.log
1101.013.log
1101.032.log
1101.057.log
1101.027.isus
1101.060.log
1101.014.isus
1101.010.msg
.MakeCmd
1101.057.isus
1101.072.isus
1101.084.isus
1101.056.msg
.bash_logout
1101.081.msg
1101.047.msg
1101.041.msg
1101.025.log
1101.063.log
1101.009.isus
1101.053.isus
1101.075.msg
1101.014.log
1101.033.isus
1101.004.msg
1101.059.log
1101.074.msg
.sxrc
1101.045.log
1101.024.log
1101.011.log
1101.036.msg
1101.037.log
1101.2204270834
.history
1101.

100%|██████████| 91/91 [00:44<00:00,  2.05it/s]


Download complete.
Session closed
Session opened
Files in /:
1102.043.isus
1102.2207170118
1102.020.msg
1102.089.msg
1102.087.isus
1102.080.log
1102.010.msg
1102.009.isus
1102.088.isus
1102.093.isus
1102.063.isus
1102.023.msg
1102.013.msg
1102.2206211049
1102.009.log
backups
1102.046.log
1102.003.msg
1102.060.msg
1102.086.isus
1102.2207170139
1102.042.isus
1102.083.isus
1102.012.log
1102.024.isus
1102.048.msg
1102.052.log
1102.090.msg
1102.088.log
1102.2207170158.log
1102.027.log
1102.084.log
1102.075.isus
1102.047.log
1102.045.isus
1102.068.msg
1102.089.isus
1102.057.isus
1102.070.log
1102.049.msg
1102.076.msg
1102.064.isus
1102.041.log
1102.001.isus
1102.2207170038
1102.2207162228
1102.2207170158
.MakeCmd
1102.2206160929
1102.2207162330.log
1102.2206211123
1102.050.isus
1102.024.log
1102.053.log
1102.078.msg
1102.005.msg
.bash_logout
1102.2207170038.log
1102.019.log
1102.2207162356.log
1102.017.msg
1102.007.log
1102.014.isus
1102.042.msg
1102.058.isus
1102.017.log
1102.043.msg
1102.0

100%|██████████| 96/96 [00:43<00:00,  2.20it/s]

Download complete.
Session closed





In [3]:
# Other, WMO, floats

wmo_list = [4903532, 1902637]
#Float 1 = test float in the Icelandic Bassin
float_1_url = 'https://data-argo.ifremer.fr/dac/aoml/4903532/4903532_Sprof.nc'
#Float 2 = test float on Custard with glider next to it
float_2_url = 'https://data-argo.ifremer.fr/dac/coriolis/1902637/1902637_Sprof.nc'

#List the floats
floats_url = [float_1_url, float_2_url]

#Assign the local float directory
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
floats_dir =  os.path.join(parent_dir, 'Data\\Floats')

#Create floats filename
floats_filenames = []
for i in floats_url:
    filename = floats_dir + '/' + i.rsplit('/', 1)[1]
    floats_filenames.append(filename)

for url, filename in zip(floats_url, floats_filenames):
    urlretrieve(url, filename)

position_df = pd.DataFrame({'PROF_NUM' : str(), 'LONGITUDE' : [], 'LATITUDE' : [], 'float' : int()})
last_position_df = pd.DataFrame({'PROF_NUM' : str(), 'LONGITUDE' : [], 'LATITUDE' : [], 'float' : int()})

for file, wmo in zip(floats_filenames, wmo_list):
    dat = xr.open_dataset(file)
    dat = dat.rename({'CYCLE_NUMBER':'PROF_NUM'}).swap_dims({'N_PROF':'PROF_NUM'})
    temp_df = dat[['LONGITUDE', 'LATITUDE', 'JULD']].to_dataframe().reset_index()
    temp_df['float'] = wmo
    last_temp_df = temp_df[temp_df['JULD'] == max(temp_df['JULD'])]
    print(temp_df['JULD'])
    last_position_df = pd.concat([last_position_df, last_temp_df], ignore_index=True)
    position_df = pd.concat([position_df, temp_df], ignore_index=True)
    dat.close()

0    2023-05-07 07:30:52.000794624
1    2023-05-17 04:56:51.000510464
2    2023-05-27 02:10:14.000229376
3    2023-06-05 23:48:13.002537472
4    2023-06-15 20:47:59.002213888
5    2023-06-25 18:05:52.001938432
6    2023-07-05 15:24:51.001644032
7    2023-07-15 15:13:08.001628160
8    2023-07-25 14:32:29.001548288
9    2023-08-04 13:47:56.001462784
10   2023-08-14 13:27:13.001434112
11   2023-08-24 10:41:40.001132544
12   2023-09-03 07:57:23.000834048
13   2023-09-13 06:06:02.000652288
14   2023-09-23 06:23:46.000677376
15   2023-10-03 04:46:04.000496128
16   2023-10-13 02:05:46.000221696
17   2023-10-23 00:04:54.000005632
18   2023-11-01 21:27:41.002292224
19   2023-11-11 21:06:30.002262016
20   2023-11-21 22:03:41.002364416
21   2023-12-02 00:09:26.000012800
22   2023-12-11 21:34:13.002302464
23   2023-12-21 18:51:03.002005504
24   2023-12-31 16:50:41.001788928
25   2024-01-10 14:21:20.001532416
26   2024-01-20 12:47:22.001355776
27   2024-01-30 12:51:55.001361408
28   2024-02-09 11:0

  last_position_df = pd.concat([last_position_df, last_temp_df], ignore_index=True)
  position_df = pd.concat([position_df, temp_df], ignore_index=True)


In [4]:
# Processing all float data into single dataframe

def open_navis(filepath, float_ref):
    raw_var = ['mtime', 'pnum', 'lat', 'lon', 'pres', 'T', 'C', 'oxy1', 'oxy2', 'mcoms1', 'mcoms2', 'mcoms3']
    new_colnames = ['datetime', 'prof', 'lat', 'lon', 'pres', 'temp', 'conductivity', 'oxy1', 'oxy2', 'mcoms1', 'mcoms2', 'mcoms3']
    df = pd.read_csv(filepath)
    df_raw = df[raw_var]
    df_raw.columns = new_colnames

    df_computed = df_raw.copy()

    df_computed.loc[:, 'Fchl'] = (df_raw['mcoms1'] - chl_dark) * chl_slope
    df_computed.loc[:, 'beta'] = (df_raw['mcoms2'] - beta_dark) * beta_slope
    df_computed.loc[:, 'fdom'] = (df_raw['mcoms3'] - fdom_dark) * fdom_slope

    df_computed.loc[:, 'float'] = float_ref

    return(df_computed)

def clean_datetime_column(df, column):
    # Identify rows that do not match the expected datetime format
    mask = df[column].str.contains(r'^\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}$', regex=True)
    
    # Print the rows that are problematic
    print("\nProblematic entries:")
    print(df[~mask])
    
    # Filter out problematic rows or handle them accordingly
    cleaned_df = df[mask].copy()
    
    return cleaned_df

#set the cal val for N101
chl_dark = 51
chl_slope = 0.001553

beta_dark = 92
beta_slope = 0.0000002485

fdom_dark = 52
fdom_slope = 0.01118

new_colnames = ['datetime', 'prof', 'lat', 'lon', 'pres', 'temp', 'conductivity', 'oxy1', 'oxy2', 'mcoms1', 'mcoms2', 'mcoms3']
compiled_101 = pd.DataFrame(columns=new_colnames)
for file in tqdm(os.listdir(n101_folder)):
    filepath = os.path.join(n101_folder, file)
    temp_df = open_navis(filepath, 'navis101')
    compiled_101 = pd.concat([compiled_101, temp_df], ignore_index= True)

#set the cal val for N102
chl_dark = 50
chl_slope = 0.002006

beta_dark = 49
beta_slope = 0.0000003524

fdom_dark = 51
fdom_slope = 0.006619

new_colnames = ['datetime', 'prof', 'lat', 'lon', 'pres', 'temp', 'conductivity', 'oxy1', 'oxy2', 'mcoms1', 'mcoms2', 'mcoms3']
compiled_102 = pd.DataFrame(columns=new_colnames)
for file in tqdm(os.listdir(n102_folder)):
    filepath = os.path.join(n102_folder, file)
    temp_df = open_navis(filepath, 'navis102')
    compiled_102 = pd.concat([compiled_102, temp_df], ignore_index= True)

navis_df = pd.concat([compiled_101, compiled_102])

navis_position = navis_df[['datetime', 'prof', 'float', 'lon', 'lat']].drop_duplicates(subset = ['prof', 'float'])
navis_df['JULD'] = pd.to_datetime(navis_df['datetime'])
navis_df.to_csv(os.path.join(navis_dir, 'merged_table'))

navis_position.columns = ['JULD', 'PROF_NUM', 'float', 'LONGITUDE', 'LATITUDE']
navis_position['JULD'] = navis_position['JULD'].astype(str)

cleaned_navis_position = navis_position.copy()
cleaned_navis_position.head()
cleaned_navis_position['JULD'] = pd.to_datetime(cleaned_navis_position['JULD'])

navis_position['JULD'] = pd.to_datetime(navis_position['JULD'])

  compiled_101 = pd.concat([compiled_101, temp_df], ignore_index= True)
100%|██████████| 88/88 [00:00<00:00, 109.17it/s]
  compiled_102 = pd.concat([compiled_102, temp_df], ignore_index= True)
100%|██████████| 93/93 [00:00<00:00, 110.42it/s]


In [5]:
# Output with all float positions, up to date.

full_position = pd.concat([position_df, navis_position], ignore_index = True)
full_position.to_csv(os.path.join(floats_dir, 'Float_positions.csv'))