# Download BGC Argo Floats data
This notebook downloads the data files of a BGC-Argo float, identified by its WMO number.


In [1]:
import polars as pl
import pandas as pd
import requests
import shutil
import os
import gzip
from pathlib import Path
import re
from tqdm import tqdm
from urllib.request import urlretrieve
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib as mpl

In [2]:
# Remote locations on data-argo.ifremer.fr: the root of the per-DAC folders
# and the synthetic-profile index file.
dac = 'https://data-argo.ifremer.fr/dac'
host = 'https://data-argo.ifremer.fr/argo_synthetic-profile_index.txt'

# Local folder and file name under which the index is saved.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
data_directory = 'C:/Users/flapet/OneDrive - NOC/Documents/IDAPro/lib/db_building/data'
filename = 'synth_file.txt'
destination = os.path.join(data_directory, filename)

In [3]:
# Download the index file once and stream it to disk.
# - A single request is made; the `with` block closes the connection when done.
# - raise_for_status() stops on an HTTP error instead of saving an error page
#   as if it were the index.
# - Writing chunk by chunk avoids holding the whole file in memory.
# - The timeout applies to connecting and to each read, not to the whole transfer.
with requests.get(host, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open(destination, "wb") as f:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            f.write(chunk)


In [4]:
# Parse the downloaded index file, skipping its first 8 lines
# (presumably the file's comment header — confirm against the index format).
index_table = pl.read_csv(source=destination, skip_rows=8)

# Number of rows in the index.
index_table.height

360854

In [5]:
def extract_digits(text):
    """Return the first run of consecutive digits found in ``text``.

    Parameters
    ----------
    text : str
        String to search, e.g. ``'aoml/1900722/profiles/SD1900722_001.nc'``.

    Returns
    -------
    str or None
        The first digit run as a string, or ``None`` when ``text`` contains
        no digit. ``None`` is used instead of the integer ``0`` so the result
        is never a mix of ``str`` and ``int``; it stays falsy, and it maps to
        a null when the function is used to build a string column.
    """
    match = re.search(r'\d+', text)
    return match.group() if match else None

In [6]:
# Add a 'wmo' column holding the first run of digits found in each 'file' path.
# The native string expression replaces a per-row Python callback
# (map_elements), so the extraction runs inside polars; rows whose path
# contains no digit get a null.
index_table = index_table.with_columns(
    pl.col('file').str.extract(r'(\d+)', 1).alias('wmo')
)
print(index_table)

shape: (360_854, 11)
┌────────────┬────────────┬──────────┬───────────┬───┬───────────┬───────────┬───────────┬─────────┐
│ file       ┆ date       ┆ latitude ┆ longitude ┆ … ┆ parameter ┆ parameter ┆ date_upda ┆ wmo     │
│ ---        ┆ ---        ┆ ---      ┆ ---       ┆   ┆ s         ┆ _data_mod ┆ te        ┆ ---     │
│ str        ┆ i64        ┆ f64      ┆ f64       ┆   ┆ ---       ┆ e         ┆ ---       ┆ str     │
│            ┆            ┆          ┆           ┆   ┆ str       ┆ ---       ┆ i64       ┆         │
│            ┆            ┆          ┆           ┆   ┆           ┆ str       ┆           ┆         │
╞════════════╪════════════╪══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═════════╡
│ aoml/19007 ┆ 2006102202 ┆ -40.316  ┆ 73.389    ┆ … ┆ PRES TEMP ┆ DDDD      ┆ 202206280 ┆ 1900722 │
│ 22/profile ┆ 1624       ┆          ┆           ┆   ┆ PSAL DOXY ┆           ┆ 80801     ┆         │
│ s/SD190072 ┆            ┆          ┆           ┆   ┆           ┆    

In [10]:
# WMO number of the float to download, kept as a string so it can be
# compared with the string-typed 'wmo' column.
wmo = '7902223'

# Keep only the index rows that belong to that float.
is_selected_float = pl.col('wmo') == wmo
wmo_table = index_table.filter(is_selected_float)

# Number of index rows found for the float.
wmo_table.height

48

In [11]:
# Local layout: <data_directory>/argo_nc/<wmo>
local_argo_directory = f'{data_directory}/argo_nc'
wmo_directory = f'{local_argo_directory}/{wmo}'

# Remote roots for the regular ('dac') and auxiliary ('aux') files.
aux = 'https://data-argo.ifremer.fr/aux'
dac = 'https://data-argo.ifremer.fr/dac'

# Create the float's folder, including missing parents; do nothing if it exists.
os.makedirs(wmo_directory, exist_ok=True)



Download the merged Sprof of all profiles

In [12]:
# The DAC name is the leading path component of the float's first index entry.
first_entry = wmo_table['file'][0]
dac_name = first_entry.partition('/')[0]

# URL of the float's merged synthetic-profile file.
download_url = f'{dac}/{dac_name}/{wmo}/{wmo}_Sprof.nc'
print(download_url)

https://data-argo.ifremer.fr/dac/coriolis/7902223/7902223_Sprof.nc


In [13]:

# Save the file under its remote name inside the float's local folder.
remote_name = download_url.rsplit('/', 1)[1]
filename = f'{wmo_directory}/{remote_name}'
urlretrieve(download_url, filename)

('C:/Users/flapet/OneDrive - NOC/Documents/IDAPro/lib/db_building/data/argo_nc/7902223/7902223_Sprof.nc',
 <http.client.HTTPMessage at 0x27f0c9f0a30>)

Download all the separate profiles

In [14]:
# Fetch every file listed for this float in the index, one request per file,
# saving each under its remote name in the float's local folder.
for profile_path in tqdm(wmo_table['file']):
    download_url = f'{dac}/{profile_path}'
    remote_name = download_url.rsplit('/', 1)[1]
    filename = f'{wmo_directory}/{remote_name}'
    urlretrieve(download_url, filename)



100%|██████████| 48/48 [00:10<00:00,  4.54it/s]


# Download auxiliary files

In [None]:
# Scrape the HTML listing of the aux 'profiles' folder to find its nc files.
# NOTE(review): the DAC ('coriolis') and float ('1902601') are hard-coded here
# and differ from the `wmo` selected earlier ('7902223') — confirm this is intended.
URL = f'{aux}/coriolis/1902601/profiles/'

# read_html returns every table found on the page; the first one is used.
tables = pd.read_html(URL)
table = tables[0]

In [None]:
# Keep only the rows that have a name and whose name ends with '.nc'.
named_rows = table.dropna(subset=['Name'])
is_netcdf = named_rows['Name'].str.endswith('.nc')
nc_table = named_rows[is_netcdf]

In [None]:
# Preview the local path derived from the most recent download URL.
# A '/' is inserted between the folder and the file name, matching how the
# download cells build `filename`; without it the two parts run together.
print(wmo_directory + '/' + download_url.rsplit('/', 1)[1])

In [None]:
# Download every listed aux nc file into the float's local folder.
# URL already ends with '/', so the trailing slash is stripped before joining;
# otherwise the request URL would contain a double slash ('profiles//<name>').
# NOTE(review): files are saved in `wmo_directory` while URL points at float
# 1902601 — confirm the target folder is the intended one.
for aux_name in tqdm(nc_table['Name']):
    download_url = URL.rstrip('/') + '/' + aux_name
    filename = wmo_directory + '/' + download_url.rsplit('/', 1)[1]
    urlretrieve(download_url, filename)

## Explore the downwelling irradiance (Ed) data

In [None]:
# Open one auxiliary profile file from the local data folder.
# NOTE(review): the float and cycle are hard-coded in this relative path —
# confirm the file exists locally before running.
filename = 'argo_nc/1902601/R1902601_020_aux.nc'
filepath = os.path.join(data_directory, filename)
data = xr.open_dataset(filepath)

# Rename CYCLE_NUMBER to PROF_NUM, then make it the dimension replacing N_PROF.
data = data.rename({'CYCLE_NUMBER':'PROF_NUM'})
data = data.swap_dims({'N_PROF':'PROF_NUM'})
display(data)

In [None]:
# Convert the raw downwelling irradiance variable to a DataFrame whose
# measurements sit in a 'value' column.
ed = data['RAW_DOWNWELLING_IRRADIANCE']
df = ed.to_dataframe(name='value')

# Turn the index levels into ordinary columns, then add an 'index' column
# numbering the rows within each N_LEVELS group.
df_reset = df.reset_index()
df_reset = df_reset.assign(index=df_reset.groupby('N_LEVELS').cumcount())

# Wide layout: one column per N_VALUES70 value, filled from 'value'.
df_pivot = df_reset.pivot(columns='N_VALUES70', values='value')

In [None]:
# Show the first five rows of the long-format table.
df_reset.head(n=5)

In [None]:
import seaborn as sns
cmap = sns.cubehelix_palette(as_cmap=True)

# Draw on an explicit axes so the requested figure size applies to this plot;
# a bare plt.figure() followed by DataFrame.plot() without `ax` leaves an
# empty figure behind and plots into a new one.
fig, ax = plt.subplots(figsize=(12, 6))

# Colour each point by its N_LEVELS value. A scatter plot accepts a column
# name for `c` together with a colormap, whereas a line plot's `color` must
# be an actual colour specification, not a Series of integers.
df_reset.plot.scatter(x='N_VALUES70', y='value', c='N_LEVELS', cmap=cmap, ax=ax)
ax.grid(alpha=0.5)
plt.show()

In [None]:
import numpy as np

fig1 = plt.figure()
ax1 = fig1.add_subplot(111)

# One line per distinct N_LEVELS value, drawn in order of first appearance.
levels = df_reset['N_LEVELS'].unique()
for level in levels:
    subset = df_reset[df_reset['N_LEVELS'] == level]
    ax1.plot(subset['N_VALUES70'], subset['value'])

# Recolour the lines with evenly spaced viridis colours, in drawing order.
colormap = plt.colormaps['viridis'] #nipy_spectral, Set1,Paired
fractions = np.linspace(0, 1, len(ax1.lines))
colors = [colormap(fraction) for fraction in fractions]
for line, line_color in zip(ax1.lines, colors):
    line.set_color(line_color)

# NOTE(review): none of the lines carries a label, so this legend has no entries.
ax1.legend(loc=2)

In [None]:
from matplotlib.cm import ScalarMappable
from matplotlib.colors import Normalize

# Plot one curve per N_LEVELS group, coloured by level, with a colorbar as key.
fig, ax = plt.subplots()

# Keep only levels below 200. This rebinds df_reset, so anything run after
# this cell sees the filtered table.
df_reset = df_reset[df_reset['N_LEVELS'] < 200]

# Map level values onto the viridis colormap; the ScalarMappable exists only
# to feed the colorbar, hence the empty array.
norm = Normalize(vmin=df_reset['N_LEVELS'].min(), vmax=df_reset['N_LEVELS'].max())
cmap = plt.get_cmap('viridis')  # You can choose any colormap you prefer
sm = ScalarMappable(norm=norm, cmap=cmap)
sm.set_array([])

# Plot each spectrum in the colour of its level.
for depth, group in df_reset.groupby('N_LEVELS'):
    ax.plot(group['N_VALUES70'], group['value'], color=cmap(norm(depth)))

# Add colorbar
# NOTE(review): the colour scale is the N_LEVELS value; confirm it can be
# read as a depth before relying on this label.
cbar = plt.colorbar(sm, ax=ax)
cbar.set_label('Depth')

# Set labels and title
# NOTE(review): the x values are N_VALUES70 — confirm they represent wavelengths.
ax.set_xlabel('Wavelength')
# The plotted variable is RAW_DOWNWELLING_IRRADIANCE, not an absorption.
ax.set_ylabel('Raw downwelling irradiance')
ax.set_title('Raw downward light')

# No ax.legend(): the lines carry no labels, so a legend would be empty and
# only emit a warning; the colorbar already identifies each curve.

plt.show()