In [1]:
import xarray as xr
import numpy as np
import pandas as pd
from datetime import date
# Run date; stored later as the dataset's 'date_created' global attribute.
today = date.today()
import re

In [2]:
# Load the spreadsheet (first row = column headers) and append a 'Datetime'
# column parsed from the text "<Date> <UTC_time>" of each row.
data = pd.read_excel('MIRAMARE_discrete.xlsx', header=0).assign(
    Datetime=lambda frame: pd.to_datetime(
        frame['Date'].astype(str) + ' ' + frame['UTC_time'].astype(str)
    )
)

In [3]:
# Sanitise the column headers: '-', '%' and '/' are replaced by '_' so that the
# headers can be used as variable names in the following cells.
# Headers are passed through str() first so a non-text header (e.g. a number)
# does not make re.sub raise.
data.columns = [re.sub(r'[-%/]', '_', str(col)) for col in data.columns]

# Export every column as a module-level NumPy array named after the column.
# The next cell relies on these names (Datetime, Latitude, DO_mL_L, ...).
for column in data.columns:
    np_array = data[column].to_numpy()

    # Store 64-bit numeric columns as 32-bit; every other dtype is left as is.
    if np_array.dtype == 'int64':
        np_array = np_array.astype('int32')
    elif np_array.dtype == 'float64':
        np_array = np_array.astype('float32')

    # Plain rebinding replaces any earlier array of the same name. The former
    # `del locals()[column]` step was redundant: at cell scope locals() is
    # globals(), so the name was deleted and then immediately re-assigned.
    globals()[column] = np_array

In [4]:
# Short aliases for the per-column arrays that the previous cell exported as
# globals (one array per spreadsheet column).

# Coordinates
time = Datetime
Lat, Lon = Latitude, Longitude

# Measured variables
DO = DO_mL_L
Depth = Depth_m
# NOTE(review): self-assignment, has no effect and the array is not used when
# the dataset is built -- confirm whether a 'Station' variable was intended.
Fixed_Station = Fixed_Station
PO4 = PO4_umol_kg
Salinity = S_SBE19
Si = Si_umol_kg
Alkalinity = TA_umol_kg
Temperature = T_SBE19_degC
T_pH = T_at_pH_measurements


In [20]:
def _cf_attrs(units, standard_name, short_name, long_name, description,
              valid_min, valid_max, axis=None, missing_value="-9999999"):
    """Build the attribute dict attached to one variable of the dataset.

    Every variable carries the same set of keys, emitted in the same order:
    units, standard_name, short_name, long_name, description, valid_min,
    valid_max, missing_value, [axis], history.

    Parameters
    ----------
    units, standard_name, short_name, long_name, description : str
        Descriptive metadata for the variable.
    valid_min, valid_max : str
        Bounds of the valid range (kept as strings, as in the rest of the file).
    axis : str, optional
        Written as the "axis" attribute when given; omitted when None
        (the lon/lat coordinates do not carry one).
    missing_value : str
        Missing-value marker; the same for every variable.

    Returns
    -------
    dict
        A new attribute dictionary.
    """
    attrs = {
        "units": units,
        "standard_name": standard_name,
        "short_name": short_name,
        "long_name": long_name,
        "description": description,
        "valid_min": valid_min,
        "valid_max": valid_max,
        "missing_value": missing_value,
    }
    if axis is not None:
        attrs["axis"] = axis
    attrs["history"] = ""
    return attrs


# NOTE(review): valid_min / valid_max / missing_value are stored as strings and
# "axis" is set to "time"; CF expects numeric range attributes of the variable's
# type and axis values X/Y/Z/T on coordinate variables only. Left unchanged here.
ds = xr.Dataset(
    data_vars=dict(
        Oxygen=(["time"], DO, _cf_attrs(
            # The values come from the DO_mL_L column, so the unit is mL/L
            # (was "uatm", which is a pressure unit).
            # NOTE(review): standard_name still says "mole_concentration" -- confirm.
            units="mL/L",
            standard_name="mole_concentration_of_dissolved_molecular_oxygen_in_sea_water",
            short_name="DO",
            long_name="mole_concentration_of_dissolved_molecular_oxygen_in_sea_water",
            description="Molarity of dissolved Oxygen in sea water measured by SBE37DO",
            valid_min="0",
            valid_max="400",
            axis="time",
        )),
        Depth=(["time"], Depth, _cf_attrs(
            units="m",
            standard_name="depth",
            short_name="depth",
            long_name="depth",
            description="depth from where the water sample is collected (m)",
            valid_min="0",
            valid_max="10000",
            axis="time",
        )),
        Phosphate=(["time"], PO4, _cf_attrs(
            units="umol/kg",
            standard_name="mole_concentration_of_phosphate_in_sea_water",
            short_name="PO4",
            long_name="mole_concentration_of_phosphate_in_sea_water",
            description="Molarity of dissolved Phosphate in sea water measured at discrete stations.",
            valid_min="0",
            valid_max="10000",
            axis="time",
        )),
        Salinity=(["time"], Salinity, _cf_attrs(
            units="psu",
            standard_name="sea_water_salinity",
            short_name="so",
            long_name="Sea Water Salinity",
            description="Sea water salinity is the salt content of sea water on the Practical Salinity Scale of 1978",
            valid_min="0",
            valid_max="100",
            axis="time",
        )),
        Silicate=(["time"], Si, _cf_attrs(
            units="umol/kg",
            standard_name="mole_concentration_of_silicate_in_sea_water",
            short_name="SI",
            long_name="mole_concentration_of_silicate_in_sea_water",
            description="Molarity of dissolved Silicate in sea water measured at discrete stations.",
            valid_min="0",
            valid_max="100",
            axis="time",
        )),
        Alkalinity=(["time"], Alkalinity, _cf_attrs(
            units="umol/kg",
            standard_name="sea_water_total_alkalinity",
            short_name="alkalinity",
            long_name="Sea Water Total Alkalinity",
            description="Total alkalinity sea water measured at discrete stations.",
            valid_min="0",
            # Was "100": sea-water total alkalinity in umol/kg is far above that,
            # so the old bound marked real data as invalid.
            # NOTE(review): "10000" mirrors the loose bound used for Phosphate -- tighten if desired.
            valid_max="10000",
            axis="time",
        )),
        Temperature=(["time"], Temperature, _cf_attrs(
            units="degree_Celsius",  # was the misspelled "degree Celcius"
            standard_name="sea_water_temperature",
            short_name="SW_Temperature",
            long_name="Sea Water Temperature",
            description="Sea water temperature is the in situ temperature of the sea water",
            valid_min="-10",
            valid_max="100",
            axis="time",
        )),
        T_pH=(["time"], T_pH, _cf_attrs(
            units="degree_Celsius",  # was the misspelled "degree Celcius"
            standard_name="sea_water_temperature_during_pH_measurement",
            short_name="TpH",
            long_name="Sea Water Temperature during pH measurement",
            description="temperature of the water during pH measurements",
            valid_min="-10",
            valid_max="100",
            axis="time",
        )),
    ),
    coords=dict(
        # NOTE(review): lon and lat each get their own dimension while every data
        # variable is indexed by "time" only, so positions are not tied to samples.
        # Consider (["time"], Lon, ...) / (["time"], Lat, ...) -- confirm intent.
        lon=(["lon"], Lon, _cf_attrs(
            units="degrees_east",  # CF spelling; was "degrees East"
            standard_name="longitude",
            short_name="lon",
            long_name="Longitude",
            description="The Longitude represents the degrees east",
            valid_min="0",
            valid_max="360",
        )),
        lat=(["lat"], Lat, _cf_attrs(
            units="degrees_north",  # CF spelling; was "degrees North"
            standard_name="latitude",
            short_name="lat",
            long_name="Latitude",
            description="The latitude represents the degrees north",
            valid_min="-90",
            valid_max="90",
        )),
        time=time,
        # Scalar coordinate holding the first timestamp of the series.
        reference_time=time[0],
    ),
)





In [21]:
# Global (file-level) attributes of the dataset. Keys are written in the order
# listed. Several values are still placeholders ('DOI needed', 'I need the url',
# 'Needed') and must be filled in before publication.
ds.attrs.update({
    # Identification
    'id': 'DOI needed',
    'naming_authority': 'Istituto Nazionale di Oceanografia e Geofisica Sperimentale (OGS), Trieste, Italy ',
    'title': "MIRAMARE_discrete",
    'summary': 'Discrete samples were collected and analysed for pH and Total Alkalinity (TA). Discrete pCO2 was calculated using CO2SYS (Pelletier et al., 2007), with the carbonate system constants from Lueker et al. (2000), the HSO4- constant from Dickson (1990), the total borate-salinity relationship of Lee et al. (2010), and the KF constant from Perez and Fraga (1987). pH was on total scale. The sensor pCO2 data (pCO2_corr) is corrected by adjusting these data to the calculated discrete pCO2.',
    'keywords': 'Hydrography',
    # NOTE(review): this holds a keyword list rather than the name of a
    # vocabulary -- confirm which of 'keywords' / 'keywords_vocabulary' it belongs to.
    'keywords_vocabulary': 'Temperature, Salinity, DO, pH, PO, SI, Alk',

    # Spatial extent, taken from the Latitude / Longitude arrays.
    'geospatial_lat_min': np.min(Lat),
    'geospatial_lat_max': np.max(Lat),
    'geospatial_lon_min': np.min(Lon),  # key was misspelled 'geospatial_lon_miN'
    'geospatial_lon_max': np.max(Lon),

    # Temporal extent, taken from the timestamp array.
    'time_coverage_start': str(np.min(time)),
    'time_coverage_end': str(np.max(time)),

    # The CF version string is written with a hyphen (was 'CF 1.11').
    'Conventions': 'CF-1.11',
    'processing_level': 'LEVEL 2',
    'date_created': str(today),

    # People and institutions
    'creator_type': 'Group',
    'creator_institution': 'Geophysical Institute',
    'creator_name': 'Joan Mateu Horrach Pou',
    'creator_email': 'joan.pou@uib.no',
    'creator_url': '',
    'institution': 'Geophysical Institute',
    'publisher_name': 'Joan Mateu Horrach Pou',
    'publisher_email': 'joan.pou@uib.no',
    'publisher_url': 'I need the url',

    # Project, platform and instrumentation
    'project': 'ICOS',
    'platform': 'MIRAMARE',
    'platform_vocabulary': 'MIRAMARE_discrete',
    'instrument': 'SBE19, ',
    'iso_topic_category': 'Needed',
    'activity_type': 'in situ observation',
    'operational_status': 'active',
})


In [22]:
# Write the dataset to NetCDF, applying the same zlib settings (level 5) to
# every data variable. All entries of `encoding` share the one `comp` dict.
comp = {"zlib": True, "complevel": 5}
encoding = dict.fromkeys(ds.data_vars, comp)
ds.to_netcdf('MIRAMARE_discrete.nc', encoding=encoding)



In [16]:
# Display the per-variable compression settings passed to to_netcdf.
# NOTE(review): the saved output below shows complevel 9 and a 'Station' entry,
# while the cells above set complevel=5 and define no 'Station' variable --
# the output is stale; re-run with Restart Kernel -> Run All.
encoding


{'Oxygen': {'zlib': True, 'complevel': 9},
 'Depth': {'zlib': True, 'complevel': 9},
 'Station': {'zlib': True, 'complevel': 9},
 'Phosphate': {'zlib': True, 'complevel': 9},
 'Salinity': {'zlib': True, 'complevel': 9},
 'Silicate': {'zlib': True, 'complevel': 9},
 'Alkalinity': {'zlib': True, 'complevel': 9},
 'Temperature': {'zlib': True, 'complevel': 9},
 'T_pH': {'zlib': True, 'complevel': 9}}