In [1]:
import xarray as xr
import numpy as np
import pandas as pd
from datetime import date
today = date.today()
import re

In [2]:
# Read the sensor spreadsheet (first row holds the column names).
data = pd.read_excel('W1M3A_sensor_final.xlsx', header=0)

# Keep the 'date' ("Y-M-D") and 'time' ("H:M") string columns, since later
# cells export every column of `data`. Build them with vectorised string
# concatenation instead of a row-wise `apply`, so each column is stringified
# with its own dtype.
cols = ['Year', 'Month_UTC', 'Day_UTC']
data['date'] = data[cols[0]].astype(str).str.cat(
    [data[name].astype(str) for name in cols[1:]], sep='-'
)

cols = ['Hour_UTC', 'Minute_UTC']
data['time'] = data[cols[0]].astype(str).str.cat(
    [data[name].astype(str) for name in cols[1:]], sep=':'
)

# Assemble the timestamp directly from the numeric components rather than
# re-parsing the strings above: no date-format inference is involved, so
# un-padded values such as "2021-6-5 3:7" cannot be misread.
data['datetime'] = pd.to_datetime(
    data[['Year', 'Month_UTC', 'Day_UTC', 'Hour_UTC', 'Minute_UTC']].rename(
        columns={
            'Year': 'year',
            'Month_UTC': 'month',
            'Day_UTC': 'day',
            'Hour_UTC': 'hour',
            'Minute_UTC': 'minute',
        }
    )
)


In [3]:
# Replace '-', '%', '/' and ' ' in the column names with '_' so that each
# name can be used as a Python variable name below.
data.columns = [re.sub(r'[-%/ ]', '_', col) for col in data.columns]

# Publish every column as a module-level NumPy array named after the column.
# WARNING: this rebinds any existing global with the same name; e.g. the
# 'date' column replaces the `date` class imported in the first cell.
for column in data.columns:
    np_array = data[column].to_numpy()

    # Halve the storage of 64-bit numeric columns; other dtypes (e.g. object,
    # datetime64) are passed through unchanged.
    if np_array.dtype == np.int64:
        np_array = np_array.astype(np.int32)
    elif np_array.dtype == np.float64:
        np_array = np_array.astype(np.float32)

    # Plain rebinding is enough: assigning to globals()[column] replaces any
    # array left over from a previous run, so no explicit delete is needed.
    globals()[column] = np_array

In [4]:
whos

Variable                                                Type         Data/Info
------------------------------------------------------------------------------
DO_MEDSEA_umol_m3                                       ndarray      31: 31 elems, type `float32`, 124 bytes
Day_UTC                                                 ndarray      31: 31 elems, type `int32`, 124 bytes
Depth_dbar                                              ndarray      31: 31 elems, type `int32`, 124 bytes
Fixed_Station                                           ndarray      31: 31 elems, type `object`, 248 bytes
Hour_UTC                                                ndarray      31: 31 elems, type `int32`, 124 bytes
Latitude                                                ndarray      31: 31 elems, type `float32`, 124 bytes
Longitude                                               ndarray      31: 31 elems, type `float32`, 124 bytes
Minute_UTC                                              ndarray      31: 31 elems, typ

In [13]:
# Short working names for the per-column arrays used when building the
# dataset. Every name on the right-hand side must already exist as a global.

# Time axis and position.
time, Lat, Lon = datetime, Latitude, Longitude

# Sea-water quantities.
DO, Depth, Salinity = DO_MEDSEA_umol_m3, Depth_dbar, _S_MEDSEA
Alkalinity, Temperature = TA_MEDSEA_mol_m3, T_SBE_degC
pCO2 = pCO2_raw_uatm
pH_cmems = pH_available_from_MEDSEA_ANALYSISFORECAST_BGC_006_014

# Atmospheric quantities.
Press, Air_temp, wind_speed = Pres_atm_hPa, T_atm_degC, Wind_Speed_m_s

In [14]:
def _cf_attrs(units, standard_name, short_name, long_name, description,
              valid_min, valid_max, axis=None):
    """Build the attribute dictionary shared by every variable in the dataset.

    Parameters
    ----------
    units, standard_name, short_name, long_name, description : str
        Stored under the attribute of the same name.
    valid_min, valid_max : str
        Valid range, stored as given.
    axis : str, optional
        Value of the "axis" attribute; the attribute is omitted when None.

    Returns
    -------
    dict
        Attributes in the order: units, standard_name, short_name, long_name,
        description, valid_min, valid_max, missing_value, [axis], history.
    """
    attrs = {
        "units": units,
        "standard_name": standard_name,
        "short_name": short_name,
        "long_name": long_name,
        "description": description,
        "valid_min": valid_min,
        "valid_max": valid_max,
        "missing_value": "-9999999",
    }
    if axis is not None:
        attrs["axis"] = axis
    attrs["history"] = ""
    return attrs


# NOTE(review): valid_min / valid_max / missing_value are written as strings;
# CF expects them to have the same type as the variable - confirm whether
# downstream readers rely on the string form before changing them.
ds = xr.Dataset(
    data_vars=dict(
        Oxygen=(["time"], DO, _cf_attrs(
            units="umol/m3",
            standard_name="mole_concentration_of_dissolved_molecular_oxygen_in_sea_water",
            short_name="DO",
            long_name="mole_concentration_of_dissolved_molecular_oxygen_in_sea_water",
            description="Dissolved oxygen determined from MEDSEA_ANALYSISFORECAST_BGC_006_014 model(umol/m3)",
            valid_min="0",
            valid_max="400",
            axis="time",
        )),

        Pressure=(["time"], Depth, _cf_attrs(
            units="dbar",
            standard_name="pressure",
            short_name="pressure",
            long_name="pressure",
            description="pressure of the sensor",
            valid_min="0",
            valid_max="10000",
            axis="time",
        )),

        atm_pressure=(["time"], Press, _cf_attrs(
            units="hPa",
            standard_name="air_pressure",
            short_name="atmpress",
            long_name="Air pressure",  # was misspelled "Air presuure"
            description="Atmospheric pressure determined from ECMWF (hPa)",
            valid_min="0",
            valid_max="10000",
            axis="time",
        )),

        Salinity=(["time"], Salinity, _cf_attrs(
            units="psu",
            standard_name="sea_water_salinity",
            short_name="so",
            long_name="Sea Water Salinity",
            description="salinity determined from the MEDSEA_MULTIYEAR_PHY_006_004 model",
            valid_min="0",
            valid_max="100",
            axis="time",
        )),

        pCO2=(["time"], pCO2, _cf_attrs(
            units="uatm",
            standard_name="surface_partial_pressure_of_carbon_dioxide_in_sea_water",
            short_name="pCO2",
            long_name="Surface partial pressure of carbon dioxide in sea water ",
            description=" pCO2 calculated from TA and pH",
            valid_min="0",
            valid_max="1000",
            axis="time",
        )),

        # The values come from the TA_MEDSEA_mol_m3 column and the description
        # states mol/m3, so the units must be mol/m3 (they were previously
        # declared as "umol/kg").
        Alkalinity=(["time"], Alkalinity, _cf_attrs(
            units="mol/m3",
            standard_name="sea_water_total_alkalinity",
            short_name="alkalinity",
            long_name="Sea Water Total Alkalinity",
            description="Total Alkalinity determined from the MEDSEA_ANALYSISFORECAST_BGC_006_014 model (mol/m3).",
            valid_min="0",
            valid_max="100",
            axis="time",
        )),

        pH=(["time"], pH_cmems, _cf_attrs(
            units="pH",
            standard_name="sea_water_pH",
            short_name="pH",
            long_name="Sea Water pH",
            description="pH determined from the MEDSEA_ANALYSISFORECAST_BGC_006_014 model ",
            valid_min="0",
            valid_max="14",
            axis="time",
        )),

        # "degree_Celsius" is the UDUNITS spelling required by CF
        # (was "degree Celcius").
        Temperature=(["time"], Temperature, _cf_attrs(
            units="degree_Celsius",
            standard_name="sea_water_temperature",
            short_name="SW_Temperature",
            long_name="Sea Water Temperature",
            description="Sea water temperature is the in situ temperature  measured by an SBE56.",
            valid_min="-10",
            valid_max="100",
            axis="time",
        )),

        air_temp=(["time"], Air_temp, _cf_attrs(
            units="degree_Celsius",
            standard_name="air_temperature",
            short_name="air_Temperature",
            long_name="Air Temperature",
            description="Temperature in air determined from ECMWF (degC)",
            valid_min="-10",
            valid_max="100",
            axis="time",
        )),

        Wind_Speed=(["time"], wind_speed, _cf_attrs(
            units="m/s",
            standard_name="wind_speed",
            short_name="wspd",
            long_name="Wind Speed",
            description="The wind speed is the magnitude of the wind velocity derived from ECMWF",
            valid_min="0",
            valid_max="400",
            axis="time",
        )),
    ),
    coords=dict(
        # Longitude/latitude hold one value per sample, so they are attached
        # to the "time" dimension. Declaring them on separate "lon"/"lat"
        # dimensions would add two dimensions that no data variable uses.
        # Units use the CF spellings "degrees_east" / "degrees_north".
        lon=(["time"], Lon, _cf_attrs(
            units="degrees_east",
            standard_name="longitude",
            short_name="lon",
            long_name="Longitude",
            description="The Longitude represents the degrees east",
            valid_min="0",
            valid_max="360",
        )),

        lat=(["time"], Lat, _cf_attrs(
            units="degrees_north",
            standard_name="latitude",
            short_name="lat",
            long_name="Latitude",
            description="The latitude represents the degrees north",
            valid_min="-90",
            valid_max="90",
        )),
        time=time,
        # Scalar coordinate: the first timestamp of the series.
        reference_time=time[0],
    ),
)





In [15]:
# Global (file-level) metadata. Entries are added in the order listed here.
ds.attrs.update({
    # Identification
    'id': 'DOI needed',
    'naming_authority': 'Istituto Nazionale di Oceanografia e Geofisica Sperimentale (OGS), Trieste, Italy ',
    'title': "W1M3A",
    'summary': 'The dataset described here is collected during the ATL2MED demonstration experiment (see below) and consists of sensor data and metadata.',
    'keywords': 'Hydrography',
    'keywords_vocabulary': 'Temperature, Salinity, DO, pH, wind, Alk',

    # Spatial extent, computed from the position arrays.
    'geospatial_lat_min': np.min(Lat),
    'geospatial_lat_max': np.max(Lat),
    # Key was misspelled 'geospatial_lon_miN'; attribute names are
    # case-sensitive, so readers looking for 'geospatial_lon_min' missed it.
    'geospatial_lon_min': np.min(Lon),
    'geospatial_lon_max': np.max(Lon),

    # Temporal extent, stored as strings.
    'time_coverage_start': str(np.min(time)),
    'time_coverage_end': str(np.max(time)),

    # CF requires the form "CF-<version>" (hyphen, no space).
    'Conventions': 'CF-1.11',

    'processing_level': 'LEVEL 2',

    # `today` is the date object captured in the first cell.
    'date_created': str(today),

    # Creator
    'creator_type': 'Group',
    'creator_institution': 'Geophysical Institute',
    'creator_name': 'Joan Mateu Horrach Pou',
    'creator_email': 'joan.pou@uib.no',
    'creator_url': '',

    'institution': 'Geophysical Institute',

    # Publisher
    'publisher_name': 'Joan Mateu Horrach Pou',
    'publisher_email': 'joan.pou@uib.no',
    'publisher_url': 'I need the url',

    'project': 'ICOS',

    # Platform and instrumentation
    'platform': 'W1M3A',
    'platform_vocabulary': 'W1M3A_sation',
    'instrument': 'SBE56. ECMWF model, CMEMS model',

    'iso_topic_category': 'Needed',
    'activity_type': 'in situ observation, model output, fixed station',
    'operational_status': 'active',
})


In [16]:
# zlib compression at level 5, applied to every data variable (coordinates
# are written with default encoding).
comp = {"zlib": True, "complevel": 5}
encoding = dict.fromkeys(ds.data_vars, comp)

# Write the dataset to disk.
ds.to_netcdf('W1M3A_station.nc', encoding=encoding)

