In [1]:
import xarray as xr
import numpy as np
import pandas as pd
from datetime import date
# Run date; later written into the dataset's 'date_created' global attribute.
today = date.today()
import re

In [2]:
# Load the auxiliary CTD spreadsheet into a DataFrame; the first sheet row
# (header=0) supplies the column names.
data = pd.read_excel(
    'PALOMA_auxiliary_CTD.xlsx',
    header=0,
)
# Bare expression as the last line so the notebook renders the frame.
data

Unnamed: 0,Fixed_Station,YYYY-MM-DD hh:mm:ss,Longitude,Latitude,Depth_dbar,T_SBE37_degC,S_SBE37
0,PALOMA,2020-07-15 11:41:09,13.5658,45.6204,0.633,22.5441,37.1831
1,PALOMA,2020-07-15 11:42:09,13.5658,45.6204,0.660,22.4708,36.9997
2,PALOMA,2020-07-15 11:43:09,13.5658,45.6204,0.692,22.5128,36.9330
3,PALOMA,2020-07-15 11:44:09,13.5658,45.6204,0.917,22.3996,36.9347
4,PALOMA,2020-07-15 11:45:09,13.5658,45.6204,0.744,22.5016,36.8353
...,...,...,...,...,...,...,...
314,PALOMA,2020-07-15 17:28:09,13.5658,45.6204,0.590,22.5488,36.9773
315,PALOMA,2020-07-15 17:29:09,13.5658,45.6204,0.650,22.5478,36.9784
316,PALOMA,2020-07-15 17:30:09,13.5658,45.6204,0.596,22.5483,36.9817
317,PALOMA,2020-07-15 17:31:09,13.5658,45.6204,0.589,22.5361,36.9848


In [7]:
# Turn every spreadsheet column into a notebook-level NumPy array.

# Sanitise the headers so each one can double as a variable name:
# '-', '%', '/', ':' and spaces all become underscores
# (e.g. 'YYYY-MM-DD hh:mm:ss' -> 'YYYY_MM_DD_hh_mm_ss').
data.columns = [re.sub(r'[-%/: ]', '_', col) for col in data.columns]

# 64-bit numeric columns are narrowed to 32 bits; any other dtype
# (e.g. object columns) is passed through unchanged.
_NARROW_DTYPES = {
    np.dtype('int64'): 'int32',
    np.dtype('float64'): 'float32',
}

for column in data.columns:
    values = data[column].to_numpy()

    narrow = _NARROW_DTYPES.get(values.dtype)
    if narrow is not None:
        values = values.astype(narrow)

    # Publish the array under the sanitised column name; later cells read
    # these names directly. Plain assignment rebinds whatever an earlier run
    # left behind, so no explicit delete (and no mutation of the locals()
    # mapping) is needed first.
    globals()[column] = values

In [8]:
# List the interactive namespace (IPython `whos` magic, written without the
# leading '%', so it relies on automagic being enabled).
whos

Variable              Type         Data/Info
--------------------------------------------
Depth_dbar            ndarray      319: 319 elems, type `float32`, 1276 bytes
Fixed_Station         ndarray      319: 319 elems, type `object`, 2552 bytes
Latitude              ndarray      319: 319 elems, type `float32`, 1276 bytes
Longitude             ndarray      319: 319 elems, type `float32`, 1276 bytes
S_SBE37               ndarray      319: 319 elems, type `float32`, 1276 bytes
T_SBE37_degC          ndarray      319: 319 elems, type `float32`, 1276 bytes
YYYY_MM_DD_hh:mm:ss   ndarray      319: 319 elems, type `object`, 2552 bytes
YYYY_MM_DD_hh_mm_ss   ndarray      319: 319 elems, type `object`, 2552 bytes
column                str          S_SBE37
data                  DataFrame        Fixed_Station  YYYY_M<...>n\n[319 rows x 7 columns]
date                  type         <class 'datetime.date'>
np                    module       <module 'numpy' from '/ho<...>kages/numpy/__init__.py'>
np_ar

In [9]:
# Short working aliases for the per-column arrays published in the notebook
# namespace; the Dataset cell below refers to these names.
time, Lat, Lon = YYYY_MM_DD_hh_mm_ss, Latitude, Longitude
Depth = Depth_dbar
Salinity = S_SBE37
Temperature = T_SBE37_degC


In [10]:
# Build the CTD time series as an xarray Dataset.
#
# Every array holds one value per sample, so the measured variables AND the
# longitude/latitude coordinates all live on the single "time" dimension.
# (Giving lon and lat dimensions of their own would declare two extra,
# unrelated axes that none of the data variables use.)
#
# valid_min / valid_max / missing_value are stored as numbers of the same
# dtype as the variable they describe rather than as strings, and the units
# use the CF/UDUNITS spellings ("degree_Celsius", "degrees_east",
# "degrees_north"). The former `"axis": "time"` entries on the data variables
# are dropped: CF only defines axis values X, Y, Z and T, and only for
# coordinate variables.
#
# NOTE(review): the namespace listing shows `time` as an object-dtype array;
# confirm it holds datetimes (not text) so it is written as a real time axis.
ds = xr.Dataset(
    data_vars=dict(
        # NOTE(review): the source column is Depth_dbar and the description
        # says dbar, yet units is "m" - confirm which one is intended.
        Depth=(["time"], Depth, {
            "units": "m",
            "standard_name": "depth",
            "short_name": "depth",
            "long_name": "depth",
            "description": "depth (pressure) of sensor (dbar)",
            "valid_min": Depth.dtype.type(0),
            "valid_max": Depth.dtype.type(10000),
            "missing_value": Depth.dtype.type(-9999999),
            "history": "",
        }),
        Salinity=(["time"], Salinity, {
            "units": "psu",
            "standard_name": "sea_water_salinity",
            "short_name": "so",
            "long_name": "Sea Water Salinity",
            "description": "Sea water salinity is the salt content of sea water on the Practical Salinity Scale, measured by SBE37 deployed from the research vessel",
            "valid_min": Salinity.dtype.type(0),
            "valid_max": Salinity.dtype.type(100),
            "missing_value": Salinity.dtype.type(-9999999),
            "history": "",
        }),
        Temperature=(["time"], Temperature, {
            "units": "degree_Celsius",
            "standard_name": "sea_water_temperature",
            "short_name": "SW_Temperature",
            "long_name": "Sea Water Temperature",
            "description": "Sea water temperature is the in situ temperature of the sea water measured by the SBE37 deployed from the research vessel",
            "valid_min": Temperature.dtype.type(-10),
            "valid_max": Temperature.dtype.type(100),
            "missing_value": Temperature.dtype.type(-9999999),
            "history": "",
        }),
    ),
    coords=dict(
        # Position of each sample, carried along the time dimension.
        lon=(["time"], Lon, {
            "units": "degrees_east",
            "standard_name": "longitude",
            "short_name": "lon",
            "long_name": "Longitude",
            "description": "The Longitude represents the degrees east",
            "valid_min": Lon.dtype.type(0),
            "valid_max": Lon.dtype.type(360),
            "missing_value": Lon.dtype.type(-9999999),
            "history": "",
        }),
        lat=(["time"], Lat, {
            "units": "degrees_north",
            "standard_name": "latitude",
            "short_name": "lat",
            "long_name": "Latitude",
            "description": "The latitude represents the degrees north",
            "valid_min": Lat.dtype.type(-90),
            "valid_max": Lat.dtype.type(90),
            "missing_value": Lat.dtype.type(-9999999),
            "history": "",
        }),
        time=time,
        # Scalar coordinate holding the first time stamp of the series.
        reference_time=time[0],
    ),
)





In [11]:
# Global (file-level) attributes of the dataset.
# dict.update preserves insertion order, so the attributes are stored in the
# order they are listed here.
ds.attrs.update({
    # --- identification ---
    'id': 'DOI needed',  # TODO: placeholder, replace with the real DOI
    'naming_authority': 'Istituto Nazionale di Oceanografia e Geofisica Sperimentale (OGS), Trieste, Italy ',
    'title': "Paloma_auxCTD",
    'summary': 'CTD taken as auxiliary data for the PALOMA station using an SBE37',
    'keywords': 'Hydrography',
    'keywords_vocabulary': 'Temperature, Salinity, Pressure',

    # --- spatial extent, computed from the coordinate arrays ---
    'geospatial_lat_min': np.min(Lat),
    'geospatial_lat_max': np.max(Lat),
    'geospatial_lon_min': np.min(Lon),  # key was misspelled 'geospatial_lon_miN'
    'geospatial_lon_max': np.max(Lon),

    # --- temporal extent, first and last time stamp as text ---
    'time_coverage_start': str(np.min(time)),
    'time_coverage_end': str(np.max(time)),

    # NOTE(review): CF version strings are normally written 'CF-<version>'
    # (hyphenated); confirm the intended convention version.
    'Conventions': 'CF 1.14',
    'processing_level': 'LEVEL 2',
    'date_created': str(today),

    # --- creator / publisher ---
    'creator_type': 'Group',
    'creator_institution': 'Geophysical Institute',
    'creator_name': 'Joan Mateu Horrach Pou',
    'creator_email': 'joan.pou@uib.no',
    'creator_url': '',
    'institution': 'Geophysical Institute',
    'publisher_name': 'Joan Mateu Horrach Pou',
    'publisher_email': 'joan.pou@uib.no',
    'publisher_url': 'I need the url',  # TODO: placeholder

    # --- project / platform / instrument ---
    'project': 'ICOS',
    'platform': 'PALOMA',
    'platform_vocabulary': 'PALOMA_auxCTD',
    'instrument': 'SBE37',

    'iso_topic_category': 'Needed',  # TODO: placeholder
    'activity_type': 'in situ observation',
    'operational_status': 'active',
})


In [12]:
# One shared zlib (level 5) compression spec, applied to every data variable
# when the dataset is written to NetCDF.
comp = {"zlib": True, "complevel": 5}
encoding = dict.fromkeys(ds.data_vars, comp)
ds.to_netcdf('Paloma_auxCTD.nc', encoding=encoding)
