# Convert and concatenate netCDF data into CSV
https://cds-beta.climate.copernicus.eu/datasets/sis-ecde-climate-indicators

In [1]:
import cdsapi
import zipfile
import netCDF4
import os

### Functions

In [2]:
def get_and_unpack_data(dataset, request, dir_name):
    """Download a CDS archive and unpack it into ``data/<dir_name>``.

    Args:
        dataset: Dataset identifier passed to ``cdsapi.Client.retrieve``.
        request: Request dictionary passed to ``cdsapi.Client.retrieve``.
        dir_name: Base name used both for the temporary ``<dir_name>.zip``
            archive (written to the current working directory) and for the
            extraction directory ``data/<dir_name>``.

    Returns:
        The entries of ``data/<dir_name>`` as returned by ``os.listdir``.
        That order is arbitrary, so positional indices into the result are
        not guaranteed to be stable between machines or runs.
    """
    path_to_zip_file = f'{dir_name}.zip'
    directory_to_extract_to = f'data/{dir_name}'

    client = cdsapi.Client()
    client.retrieve(dataset, request, path_to_zip_file)

    with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
        zip_ref.extractall(directory_to_extract_to)

    # Delete the archive only once its handle is closed: removing a file that
    # is still open fails on Windows.
    os.remove(path_to_zip_file)

    return os.listdir(directory_to_extract_to)

In [3]:
def read_nc_file(file_name, dir_name):
    """Print a quick summary of the netCDF file ``data/<dir_name>/<file_name>``.

    Prints, in order: the variable names, the dataset description, the values
    of the ``time`` variable, and a few blank lines as a separator.

    Args:
        file_name: Name of the netCDF file inside ``data/<dir_name>``.
        dir_name: Sub-directory of ``data`` that holds the file.
    """
    path_to_file = f'data/{dir_name}/{file_name}'

    # The context manager closes the dataset even if a lookup or print fails.
    with netCDF4.Dataset(path_to_file, 'r') as file2read:
        keys = file2read.variables.keys()

        print(keys)
        print(file2read)
        data = file2read.variables['time'][:]
        print(data)
        print('\n\n\n')

In [4]:
def get_time_lat_lon(file_name, dir_name):
    """Read the coordinate variables of ``data/<dir_name>/<file_name>``.

    Args:
        file_name: Name of the netCDF file inside ``data/<dir_name>``.
        dir_name: Sub-directory of ``data`` that holds the file.

    Returns:
        A ``(time, lat, lon)`` tuple holding the full contents of the
        ``time``, ``lat`` and ``lon`` variables.
    """
    path_to_file = f'data/{dir_name}/{file_name}'

    # The values are read into memory inside the block, so they remain usable
    # after the dataset is closed; the handle is released even if a variable
    # is missing.
    with netCDF4.Dataset(path_to_file, 'r') as file2read:
        time = file2read.variables['time'][:]
        lat = file2read.variables['lat'][:]
        lon = file2read.variables['lon'][:]

    return time, lat, lon

In [5]:
def get_val_by_key(file_name, key, dir_name):
    """Read one variable from ``data/<dir_name>/<file_name>``.

    Args:
        file_name: Name of the netCDF file inside ``data/<dir_name>``.
        key: Name of the variable to read.
        dir_name: Sub-directory of ``data`` that holds the file.

    Returns:
        The full contents of the variable named ``key``.
    """
    path_to_file = f'data/{dir_name}/{file_name}'

    # The context manager closes the dataset even when `key` is not present.
    with netCDF4.Dataset(path_to_file, 'r') as file2read:
        val = file2read.variables[key][:]

    return val

In [6]:
def iter_3D(matrix):
    """Yield every ``(i, j, k)`` index triple for the first three axes of *matrix*.

    The ranges come from ``matrix.shape[0]``, ``matrix.shape[1]`` and
    ``matrix.shape[2]``; the last index varies fastest.
    """
    yield from (
        (plane, row, col)
        for plane in range(matrix.shape[0])
        for row in range(matrix.shape[1])
        for col in range(matrix.shape[2])
    )

### Reanalysis Data

In [7]:
# Sub-directory of `data/` (and archive base name) used for this download.
dir_name = 'reanalysis'

# Dataset identifier handed to the CDS client.
dataset = 'sis-ecde-climate-indicators'

# Request body handed to the CDS client together with `dataset`.
request = {
    'variable': [
        'mean_temperature',
        'hot_days',
        'frost_days',
        'duration_of_meteorological_droughts',
    ],
    'origin': 'reanalysis',
    'temporal_aggregation': ['yearly'],
    'spatial_aggregation': 'gridded',
    'other_parameters': ['30_c'],
}


In [8]:
# Download and unpack the requested files, keeping the list of extracted names.
file_names = get_and_unpack_data(dataset, request, dir_name)

# Print a summary of each file (variable names, dataset description, `time`
# values) so the right file index and variable key can be chosen below.
for name in file_names:
    read_nc_file(name, dir_name)

2024-09-14 18:16:16,356 INFO Request ID is 04cfbfa4-0486-4834-bde7-712d682fc67e
2024-09-14 18:16:16,413 INFO status has been updated to accepted
2024-09-14 18:16:17,974 INFO status has been updated to running
2024-09-14 18:16:20,279 INFO status has been updated to successful
                                                                                         

dict_keys(['lat', 'lon', 'realization', 'time', 'dmd'])
<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): lat(185), lon(271), time(84)
    variables(dimensions): float64 lat(lat), float64 lon(lon), int64 realization(), int64 time(time), int64 dmd(time, lat, lon)
    groups: 
[    0   366   731  1096  1461  1827  2192  2557  2922  3288  3653  4018
  4383  4749  5114  5479  5844  6210  6575  6940  7305  7671  8036  8401
  8766  9132  9497  9862 10227 10593 10958 11323 11688 12054 12419 12784
 13149 13515 13880 14245 14610 14976 15341 15706 16071 16437 16802 17167
 17532 17898 18263 18628 18993 19359 19724 20089 20454 20820 21185 21550
 21915 22281 22646 23011 23376 23742 24107 24472 24837 25203 25568 25933
 26298 26664 27029 27394 27759 28125 28490 28855 29220 29586 29951 30316]




dict_keys(['time', 'realization', 'lat', 'lon', 't2m'])
<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    Con

In [9]:
### REMEMBER TO ADJUST THE NUMBER OF FILES AND GIVE APPROPRIATE KEY NAMES!!!
# The indices into `file_names` are hard-coded, and `file_names` comes from
# os.listdir, whose order is arbitrary. Check the summaries printed by the
# previous cell to confirm which index holds which variable.
time, lat, lon = get_time_lat_lon(file_names[0], dir_name)

mean_temperature = get_val_by_key(file_names[1], 't2m', dir_name)
# NOTE(review): hot_days and frost_days are also read with the 't2m' key;
# confirm against the variable names printed for those two files.
hot_days = get_val_by_key(file_names[3], 't2m', dir_name)
frost_days = get_val_by_key(file_names[2], 't2m', dir_name)
droughts = get_val_by_key(file_names[0], 'dmd', dir_name)

In [10]:
OUTPUT_FILE_NAME = 'reanalysis.csv'

# Write one CSV row per (time, lat, lon) grid cell. Rows are streamed to the
# file as they are formatted instead of being collected in a list first, so
# the whole table is never held in memory as strings.
with open(OUTPUT_FILE_NAME, 'w') as f:
    # NOTE(review): the header separates fields with ', ' while the data rows
    # use ','; readers will see column names with a leading space.
    f.write('time, lat, lon, mean_t, hot_d, cold_d, droughts')

    for i, j, k in iter_3D(mean_temperature):
        # Each row is prefixed (not suffixed) with the newline so the file has
        # no trailing newline, matching a "\n".join of header and rows.
        f.write('\n%d,%.2f,%.2f,%.2f,%d,%d,%d' % (
            # Presumably `time` is in days counted from 1940, which makes this
            # a calendar year - TODO confirm against the file's time units.
            (time[i]/365 + 1940), lat[j], lon[k],
            # Presumably Kelvin to degrees Celsius - TODO confirm units.
            mean_temperature[i, j, k]-273.15,
            hot_days[i, j, k],
            frost_days[i, j, k],
            droughts[i, j, k]))