# Convert and concatenate netCDF data into CSV
https://cds-beta.climate.copernicus.eu/datasets/sis-ecde-climate-indicators

In [1]:
import cdsapi
import zipfile
import netCDF4
import os
import numpy as np

### Functions

In [2]:
def get_and_unpack_data(dataset, request, dir_name):
    """Download a CDS request as a zip archive, unpack it and list the result.

    Args:
        dataset: Dataset identifier handed to ``cdsapi.Client.retrieve``.
        request: Request dictionary handed to ``cdsapi.Client.retrieve``.
        dir_name: Base name of the temporary ``<dir_name>.zip`` download and
            of the extraction directory ``data/<dir_name>``.

    Returns:
        The entries of ``data/<dir_name>`` as reported by ``os.listdir``.
        The order is whatever the OS returns, and files left in the
        directory by earlier runs are included.
    """
    path_to_zip_file = f'{dir_name}.zip'
    directory_to_extract_to = f'data/{dir_name}'

    client = cdsapi.Client()
    client.retrieve(dataset, request, path_to_zip_file)

    with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
        zip_ref.extractall(directory_to_extract_to)

    # Remove the archive only once its handle is closed: deleting a file that
    # is still open fails on Windows.
    os.remove(path_to_zip_file)

    return os.listdir(directory_to_extract_to)

In [3]:
def read_nc_file(file_name, dir_name):
    """Print the variable names and the summary of one netCDF file.

    Args:
        file_name: Name of the file inside ``data/<dir_name>``.
        dir_name: Sub-directory of ``data`` that holds the file.

    Returns:
        None. The variable keys, the dataset summary and three blank
        separator lines are written to standard output.
    """
    path_to_file = f'data/{dir_name}/{file_name}'

    # The context manager closes the dataset even if printing raises.
    with netCDF4.Dataset(path_to_file, 'r') as file2read:
        print(file2read.variables.keys())
        print(file2read)
        print('\n\n\n')

In [4]:
def get_time_lat_lon(file_name, dir_name):
    """Read the coordinate variables of one netCDF file.

    Args:
        file_name: Name of the file inside ``data/<dir_name>``.
        dir_name: Sub-directory of ``data`` that holds the file.

    Returns:
        A ``(time, lat, lon)`` tuple with the full contents of the variables
        ``'time'``, ``'lat'`` and ``'lon'``.

    Raises:
        KeyError: If one of the three variables is missing from the file.
    """
    path_to_file = f'data/{dir_name}/{file_name}'

    # The values are copied out with [:] inside the block, so they stay usable
    # after the dataset is closed; the file is closed even on a missing key.
    with netCDF4.Dataset(path_to_file, 'r') as file2read:
        time = file2read.variables['time'][:]
        lat = file2read.variables['lat'][:]
        lon = file2read.variables['lon'][:]

    return time, lat, lon

In [15]:
def get_val_by_key(file_name, key, dir_name):
    """Read, print and return the full contents of one netCDF variable.

    Args:
        file_name: Name of the file inside ``data/<dir_name>``.
        key: Name of the variable to read.
        dir_name: Sub-directory of ``data`` that holds the file.

    Returns:
        The values of ``variables[key]`` for the whole variable.

    Raises:
        KeyError: If the file has no variable called *key*.
    """
    path_to_file = f'data/{dir_name}/{file_name}'

    # A wrong key raises KeyError; the context manager still closes the file.
    with netCDF4.Dataset(path_to_file, 'r') as file2read:
        val = file2read.variables[key][:]

    print(val)
    return val

In [None]:
def show_unique_vals(arr):
    """Print the sorted distinct values contained in *arr*.

    Args:
        arr: Any array-like (nested list, ndarray, masked array). The input
            is converted to a plain ndarray first, so for a masked array the
            values stored under the mask are part of the output.

    Returns:
        None. The unique values are written to standard output.
    """
    # np.unique flattens its input itself, so no explicit ravel() is needed.
    print(np.unique(np.asarray(arr)))

In [6]:
def iter_3D(matrix):
    """Yield every ``(i, j, k)`` index triple over the first three axes.

    The triples are produced with the last index varying fastest.
    *matrix* only needs a ``shape`` attribute with at least three entries.
    """
    yield from (
        (plane, row, col)
        for plane in range(matrix.shape[0])
        for row in range(matrix.shape[1])
        for col in range(matrix.shape[2])
    )

### Reanalysis Data

In [7]:
# Reanalysis download settings.
# dir_name names both the temporary zip file and the folder under data/.
dataset = "sis-ecde-climate-indicators"
dir_name = "reanalysis"

request = {
    "variable": [
        "mean_temperature",
        "hot_days",
        "frost_days",
        "duration_of_meteorological_droughts",
    ],
    "origin": "reanalysis",
    "temporal_aggregation": ["yearly"],
    "spatial_aggregation": "gridded",
    # presumably the temperature threshold used for hot_days (TODO confirm)
    "other_parameters": ["30_c"],
}


In [8]:
# Download and unpack the reanalysis request; file_names is reused by the
# following cells.
file_names = get_and_unpack_data(dataset, request, dir_name)

# Print a summary of every extracted file so that the variable keys needed
# further down can be read off the output.
for name in file_names:
    read_nc_file(name, dir_name)

2024-09-14 18:16:16,356 INFO Request ID is 04cfbfa4-0486-4834-bde7-712d682fc67e
2024-09-14 18:16:16,413 INFO status has been updated to accepted
2024-09-14 18:16:17,974 INFO status has been updated to running
2024-09-14 18:16:20,279 INFO status has been updated to successful
                                                                                         

dict_keys(['lat', 'lon', 'realization', 'time', 'dmd'])
<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): lat(185), lon(271), time(84)
    variables(dimensions): float64 lat(lat), float64 lon(lon), int64 realization(), int64 time(time), int64 dmd(time, lat, lon)
    groups: 
[    0   366   731  1096  1461  1827  2192  2557  2922  3288  3653  4018
  4383  4749  5114  5479  5844  6210  6575  6940  7305  7671  8036  8401
  8766  9132  9497  9862 10227 10593 10958 11323 11688 12054 12419 12784
 13149 13515 13880 14245 14610 14976 15341 15706 16071 16437 16802 17167
 17532 17898 18263 18628 18993 19359 19724 20089 20454 20820 21185 21550
 21915 22281 22646 23011 23376 23742 24107 24472 24837 25203 25568 25933
 26298 26664 27029 27394 27759 28125 28490 28855 29220 29586 29951 30316]




dict_keys(['time', 'realization', 'lat', 'lon', 't2m'])
<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    Con

In [9]:
### REMEMBER TO ADJUST NUMBER OF FILES AND GIVE APPROPRIATE KEY NAMES!!!
# file_names comes from os.listdir(), whose order is arbitrary: check the
# indices below against the summaries printed by read_nc_file before running.
# The coordinate axes are read from file_names[0] only and reused for every
# variable.
time, lat, lon = get_time_lat_lon(file_names[0], dir_name)

# Three of the four files are read with the key 't2m'; only the drought file
# uses 'dmd'. get_val_by_key also prints each array it loads.
mean_temperature = get_val_by_key(file_names[1], 't2m', dir_name)
hot_days = get_val_by_key(file_names[3], 't2m', dir_name)
frost_days = get_val_by_key(file_names[2], 't2m', dir_name)
droughts = get_val_by_key(file_names[0], 'dmd', dir_name)

In [10]:
# Flatten the reanalysis arrays into one CSV row per (time, lat, lon) cell.
OUTPUT_FILE_NAME = 'reanalysis.csv'
# Year written for time == 0; later years are derived from the time offset
# divided by 365 (the printed time axis advances in steps of 365/366).
START_YEAR = 1940

# '%d' cannot format a masked element (numpy raises MaskError), so rows whose
# integer columns are masked are left out instead of aborting the export.
# getmaskarray() returns an all-False array for data without a mask.
time_mask = np.ma.getmaskarray(time)
hot_mask = np.ma.getmaskarray(hot_days)
frost_mask = np.ma.getmaskarray(frost_days)
drought_mask = np.ma.getmaskarray(droughts)

rows = ['time,lat,lon,mean_t,hot_d,cold_d,droughts']

for i, j, k in iter_3D(mean_temperature):
    if time_mask[i] or hot_mask[i, j, k] or frost_mask[i, j, k] or drought_mask[i, j, k]:
        continue
    rows.append('%d,%.2f,%.2f,%.2f,%d,%d,%d' % (
        time[i] / 365 + START_YEAR,  # %d truncates the fraction to a whole year
        lat[j],
        lon[k],
        mean_temperature[i, j, k] - 273.15,  # Kelvin -> Celsius
        hot_days[i, j, k],
        frost_days[i, j, k],
        droughts[i, j, k],
    ))

with open(OUTPUT_FILE_NAME, 'w') as f:
    f.write("\n".join(rows))

### Projections Data

In [21]:
# Projections download settings.
# dir_name names both the temporary zip file and the folder under data/.
dataset = "sis-ecde-climate-indicators"
dir_name = "projections"

# Earlier model combination, kept for reference together with the author's
# note on what it returned ('--' is presumably numpy's marker for masked
# values - TODO confirm):
## mean: --, hot_days: 0, frost_days: --, droughts: --
# request = {
#     'variable': ['mean_temperature', 'hot_days', 'frost_days', 'duration_of_meteorological_droughts'],
#     'origin': 'projections',
#     'gcm': ['mpi_esm_lr'],
#     'rcm': ['cclm4_8_17'],
#     'experiment': ['rcp8_5'],
#     'ensemble_member': ['r1i1p1'],
#     'temporal_aggregation': ['yearly'],
#     'spatial_aggregation': 'gridded',
#     'other_parameters': ['30_c']
# }

request = {
    "variable": [
        "mean_temperature",
        "hot_days",
        "frost_days",
        "duration_of_meteorological_droughts",
    ],
    "origin": "projections",
    "gcm": ["ec_earth"],
    "rcm": ["hirham5"],
    "experiment": ["rcp4_5"],
    "ensemble_member": ["r3i1p1"],
    "temporal_aggregation": ["yearly"],
    "spatial_aggregation": "gridded",
    # presumably the temperature threshold used for hot_days (TODO confirm)
    "other_parameters": ["40_c"],
}

In [24]:
# Download and unpack the projections request; file_names is reused by the
# following cells.
file_names = get_and_unpack_data(dataset, request, dir_name)

# Print a summary of every extracted file so that the variable keys needed
# further down can be read off the output.
for name in file_names:
    read_nc_file(name, dir_name)

2024-09-14 20:25:52,153 INFO Request ID is 94b66ead-d5f3-4bba-8b45-1ab96d79b901
2024-09-14 20:25:52,207 INFO status has been updated to accepted
2024-09-14 20:25:53,769 INFO status has been updated to running
2024-09-14 20:25:56,086 INFO status has been updated to successful
                                                                                         

dict_keys(['lat', 'lon', 'time', 'dmd'])
<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): lat(185), lon(271), time(150)
    variables(dimensions): float64 lat(lat), float64 lon(lon), int64 time(time), int64 dmd(time, lat, lon)
    groups: 
[    0   365   731  1096  1461  1826  2192  2557  2922  3287  3653  4018
  4383  4748  5114  5479  5844  6209  6575  6940  7305  7670  8036  8401
  8766  9131  9497  9862 10227 10592 10958 11323 11688 12053 12419 12784
 13149 13514 13880 14245 14610 14975 15341 15706 16071 16436 16802 17167
 17532 17897 18263 18628 18993 19358 19724 20089 20454 20819 21185 21550
 21915 22280 22646 23011 23376 23741 24107 24472 24837 25202 25568 25933
 26298 26663 27029 27394 27759 28124 28490 28855 29220 29585 29951 30316
 30681 31046 31412 31777 32142 32507 32873 33238 33603 33968 34334 34699
 35064 35429 35795 36160 36525 36890 37256 37621 37986 38351 38717 39082
 39447 39812 40178 40543 40908 41273 41639 

In [25]:
# Show the extracted file names in the order os.listdir() returned them.
print(file_names)

['19_duration_of_meteorological_droughts-projections-yearly-rcp_4_5-hirham5-ec_earth-r3i1p1-grid-v1.0.nc', '11_frost_days-projections-yearly-rcp_4_5-hirham5-ec_earth-r3i1p1-grid-v1.0.nc', '06_hot_days-projections-yearly-40deg-rcp_4_5-hirham5-ec_earth-r3i1p1-grid-v1.0.nc', '01_mean_temperature-projections-yearly-rcp_4_5-hirham5-ec_earth-r3i1p1-grid-v1.0.nc']


In [31]:
### REMEMBER TO GIVE APPROPRIATE KEY NAMES!!!
# os.listdir() returns the files in arbitrary order, so each file is picked by
# the variable name embedded in it ('<nn>_<variable>-projections-...') rather
# than by its position in file_names.
def _file_for(variable):
    """Return the single entry of file_names whose name contains ``_<variable>-``."""
    matches = [name for name in file_names if f'_{variable}-' in name]
    if len(matches) != 1:
        raise ValueError(f'expected exactly one file for {variable!r}, found {matches}')
    return matches[0]


# The coordinate axes are read from the drought file and reused for every variable.
time, lat, lon = get_time_lat_lon(_file_for('duration_of_meteorological_droughts'), dir_name)

# get_val_by_key also prints each array it loads.
mean_temperature = get_val_by_key(_file_for('mean_temperature'), 'tasAdjust', dir_name)
hot_days = get_val_by_key(_file_for('hot_days'), 'tasAdjust_NON_CDM', dir_name)
frost_days = get_val_by_key(_file_for('frost_days'), 'tasAdjust_NON_CDM', dir_name)
droughts = get_val_by_key(_file_for('duration_of_meteorological_droughts'), 'dmd', dir_name)

[[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 ...

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]]


In [34]:
# Print the distinct values of each loaded indicator, in the order
# mean temperature, hot days, frost days, droughts.
for indicator in (mean_temperature, hot_days, frost_days, droughts):
    show_unique_vals(indicator)

[264.56644 264.5857  264.59436 ... 304.28815 304.2906        nan]
[  0.   1.   2.   3.   4.   5.   6.   7.   8.   9.  10.  11.  12.  13.
  14.  15.  16.  17.  18.  19.  20.  21.  22.  23.  24.  25.  26.  27.
  28.  29.  30.  31.  32.  33.  34.  35.  36.  37.  38.  39.  40.  41.
  42.  43.  44.  45.  46.  47.  48.  49.  50.  51.  52.  53.  54.  55.
  56.  57.  58.  59.  60.  61.  62.  63.  64.  65.  66.  67.  68.  69.
  70.  71.  72.  73.  74.  75.  76.  77.  78.  79.  80.  81.  82.  83.
  84.  85.  86.  87.  88.  89.  90.  91.  92.  93.  94.  95.  96.  97.
  98.  99. 100. 101. 102. 103. 104. 105. 106. 107. 108. 109. 110. 111.
 112. 113. 114. 115. 116. 117. 118. 119. 120. 121. 122. 123. 124. 125.
 126. 127. 128. 129. 130. 131. 132. 133. 134. 135. 136. 137. 138. 139.
 140. 141. 142. 143. 144. 145. 146. 147. 148. 149. 150. 151. 152. 153.
 154. 155. 156. 157. 158. 159. 160. 161. 162. 163. 164. 165. 166. 167.
 168. 169. 170. 171. 172. 173. 174. 175. 176. 177. 178. 179. 180. 181.
 182. 183. 

In [38]:
# Flatten the projection arrays into one CSV row per (time, lat, lon) cell.
OUTPUT_FILE_NAME = 'projections.csv'
# Year written for time == 0 (evaluates to 1949); later years are derived
# from the time offset divided by 365.
START_YEAR = 2100-151

# '%d' cannot format a masked element (numpy raises MaskError). Such rows are
# skipped by testing the masks up front rather than by catching the exception
# once per cell. getmaskarray() returns an all-False array for unmasked data.
time_mask = np.ma.getmaskarray(time)
hot_mask = np.ma.getmaskarray(hot_days)
frost_mask = np.ma.getmaskarray(frost_days)
drought_mask = np.ma.getmaskarray(droughts)

rows = ['time,lat,lon,mean_t,hot_d,cold_d,droughts']

for i, j, k in iter_3D(mean_temperature):
    if time_mask[i] or hot_mask[i, j, k] or frost_mask[i, j, k] or drought_mask[i, j, k]:
        continue
    rows.append('%d,%.2f,%.2f,%.2f,%d,%d,%d' % (
        time[i] / 365 + START_YEAR,  # %d truncates the fraction to a whole year
        lat[j],
        lon[k],
        mean_temperature[i, j, k] - 273.15,  # Kelvin -> Celsius
        hot_days[i, j, k],
        frost_days[i, j, k],
        droughts[i, j, k],
    ))

with open(OUTPUT_FILE_NAME, 'w') as f:
    f.write("\n".join(rows))

  l.append('%d,%.2f,%.2f,%.2f,%d,%d,%d' %((time[i]/365 + START_YEAR), lat[j], lon[k],


### Sandbox

In [2]:
import pandas as pd

In [3]:
# Load the CSV written by the projections export cell.
df = pd.read_csv("projections.csv")

In [4]:
# Preview the first rows to check the column layout.
df.head()

Unnamed: 0,time,lat,lon,mean_t,hot_d,cold_d,droughts
0,1949,26.5,-12.25,18.86,8,0,4
1,1949,26.5,-12.0,18.7,9,0,2
2,1949,26.5,-11.75,19.38,11,0,2
3,1949,26.5,-11.5,19.92,13,0,3
4,1949,26.5,-11.25,20.13,15,0,3


In [9]:
# (lat, lon) pairs of the rows for year 2000 whose mean temperature lies
# strictly between 14 and 15; the first pair is displayed as a spot check.
in_band = (df.mean_t > 14) & (df.mean_t < 15)
test_arr = df.loc[in_band & (df.time == 2000), ['lat', 'lon']].to_numpy()
test_arr[0]

array([30.75, -8.  ])

In [None]:
def get_coords(year, min_mean_t=-273.15, max_mean_t=273.15, data=None):
    """Return the (lat, lon) pairs of one year whose mean_t lies inside a range.

    Args:
        year: Value matched exactly against the ``time`` column.
        min_mean_t: Exclusive lower bound for the ``mean_t`` column.
        max_mean_t: Exclusive upper bound for the ``mean_t`` column.
        data: DataFrame with ``time``, ``mean_t``, ``lat`` and ``lon``
            columns. When omitted, the notebook-level ``df`` is used.

    Returns:
        A 2-D NumPy array with one ``[lat, lon]`` row per matching record;
        its shape is ``(0, 2)`` when nothing matches.
    """
    # Only fall back to the global when no frame is passed, so the function
    # can be used on any CSV with the same columns.
    frame = df if data is None else data
    selected = (
        (frame['time'] == year)
        & (frame['mean_t'] > min_mean_t)
        & (frame['mean_t'] < max_mean_t)
    )
    return frame.loc[selected, ['lat', 'lon']].to_numpy()