This notebook downloads CMIP6 climate projection data from the Copernicus Climate Data Store (CDS) and registers them as csv files.

Data source : https://cds.climate.copernicus.eu/cdsapp#!/dataset/projections-cmip6?tab=form

In [1]:
### User input
# Short name of the climate variable. register_data reads this module-level name as the key
# into the netCDF variables and converts kg.m^-2.s^-1 to mm/day when it equals 'pr'.
global_variable = 'pr'
# Long name of the climate variable; used in output folder names, in csv file names and as
# the name of the value column of the dataframes.
name_variable = 'precipitation'

# Temporal resolution of the data: 'daily', 'monthly' or 'fixed'
temporal_resolution = 'daily'

# First and last year of the period of interest
# (presumably passed to year_copernicus - confirm against the cell that calls it)
y_start = 1950
y_end = 2014

# wind register at 10 m

# Functions and packages

In [2]:
import pandas as pd
import numpy as np
import numpy.ma as ma
import netCDF4 as nc#not directly used but needs to be imported for some nc4 files manipulations, use for nc files
from netCDF4 import Dataset
import xarray as xr
import datetime # to have actual date
import os
import os.path
import cdsapi # for copernicus function
import shutil
from datetime import datetime

# Out path

In [3]:
# Root folder for everything this notebook writes: the functions below create the 'csv' and
# 'Data_download_zip' subfolders and the per-variable download folders underneath it.
# NOTE(review): hard-coded network share; the notebook only runs where this path is reachable.
out_path=r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets'

# Project information

In [4]:
# Names of the projects; the coordinate arrays below follow the same order
name_projects = np.array([
    'WTP_Mutua_EIB',
    'Gorongosa_EIB',
    'Chimoio_WTP_EIB',
    'Pemba_EIB',
])

# Raw coordinates of each project (presumably decimal degrees - confirm)
lon_projects_data = np.array([
    34.5927839939706,
    34.07824286310398,
    33.47333313659342,
    40.52545156033736,
])
lat_projects_data = np.array([
    -19.495079648575242,
    -18.68063728746643,
    -19.125095255188334,
    -12.973942656747809,
])

# Same coordinates as pandas Series, indexed 0..n-1 like name_projects
lon_projects = pd.Series(lon_projects_data)
lat_projects = pd.Series(lat_projects_data)

# Margin added on each side of a project, in the same unit as the coordinates
buffer_area_project=2

# Bounding boxes of all projects, list format: [lat - buffer, lat + buffer, lon - buffer, lon + buffer],
# each entry being a Series with one value per project
area_projects = [
    lat_projects - buffer_area_project,
    lat_projects + buffer_area_project,
    lon_projects - buffer_area_project,
    lon_projects + buffer_area_project,
]

In [5]:
lat_projects[0]

-19.495079648575242

# Class

### Calendar class

In [6]:
# Time parameters that remain constant during the whole script.
# NOTE: this class has the same name as the standard-library `calendar` module.
class calendar:
    # two-digit month labels, '01' to '12'
    default_month = ['%02d' % month_number for month_number in range(1, 13)]
    # two-digit day-of-month labels, '01' to '31'
    default_day = ['%02d' % day_number for day_number in range(1, 32)]
    #actual_date = datetime.date.today()
    #actual_year = actual_date.year

### Copernicus class

In [7]:
## Tuples listing the models and experiments that are tried when searching for available data.
# Tuples are used so that the lists cannot be modified by accident.
class copernicus_elements:
    # the 58 model identifiers, in the order in which they are tried
    models = (
        'access_cm2', 'awi_cm_1_1_mr', 'bcc_csm2_mr', 'cams_csm1_0', 'canesm5_canoe',
        'cesm2_fv2', 'cesm2_waccm_fv2', 'cmcc_cm2_hr4', 'cmcc_esm2', 'cnrm_cm6_1_hr',
        'e3sm_1_0', 'e3sm_1_1_eca', 'ec_earth3_aerchem', 'ec_earth3_veg', 'fgoals_f3_l',
        'fio_esm_2_0', 'giss_e2_1_g', 'hadgem3_gc31_ll', 'iitm_esm', 'inm_cm5_0',
        'ipsl_cm6a_lr', 'kiost_esm', 'miroc6', 'miroc_es2l', 'mpi_esm1_2_hr',
        'mri_esm2_0', 'norcpm1', 'noresm2_mm', 'taiesm1', 'access_esm1_5',
        'awi_esm_1_1_lr', 'bcc_esm1', 'canesm5', 'cesm2', 'cesm2_waccm',
        'ciesm', 'cmcc_cm2_sr5', 'cnrm_cm6_1', 'cnrm_esm2_1', 'e3sm_1_1',
        'ec_earth3', 'ec_earth3_cc', 'ec_earth3_veg_lr', 'fgoals_g3', 'gfdl_esm4',
        'giss_e2_1_h', 'hadgem3_gc31_mm', 'inm_cm4_8', 'ipsl_cm5a2_inca', 'kace_1_0_g',
        'mcm_ua_1_0', 'miroc_es2h', 'mpi_esm_1_2_ham', 'mpi_esm1_2_lr', 'nesm3',
        'noresm2_lm', 'sam0_unicon', 'ukesm1_0_ll',
    )
    # future scenario identifiers
    experiments = (
        'ssp1_1_9',
        'ssp1_2_6',
        'ssp4_3_4',
        'ssp5_3_4os',
        'ssp2_4_5',
        'ssp4_6_0',
        'ssp3_7_0',
        'ssp5_8_5',
    )
    # one-element tuple so that it can be iterated like `experiments`
    experiments_historical = ('historical',)

## Functions

In [8]:
# TODO: separate the function that downloads from the one that builds the files? That way the closest lat and lon could be chosen from an existing file,
# or keep it as it is but find a way so that it does not run every time; it should run once for each project

In [9]:
########################################### Register data from nc file of Copernicus ############################################
def csv_copernicus(temporal_resolution,year_str,experiments,models,out_path, global_variable, name_variable, name_projects,area,lat_projects,lon_projects,source):
    """Build, or reload, one csv file per project with the Copernicus data of the period.

    For every project:
        1) the project name is sanitised so that it can be used as a folder name;
        2) the csv folder out_path/csv/source/name_variable/name_project/period is created
           if it does not exist yet;
        3) if the csv file of this project/period/resolution already exists it is read back
           with file_already_downloaded, otherwise dataframe_copernicus is called to
           download the data and write the csv file.

    Parameters
    ----------
    temporal_resolution : 'daily', 'monthly' or 'fixed'. String type
    year_str : list of the years of the period of interest, as strings
    experiments : tuple of experiments, e.g. copernicus_elements.experiments
    models : tuple of models, e.g. copernicus_elements.models
    out_path : root path where the outputs are registered
    global_variable : global name of the climate variable of interest
    name_variable : name of the element downloaded from copernicus
    name_projects : iterable with the names of the projects
    area : list of four sequences [lat min, lat max, lon min, lon max], each with one value
           per project (same layout as area_projects)
    lat_projects, lon_projects : latitude and longitude of every project, indexable by
           project position
    source : name of the data source, used as a folder name

    Returns
    -------
    The dataframe of the LAST project processed (an empty dataframe if name_projects is
    empty). NOTE(review): the data of the other projects are only available through their
    csv files; returning a concatenation was started but left commented out by the author.
    """
    # string naming the period, used in folder and file names
    if temporal_resolution == 'fixed':
        period = 'fixed'
    else:
        period = year_str[0] + '-' + year_str[-1]

    (dates, index_dates) = date_copernicus(temporal_resolution, year_str) # time vector depending on temporal resolution

    # k is handed to dataframe_copernicus, which looks for the closest grid point while k == 0
    # and returns the value of k to use for the next call
    k = 0
    df = pd.DataFrame() # returned as is when there is no project to process
    index_closest_lat = index_closest_lon = closest_value_lat = closest_value_lon = None

    for i, name_project in enumerate(name_projects):
        print('############################### Project name: '+name_project+' ###############################')

        # make the project name safe to use as a folder name
        name_project = name_project.replace('-','_') # replace every hyphen
        name_project = name_project.replace('/','_') # replace every slash
        name_project = name_project.replace('\\','_') # replace every backslash
        # brackets shouldn't be a problem for name projects

        title_file = name_project +'_' +period+ '_' + temporal_resolution + '_' +name_variable+'.csv'
        path_for_csv = os.path.join(out_path,'csv',source,name_variable,name_project,period) # folder of the csv file
        os.makedirs(path_for_csv, exist_ok=True) # no-op when the folder already exists

        if os.path.isfile(os.path.join(path_for_csv,title_file)):
            # the csv file of this project was already produced: read it back
            df = file_already_downloaded(path_for_csv,title_file,name_variable)
            continue

        # The csv file does not exist yet (new folder, empty folder, or folder that only
        # contains files of another temporal resolution): download and register the data.
        # The bounding box and the coordinates are those of the CURRENT project (index i).
        project_area = [area[0][i],area[1][i],area[2][i],area[3][i]]
        cached_grid = {}
        if k != 0:
            # closest grid points found by a previous call are reused
            cached_grid = dict(index_closest_lat_d=index_closest_lat,
                               index_closest_lon_d=index_closest_lon,
                               closest_value_lat_d=closest_value_lat,
                               closest_value_lon_d=closest_value_lon)
        (df,k,index_closest_lat,index_closest_lon,closest_value_lat,closest_value_lon)=dataframe_copernicus(
            temporal_resolution,year_str,experiments,models,out_path, global_variable, name_variable,
            name_project,project_area,lat_projects[i],lon_projects[i],period,index_dates,dates,
            path_for_csv,title_file,source,k,i,**cached_grid)

    return df

In [10]:
# dataframe_copernicus asks copernicus_data for every experiment/model combination and,
# when at least one combination returned data, registers the resulting dataframe in a csv file
def dataframe_copernicus(temporal_resolution,year_str,experiments,models,out_path, global_variable, name_variable, name_project,area,lat_project,lon_project,period,index_dates,dates,path_for_csv,title_file,source,k,i,index_closest_lat_d=[],index_closest_lon_d=[],closest_value_lat_d=[],closest_value_lon_d=[]):
    """Collect the data of one project for all experiments and models.

    For each experiment and model, copernicus_data returns the path of the nc file (or None
    when nothing is available). While k == 0 the closest grid points are looked up with
    _lat_lon for every nc file found; note that this call reads the module-level
    lat_projects / lon_projects, not the lat_project / lon_project parameters. The values
    are then appended to the dataframe by register_data, using the project index i.

    Returns the tuple (df, k, index_closest_lat_d, index_closest_lon_d, closest_value_lat_d,
    closest_value_lon_d). df is empty, and no csv file is written, when no combination
    returned data; k is returned unchanged.
    """
    print('FUNCTION DATAFRAME_COPERNICUS')
    print('k = '+str(k))
    df = pd.DataFrame() # filled by register_data, one experiment/model at a time

    for SSP in experiments:
        experiment = (SSP,) # one-element tuple, needed to build the MultiIndex
        print('Test with scenario '+SSP)
        for model_simulation in models:
            model = (model_simulation,) # one-element tuple, needed to build the MultiIndex
            print('Test with model '+model_simulation)

            # folder where the downloaded file is (or will be) extracted; its existence is
            # tested inside copernicus_data
            path_for_file = os.path.join(out_path,name_variable,name_project,SSP,model_simulation,period)
            climate_variable_path = copernicus_data(temporal_resolution,SSP,name_variable,model_simulation,year_str,area,path_for_file,out_path,name_project,source)

            if climate_variable_path is None:
                print('\nNo value were found for the period and the project tested\n')
                continue # nothing to register for this combination

            if k == 0:
                print('suppose to enter once')
                (index_closest_lat_d,index_closest_lon_d,closest_value_lat_d,closest_value_lon_d)=_lat_lon(climate_variable_path,lat_projects,lon_projects)

                # debug output: each result and its length
                debug_values = (
                    ('index_closest_lat', index_closest_lat_d),
                    ('index_closest_lon', index_closest_lon_d),
                    ('closest_value_lat', closest_value_lat_d),
                    ('closest_value_lon', closest_value_lon_d),
                )
                for label, found in debug_values:
                    print('\n'+label)
                    print(found)
                    print('\nlen('+label+')')
                    print(len(found))

            # register the data of the project with the model, scenario, period, latitude and longitude
            print('i = '+str(i))
            print('k = '+str(k))
            print('start register data')
            df = register_data(climate_variable_path,name_project,name_variable,index_dates,dates,experiment,model,index_closest_lat_d,index_closest_lon_d,closest_value_lat_d,closest_value_lon_d,df,i)
            print('\nValue were found for the period and the project tested\n')

    if not df.empty:
        # values were registered: write them in the csv file of the project
        full_name = os.path.join(path_for_csv,title_file)
        print(full_name)
        df.to_csv(full_name)

    # the same tuple is returned whether or not the dataframe is empty
    return (df,k,index_closest_lat_d,index_closest_lon_d,closest_value_lat_d,closest_value_lon_d)

In [11]:
# register data concerning each project under the form of a dataframe, with the model, scenario, period, latitude and longitude
def register_data(climate_variable_path,name_project,name_variable,index_dates,dates,experiment,model,index_closest_lat,index_closest_lon,closest_value_lat,closest_value_lon,df,i):
    """Append the time series of one nc file to the dataframe of the project.

    Parameters
    ----------
    climate_variable_path : path of the nc file to read
    name_project : name of the project, first level of the MultiIndex
    name_variable : name given to the value column of the dataframe
    index_dates : array with the indexes of `dates`; only its length is used here
    dates : dates of the period of interest (DatetimeIndex for daily data, list for monthly data)
    experiment, model : one-element tuples with the scenario and the model of the nc file
    index_closest_lat, index_closest_lon : per project, index of the closest grid point
    closest_value_lat, closest_value_lon : per project, coordinates of the closest grid point
    df : dataframe to which the new values are appended
    i : index of the project in the four sequences above

    Returns
    -------
    The concatenation of `df` and of the values read in the nc file.

    Note: the variable read in the nc file is the module-level `global_variable`, which is
    not a parameter of this function.
    """
    print('Registering the data in a dataframe')

    conversion_factor = 1
    if global_variable =='pr':
        # convert precipitation data from kg.m^(-2).s^(-1) to mm/day :  1 kg/m2/s = 86400 mm/day
        conversion_factor = 86400

    # the context manager closes the file even if reading fails
    with xr.open_dataset(climate_variable_path) as ds:
        data_dataframe = ds.variables[global_variable].isel(lat=index_closest_lat[i],lon=index_closest_lon[i]).values*conversion_factor
        number_of_time_steps = len(ds.variables['time'].values)

    if number_of_time_steps<len(index_dates):
        # The file has fewer time steps than the calendar: 29.02 is assumed to be missing.
        # A NaN is inserted at the position of every 29.02 of `dates`; positions are visited
        # in increasing order, so each insertion lands at its final calendar position.
        for j in np.where((dates.month == 2) & (dates.day ==29))[0]:
            data_dataframe=np.insert(data_dataframe,j,np.nan)

    Date = list(dates) # works for a DatetimeIndex (daily) as well as for a list (monthly)
    Name_Project = (name_project,)

    print('\ni = '+ str(i))
    print('\nclosest_value_lat[i]'+str(closest_value_lat[i]))
    print('\ntype(closest_value_lat[i])'+str(type(closest_value_lat[i])))

    # Create the MultiIndex: one row per date, the other levels are constant
    midx = pd.MultiIndex.from_product([Name_Project,(closest_value_lat[i],),(closest_value_lon[i],),experiment, model, Date],names=['Name project', 'Latitude', 'Longitude','Experiment', 'Model', 'Date'])
    # name of the value column
    cols_str = [name_variable]
    # Create the Dataframe
    Variable_dataframe = pd.DataFrame(data = data_dataframe,
                                index = midx,
                                columns = cols_str)
    # Concatenate former and new dataframe
    df = pd.concat([df,Variable_dataframe])# register information for project
    return df

In [12]:
# Return the name of the climate variable stored in a netCDF file.
# The netCDF files from copernicus hold coordinate and bounds variables next to the climate
# variable of interest; once those are discarded, the first remaining name is returned.
def find_column_name(Open_path):
    # names that are never the climate variable of interest
    non_data_names = ('time', 'time_bnds', 'bnds', 'lat', 'lat_bnds', 'lon', 'lon_bnds',
                      'time_bounds', 'bounds', 'lat_bounds', 'lon_bounds', 'height')
    # every variable name of the netCDF file, in file order
    remaining_names = list(Open_path.variables)
    for non_data_name in non_data_names:
        try:
            remaining_names.remove(non_data_name)
        except ValueError:
            # this name is not present in the file
            pass
    # raises IndexError when the file holds nothing but coordinate/bounds variables
    return remaining_names[0]

In [13]:
def file_already_downloaded(path_for_csv,title_file,name_variable):
    """Read back a csv file written by dataframe_copernicus.

    The csv file has a single header line: the six index levels ('Name project', 'Latitude',
    'Longitude', 'Experiment', 'Model', 'Date') followed by the value column. Every data row
    is kept; the six levels are restored as the MultiIndex of the dataframe.

    Parameters
    ----------
    path_for_csv : folder containing the csv file
    title_file : name of the csv file
    name_variable : name given to the value column of the returned dataframe

    Returns
    -------
    Dataframe indexed by the six levels above, with the single column `name_variable`.
    Note that the 'Date' level holds the strings stored in the csv file (dates are not parsed).
    """
    print('The file was already downloaded')
    df = pd.read_csv(os.path.join(path_for_csv,title_file)) # read the downloaded data for the analysis

    # recreate the multiindex of the rows
    df = df.set_index(['Name project', 'Latitude', 'Longitude','Experiment', 'Model', 'Date'])

    # name of the value column
    df.columns = [name_variable]
    return df

In [14]:
################################################ Period for copernicus function ################################################
# Aim of the functions: given the first and last year of the period that must be analyzed, these functions produce several 
# vectors containing time information, used to download and process data from CMIP6 projections (https://cds.climate.copernicus.eu/cdsapp#!/dataset/projections-cmip6?tab=overview )
# Those time vectors are used in the copernicus_data, dataframe_copernicus and csv_copernicus functions

# function year_copernicus produces
# year: a numpy array containing all the years of the period of interest
# year_str: a list containing the same years in the string format
# index: a numpy array containing the indexes of year and year_str
#### Parameters of the function
# first_year: number in int format, first year of the period of interest
# last_year: number in int format, last year of the period of interest (included)
def year_copernicus(first_year,last_year):
    year = np.arange(first_year, last_year + 1, 1) # last_year + 1 so that the last year is included
    year_str = [str(single_year) for single_year in year] # same years, as strings
    index = np.arange(0, len(year)) # position of each year in the two vectors above
    return (year, year_str, index)

# function date_copernicus produces
# dates: the format depends on the temporal resolution, but always contains the dates of the period of interest.
#        with temporal_resolution=daily, dates is a DatetimeIndex with one entry per calendar day
#        with temporal_resolution=monthly, dates is a list of strings in the 'MM-YYYY' format
# index_dates: an array containing the indexes of dates
#### Parameters of the function
# temporal_resolution: 'daily' or 'monthly'; any other value (including 'fixed', which is not
#                      handled yet) raises a ValueError
# year_str: sequence produced by function year_copernicus, containing the years of the period of interest in string format
def date_copernicus(temporal_resolution,year_str):
    # ISO formatted bounds, so that the parsing of the dates is not ambiguous
    start_date = year_str[0]+"-01-01" # first day of the first year
    stop_date = year_str[-1]+"-12-31" # last day of the last year
    if temporal_resolution =='daily':
        # By default, freq = 'D', which means calendar day frequency (source : https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases)
        dates = pd.date_range(start_date,stop_date) # dates is a pandas.core.indexes.datetimes.DatetimeIndex
    elif temporal_resolution =='monthly':
        # first day of every month of the period, registered as 'MM-YYYY' strings
        dates = list(pd.date_range(start_date,stop_date,freq='MS').strftime('%m-%Y'))
    else:
        # TODO: handle temporal_resolution == 'fixed'
        raise ValueError("date_copernicus only handles the 'daily' and 'monthly' temporal resolutions, got "+repr(temporal_resolution))
    index_dates = np.arange(0,len(dates)) # vector containing the indexes of the dates vector
    return (dates, index_dates)

### Copernicus function
Some data come from Copernicus and can be retrieved directly from the website through the CDS API. The following functions serve this purpose.
#### Parameters of the function :
projections-cmip6 : name of the web page, in this case, 'projections-cmip6'
format : zip or tar.gz
temporal_resolution : daily or monthly or fixed
SSP : scenario that is studied "Historical", "SSP1-1.9", "SSP1-2.6" ...
Variable : variable to be studied
model: model of projection to choose
year: year of study to choose
area: area of study
month: month to be studied

In [15]:
################################################### Copernicus data function ###################################################
# Aim of the function : find, and download if needed, the nc file of the copernicus CMIP6 projections
# (https://cds.climate.copernicus.eu/cdsapp#!/dataset/projections-cmip6?tab=overview ) for one scenario and one model

def _fetch_nc_file(temporal_resolution,SSP,name_variable,model,area,year,path_for_file,file_download,source):
    """Return the path of the nc file when nothing usable was extracted in path_for_file yet.

    The data are downloaded with download_data when the download folder or its
    'download.zip' is missing; otherwise the existing archive is only extracted.
    """
    if not os.path.isdir(file_download):
        print('file_download does not exist: the data were not downloaded')
        return download_data(temporal_resolution,SSP,name_variable,model,area,year,path_for_file,file_download,source)

    # the download folder exists, so the data in zip format should also exist
    if not os.path.isfile(os.path.join(file_download,'download.zip')):
        print('The path for the download file exists, but is empty')
        return download_data(temporal_resolution,SSP,name_variable,model,area,year,path_for_file,file_download,source)

    print('file_download does exist, the data have been downloaded, but not extracted')
    extracted_path = download_extract(path_for_file,file_download,source)
    # looking for the netCDF file in format .nc and look if path length is a problem at the same time
    return search_for_nc(extracted_path)


def copernicus_data(temporal_resolution,SSP,name_variable,model,year,area,path_for_file,out_path,name_area,source):
    """Return the path of the nc file for the given parameters, or None if there is no data.

    Actions of this function
        1) build the folder in which the zip file is (or will be) downloaded;
        2) if path_for_file is missing or empty, download and/or extract the data;
        3) otherwise look for an nc file that was already extracted, and fall back to
           extracting or downloading when none is found.

    Parameters
    ----------
    temporal_resolution : daily or monthly or fixed
    SSP : scenario that is studied "Historical", "SSP1-1.9", "SSP1-2.6" ...
    name_variable : variable to be studied
    model : model of projection to choose
    year : year(s) of study to choose
    area : area of study, if not specific, area should be an empty array area=[]
    path_for_file : path where the file must be unzipped
    out_path : path where all the outputs are registered, defined by the user in the beginning of the main code
    name_area : to specify if we are only looking data for a project or for a wider zone
    source : name of the data source, used in the download and extraction paths
    """
    # create path for the downloaded file
    start_path = os.path.join(out_path,'Data_download_zip')
    file_download = create_file_download_path(start_path,name_variable,name_area,SSP,model,year,temporal_resolution,source)

    if not os.path.isdir(path_for_file):
        print('path_for_file does not exist: the data may not have been downloaded')
        return _fetch_nc_file(temporal_resolution,SSP,name_variable,model,area,year,path_for_file,file_download,source)

    if not os.listdir(path_for_file): # the path exists but is empty
        return _fetch_nc_file(temporal_resolution,SSP,name_variable,model,area,year,path_for_file,file_download,source)

    # the path is not empty: an nc file may already be there
    path_file = os.path.join(path_for_file,source) # source was added because of a problem during downloading
    final_path = search_for_nc(path_file) # looking for the netCDF file in format .nc and look if path length is a problem at the same time
    if final_path is not None:
        return final_path

    # No nc file was found: extract the archive if it was already downloaded, download it otherwise.
    # NOTE(review): this branch used to look for the archive in file_download/source while the
    # other branches look directly in file_download; both locations are accepted here until
    # the layout produced by the download functions is confirmed.
    zip_candidates = (os.path.join(file_download,source,'download.zip'),
                      os.path.join(file_download,'download.zip'))
    if any(os.path.isfile(zip_candidate) for zip_candidate in zip_candidates):
        # the file was already downloaded but not extracted
        extracted_path = download_extract(path_for_file,file_download,source)
        return search_for_nc(extracted_path)

    # the file was not downloaded; download_data returns None when the parameters do not fit
    return download_data(temporal_resolution,SSP,name_variable,model,area,year,path_for_file,file_download,source)

In [16]:
def download_data(temporal_resolution,SSP,name_variable,model,area,year,path_for_file,file_download,source):
    """Download the data from copernicus and return the path of the resulting nc file.

    Returns None when try_download_copernicus returns None (the parameters do not fit this
    climate variable) or when search_for_nc does not find any nc file.
    """
    path_file = try_download_copernicus(temporal_resolution,SSP,name_variable,model,area,year,path_for_file,file_download,source)
    if path_file is None: # for this climate variable, the parameter do not fit
        return path_file
    final_path=search_for_nc(path_file) # looking for the netCDF file in format .nc and look if path length is a problem at the same time
    print('\n')
    print('---------------  Path to nc file exists ?? ---------------\n')
    # search_for_nc may return None; os.path.isfile(None) would raise a TypeError
    print(final_path is not None and os.path.isfile(final_path))
    print('\n')
    return final_path

In [17]:
################################################### Copernicus data function ###################################################
# Aim of the function : read nc data found on copernicus CMIP6 projections (https://cds.climate.copernicus.eu/cdsapp#!/dataset/projections-cmip6?tab=overview )
# Actions of this function
#     1) check which parameters are asked or not in the variables dictionnary, and modify the last depend on the parameters 
#        chosen by the user before
#     2) All this step is done in function try_download_copernicus: thanks to c.retrieve function and the variables dictionnary, 
#        the chosen data are download in zip format, dezipped and registered in a specific folder. 
#     3) the function looks in the specific folder for a nc format file, and once found, return the path of this nc format file

#### Parameters of the function
# temporal_resolution : daily or monthly or fixed
# SSP : sscenario that is studied "Historical", "SSP1-1.9", "SSP1-2.6" ...
# name_variable : variable to be studied
# model: model of projection to choose
# year: year(s) of study to choose
# area: area of study, if not specific, area should be an empty array area=[]
# path_for_file: path where the file must be unzipped
# out_path: path were all the outputs are registered, defined by the user in the begining of the main code
# name_area : to specify if we are only looking data for a project or for a wider zone

def copernicus_data_former(temporal_resolution,SSP,name_variable,model,year,area,path_for_file,out_path,name_area,source):
    """Return the path of the netCDF file for the requested Copernicus CMIP6 data.

    The function reuses what is already on disk before asking Copernicus again:
      1) a .nc file already present in path_for_file/source is returned directly;
      2) otherwise, if 'download.zip' is stored in the download folder, it is
         extracted with download_extract and the result is searched;
      3) otherwise the data are requested with download_data.

    Parameters
    ----------
    temporal_resolution, SSP, name_variable, model, year, area :
        request parameters, forwarded to download_data.
    path_for_file : folder in which the data must be unzipped.
    out_path : root output folder; zips are kept under out_path/Data_download_zip.
    name_area : name of the project / zone, used to build the download folder.
    source : name of the sub-folder of path_for_file holding the extracted files.

    Returns
    -------
    The path of the .nc file, or None when the request was rejected or when no
    .nc file could be found.
    """
    # folder in which the downloaded zip is kept (folder only, without the file name)
    start_path = os.path.join(out_path,'Data_download_zip')
    file_download = create_file_download_path(start_path,name_variable,name_area,SSP,model,year,temporal_resolution,source)
    # download_extract stores the zip directly in file_download
    zip_path = os.path.join(file_download,'download.zip')

    def _from_copernicus():
        # request, download, extract and look for the nc file
        return download_data(temporal_resolution,SSP,name_variable,model,area,year,path_for_file,file_download,source)

    def _from_existing_zip():
        # the zip is already on disk: only extract it and look for the nc file
        extracted_dir = download_extract(path_for_file,file_download,source)
        return search_for_nc(extracted_dir)

    if not os.path.isdir(path_for_file):
        print('path_for_file does not exist: the data may not have been downloaded')
        if not os.path.isdir(file_download):
            print('file_download does not exist: the data were not downloaded')
            return _from_copernicus()
        if not os.path.isfile(zip_path):
            print('The path for the download file exists, but is empty')
            return _from_copernicus()
        print('file_download does exist, the data have been downloaded, but not extracted')
        return _from_existing_zip()

    # the path for the file exists: look for an already extracted nc file first
    extracted_dir = os.path.join(path_for_file,source)
    final_path = None
    if os.path.isdir(extracted_dir): # an empty path_for_file has no source sub-folder to search
        final_path = search_for_nc(extracted_dir)
    if final_path is not None:
        return final_path

    # no nc file available yet: extract the stored zip if there is one, otherwise download
    if os.path.isfile(zip_path):
        return _from_existing_zip()
    return _from_copernicus()

In [18]:
def try_download_copernicus(temporal_resolution,SSP,name_variable,model,area,year,path_for_file,file_download,source):
    """Request a dataset from Copernicus 'projections-cmip6' and extract it.

    Parameters
    ----------
    temporal_resolution : 'daily', 'monthly' or 'fixed'; with 'fixed' no year,
        month or day is sent in the request.
    SSP : experiment sent in the request.
    name_variable : variable sent in the request.
    model : model sent in the request.
    area : sub-region of interest; None or an empty sequence requests the whole region.
    year : year(s) sent in the request (ignored when temporal_resolution is 'fixed').
    path_for_file, file_download, source : forwarded to download_extract.

    Returns
    -------
    The path returned by download_extract, or None when the request failed.
    """
    c = cdsapi.Client() # client giving access to c.retrieve
    # basic dictionary giving c.retrieve the parameters asked by the user
    variables = {
                'format': 'zip', # this function is only designed to download and unzip zip files
                'temporal_resolution': temporal_resolution,
                'experiment': SSP,
                'variable': name_variable,
                'model': model,
    }

    # len() also works for arrays, whose comparison with [] is ambiguous in an if statement
    if area is not None and len(area) > 0: # the user is interested in a sub-region and not the whole region
        variables['area'] = area

    if name_variable == 'air_temperature':
        # [hPa]; standard pressure at sea level is 1013.25 hPa, so 1000 hPa is the nearest available value
        variables['level'] = '1000'

    if temporal_resolution != 'fixed': # if 'fixed', no year, month, day to choose
        variables['year'] = year # period chosen by the user
        variables['month'] = calendar.default_month # by default, all the months are given; defined in class calendar
        if temporal_resolution == 'daily':
            variables['day'] = calendar.default_day # by default, all the days are given; defined in class calendar

    # c.retrieve downloads the data from the website
    try:
        c.retrieve(
            'projections-cmip6',
            variables,
            'download.zip') # the zip file is saved in the current directory
    except Exception as error: # a bare except would also swallow KeyboardInterrupt / SystemExit
        print('Some parameters are not matching')
        print('Reason reported by the request: '+repr(error))
        return None # stop the function, because the request could not be served
    print('The file has been download')
    # extract the downloaded zip
    path_file = download_extract(path_for_file,file_download,source)
    print('The file has been extracted')
    return path_file

In [19]:
# download_extract aims to return the path where the downloaded zip file is extracted

def download_extract(path_for_file,file_download,source):
    """Extract 'download.zip' into path_for_file/source and keep the zip in file_download.

    The zip is taken from the current working directory when it is there (fresh
    download), otherwise from file_download (zip kept from an earlier run). It is
    never moved to a machine-specific folder, so the function works whatever the
    working directory is.

    Parameters
    ----------
    path_for_file : folder receiving the extracted data; created if missing.
    file_download : folder in which 'download.zip' is kept; created if missing.
    source : name of the sub-folder of path_for_file receiving the extracted files.

    Returns
    -------
    os.path.join(path_for_file, source), the folder holding the extracted files.

    Raises
    ------
    FileNotFoundError if 'download.zip' is neither in the working directory nor
    in file_download.
    """
    from zipfile import ZipFile

    zip_name = 'download.zip'
    archived_zip = os.path.join(file_download,zip_name)

    if not os.path.isdir(path_for_file): # path_for_file does not exist yet
        os.makedirs(path_for_file)
        print('Path for the file is created, did not existed before')
    if not os.path.isdir(file_download): # file_download does not exist yet
        os.makedirs(file_download)

    # a zip in the working directory is the fresh download; otherwise use the stored one
    zip_to_extract = zip_name if os.path.isfile(zip_name) else archived_zip

    path_file = os.path.join(path_for_file,source)
    with ZipFile(zip_to_extract, 'r') as zf: # the with statement closes the zip even if extraction fails
        zf.extractall(path_file)

    # store the fresh zip next to the other downloads; an explicit file name as
    # destination lets shutil.move replace a zip left by an earlier run
    if os.path.abspath(zip_to_extract) != os.path.abspath(archived_zip):
        shutil.move(zip_to_extract,archived_zip)

    print('\n The downloaded file is extracted')
    return path_file

In [20]:
# search_for_nc is a function looking in path_for_file for a file in .nc format

def search_for_nc(path_for_file):
    """Return the path of the first .nc file listed in path_for_file.

    The path found is passed through path_length before being returned.
    When the folder holds no .nc file, a message is printed and None is returned.
    """
    print('path_for_file does exist Function copernicus search for nc')
    # first entry of the folder listing whose name ends with .nc, if any
    nc_name = next((entry for entry in os.listdir(path_for_file) if entry.endswith(".nc")), None)

    if nc_name is None:
        # the whole folder has been searched and there is no nc file in it
        print('Problem : No nc file was found Function copernicus Function copernicus search for nc')
        return None

    raw_path = os.path.join(path_for_file, nc_name)
    print('The file is in the path Function copernicus search for nc\n')
    print('Before path_length, The final path for the nc file is: '+raw_path)
    print('\n The final path for nc file exists ? '+str(os.path.isfile(raw_path))+'\n')

    checked_path = path_length(raw_path) # check if length of path is too long
    print('After path_length, The final path for the nc file is: '+checked_path)
    print('\n The final path for nc file exists ? '+str(os.path.isfile(checked_path))+'\n')
    return checked_path # the function returns the path of the nc file of interest

In [21]:
# this function tests if the path is too long
# if the path is more than 250 characters long, it is modified so that Windows accepts it as a path

def path_length(str1):
    """Return str1 in a form Windows accepts when it is longer than 250 characters.

    Paths of at most 250 characters are returned unchanged. Longer paths are made
    absolute and given the extended-length prefix: '\\\\?\\UNC\\' for network
    paths (starting with two backslashes), '\\\\?\\' otherwise.

    A path that already carries the '\\\\?\\' prefix is returned unchanged, so the
    function can safely be applied several times to the same path.
    """
    extended_prefix = "\\\\?\\"
    if len(str1) <= 250:
        return str1
    if str1.startswith(extended_prefix):
        # already in extended-length form; it also starts with two backslashes and
        # would otherwise be rewritten as the invalid '\\?\UNC\?\...'
        return str1
    path = os.path.abspath(str1) # normalize path
    if path.startswith("\\\\"): # network (UNC) path
        return extended_prefix + "UNC\\" + path[2:]
    return extended_prefix + path

In [22]:
# function to create path for the downloaded file
def create_file_download_path(start_path,name_variable,name_area,SSP,model,year,temporal_resolution,source):
    """Build the folder path in which the downloaded zip is stored.

    The path is start_path/name_variable/name_area/SSP/model/<period>/source,
    where <period> depends on year:
      - one year            -> that year
      - several years       -> 'first-last'
      - no year and 'fixed' -> 'fixed_period'

    Parameters
    ----------
    year : a single year given as a string, or a sequence of years (strings or
        numbers); None is treated as an empty sequence.
    temporal_resolution : only used when no year is given.

    Raises
    ------
    ValueError when no year is given and temporal_resolution is not 'fixed'.
    """
    # normalise year to a list of strings: a bare string is ONE year, not a
    # sequence of characters, and numbers cannot be concatenated with '-'
    if year is None:
        years = []
    elif isinstance(year, str):
        years = [year]
    else:
        years = [str(single_year) for single_year in year]

    # adapt the name of the folder for the period, depending on the type of period
    if len(years) == 1:
        period = years[0]
    elif len(years) > 1:
        period = years[0]+'-'+years[-1]
    elif temporal_resolution == 'fixed':
        period = 'fixed_period'
    else:
        raise ValueError("year is empty although temporal_resolution is not 'fixed'")
    return os.path.join(start_path,name_variable,name_area,SSP,model,period,source)

In [23]:
### Identify index of latitudes and longitudes

In [24]:
# this function aims to return the closest latitudes and longitudes to the projects, and the respective indexes
#  in the lat and lon vectors of the file
def _lat_lon(path,lat_projects,lon_projects):
    """Find, for every project, the grid point of the file closest to it.

    Parameters
    ----------
    path : path of the netCDF file, opened with xr.open_dataset.
    lat_projects, lon_projects : latitudes and longitudes of the projects, in
        the same order (read by position).

    Returns
    -------
    index_closest_lat, index_closest_lon, closest_value_lat, closest_value_lon :
        four lists with one entry per project: the indexes in the lat / lon
        vectors of the file, then the corresponding lat / lon values.
    """
    # the with statement closes the file even if a coordinate is missing
    with xr.open_dataset(path) as ds:
        lat = ds.lat.values
        lon = ds.lon.values

    # plain arrays are read by position, whatever the index of a pandas Series is
    project_lats = np.asarray(lat_projects)
    project_lons = np.asarray(lon_projects)

    index_closest_lat = []
    index_closest_lon = []
    closest_value_lat = []
    closest_value_lon = []
    for j in range(len(project_lats)):
        (lat_index,lat_value) = closest_lat_lon_to_proj(project_lats[j],lat)
        index_closest_lat.append(lat_index[0])
        closest_value_lat.append(lat_value[0])
        (lon_index,lon_value) = closest_lat_lon_to_proj(project_lons[j],lon)
        index_closest_lon.append(lon_index[0])
        closest_value_lon.append(lon_value[0])
    return index_closest_lat,index_closest_lon,closest_value_lat,closest_value_lon


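# this function aims to select the closest point to the geographical point of the project
# the function takes as input
#     location_project, which is a numpy.float64
#     vector, which is a numpy.ndarray
# the function returns
#     index_closest, the array of index(es) of the element of vector closest to location_project
#     closest_value, the array holding that element; closest_value[0] is a numpy.float64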

def closest_lat_lon_to_proj(location_project,vector):
    """Select the element of vector that is closest to location_project.

    Parameters
    ----------
    location_project : coordinate of the project (a number).
    vector : coordinates available in the file (array-like of numbers).

    Returns
    -------
    index_closest : array of length 1 with the index, in vector, of the closest
        element (the first one when several elements are equally close).
    closest_value : array of length 1 with that element of vector.

    Raises
    ------
    ValueError if vector is empty.
    """
    vector = np.asarray(vector)
    # the distance is minimised directly: testing any() on an array of indexes
    # treats a match at index 0 as "no match", and comparing floats with ==
    # depends on the sign of the difference
    distance = np.abs(vector - location_project)
    index_closest = np.array([np.argmin(distance)])
    closest_value = vector[index_closest]
    return index_closest, closest_value


# Register copernicus data

In [25]:
# years of the studied period, as returned by year_copernicus for [y_start, y_end]
year, year_str, index = year_copernicus(y_start, y_end)

# collect the historical Copernicus CMIP6 data for every project and model
df = csv_copernicus(
    temporal_resolution,
    year_str,
    copernicus_elements.experiments_historical,
    copernicus_elements.models,
    out_path,
    global_variable,
    name_variable,
    name_projects,
    area_projects,
    lat_projects,
    lon_projects,
    'Copernicus-CMIP6',
)

############################### Project name: WTP_Mutua_EIB ###############################
FUNCTION DATAFRAME_COPERNICUS
k = 0
Test with scenario historical
Test with model access_cm2
path_for_file does exist Function copernicus search for nc
The file is in the path Function copernicus search for nc

Before path_length, The final path for the nc file is: \\COWI.net\projects\A245000\A248363\CRVA\Datasets\precipitation\WTP_Mutua_EIB\historical\access_cm2\1950-2014\Copernicus-CMIP6\pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_19500101-20141231_v20191108.nc

 The final path for nc file exists ? True

After path_length, The final path for the nc file is: \\COWI.net\projects\A245000\A248363\CRVA\Datasets\precipitation\WTP_Mutua_EIB\historical\access_cm2\1950-2014\Copernicus-CMIP6\pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_19500101-20141231_v20191108.nc

 The final path for nc file exists ? True

suppose to enter once

index_closest_lat
[1, 2, 1, 6]

len(index_closest_lat)
4

index_closest_lon
[27

2023-07-04 15:37:27,133 INFO Welcome to the CDS
2023-07-04 15:37:27,134 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/projections-cmip6
2023-07-04 15:37:27,248 INFO Request is queued
2023-07-04 15:37:28,283 INFO Request is failed
2023-07-04 15:37:28,284 ERROR Message: an internal error occurred processing your request
2023-07-04 15:37:28,285 ERROR Reason:  No matching data for request {'experiment': 'historical', 'model': 'AWI-CM-1-1-MR', 'temporal_resolution': 'day', 'variable': 'pr'}
2023-07-04 15:37:28,285 ERROR   Traceback (most recent call last):
2023-07-04 15:37:28,286 ERROR     File "/opt/cdstoolbox/cdscompute/cdscompute/cdshandlers/services/handler.py", line 59, in handle_request
2023-07-04 15:37:28,286 ERROR       result = cached(context.method, proc, context, context.args, context.kwargs)
2023-07-04 15:37:28,287 ERROR     File "/opt/cdstoolbox/cdscompute/cdscompute/caching.py", line 108, in cached
2023-07-04 15:37:28,287 ERROR       result = proc(

Some parameters are not matching

No value were found for the period and the project tested

Test with model bcc_csm2_mr
path_for_file does exist Function copernicus search for nc
The file is in the path Function copernicus search for nc

Before path_length, The final path for the nc file is: \\COWI.net\projects\A245000\A248363\CRVA\Datasets\precipitation\WTP_Mutua_EIB\historical\bcc_csm2_mr\1950-2014\Copernicus-CMIP6\pr_day_BCC-CSM2-MR_historical_r2i1p1f1_gn_19500101-20141231_v20181116.nc

 The final path for nc file exists ? True

After path_length, The final path for the nc file is: \\COWI.net\projects\A245000\A248363\CRVA\Datasets\precipitation\WTP_Mutua_EIB\historical\bcc_csm2_mr\1950-2014\Copernicus-CMIP6\pr_day_BCC-CSM2-MR_historical_r2i1p1f1_gn_19500101-20141231_v20181116.nc

 The final path for nc file exists ? True

suppose to enter once

index_closest_lat
[1, 2, 1, 7]

len(index_closest_lat)
4

index_closest_lon
[46, 45, 45, 47]

len(index_closest_lon)
4

closest_value_lat
[

2023-07-04 15:37:59,915 INFO Welcome to the CDS
2023-07-04 15:37:59,917 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/projections-cmip6
2023-07-04 15:37:59,977 INFO Request is queued



i = 0

closest_value_lat[i]-19.62606908419931

type(closest_value_lat[i])<class 'numpy.float64'>

Value were found for the period and the project tested

Test with model cams_csm1_0
path_for_file does not exist: the data may not have been downloaded
file_download does not exist: the data were not downloaded


2023-07-04 15:38:01,015 INFO Request is failed
2023-07-04 15:38:01,016 ERROR Message: an internal error occurred processing your request
2023-07-04 15:38:01,016 ERROR Reason:  No matching data for request {'experiment': 'historical', 'model': 'CAMS-CSM1-0', 'temporal_resolution': 'day', 'variable': 'pr'}
2023-07-04 15:38:01,017 ERROR   Traceback (most recent call last):
2023-07-04 15:38:01,017 ERROR     File "/opt/cdstoolbox/cdscompute/cdscompute/cdshandlers/services/handler.py", line 59, in handle_request
2023-07-04 15:38:01,019 ERROR       result = cached(context.method, proc, context, context.args, context.kwargs)
2023-07-04 15:38:01,019 ERROR     File "/opt/cdstoolbox/cdscompute/cdscompute/caching.py", line 108, in cached
2023-07-04 15:38:01,020 ERROR       result = proc(context, *context.args, **context.kwargs)
2023-07-04 15:38:01,021 ERROR     File "/opt/cdstoolbox/cdscompute/cdscompute/services.py", line 124, in __call__
2023-07-04 15:38:01,022 ERROR       return p(*args, **kwar

Some parameters are not matching

No value were found for the period and the project tested

Test with model canesm5_canoe
path_for_file does not exist: the data may not have been downloaded
file_download does not exist: the data were not downloaded


2023-07-04 15:38:02,171 INFO Request is failed
2023-07-04 15:38:02,172 ERROR Message: an internal error occurred processing your request
2023-07-04 15:38:02,172 ERROR Reason:  No matching data for request {'experiment': 'historical', 'model': 'CanESM5-CanOE', 'temporal_resolution': 'day', 'variable': 'pr'}
2023-07-04 15:38:02,173 ERROR   Traceback (most recent call last):
2023-07-04 15:38:02,174 ERROR     File "/opt/cdstoolbox/cdscompute/cdscompute/cdshandlers/services/handler.py", line 59, in handle_request
2023-07-04 15:38:02,175 ERROR       result = cached(context.method, proc, context, context.args, context.kwargs)
2023-07-04 15:38:02,175 ERROR     File "/opt/cdstoolbox/cdscompute/cdscompute/caching.py", line 108, in cached
2023-07-04 15:38:02,175 ERROR       result = proc(context, *context.args, **context.kwargs)
2023-07-04 15:38:02,176 ERROR     File "/opt/cdstoolbox/cdscompute/cdscompute/services.py", line 124, in __call__
2023-07-04 15:38:02,178 ERROR       return p(*args, **kw

Some parameters are not matching

No value were found for the period and the project tested

Test with model cesm2_fv2
path_for_file does exist Function copernicus search for nc
The file is in the path Function copernicus search for nc

Before path_length, The final path for the nc file is: \\COWI.net\projects\A245000\A248363\CRVA\Datasets\precipitation\WTP_Mutua_EIB\historical\cesm2_fv2\1950-2014\Copernicus-CMIP6\pr_day_CESM2-FV2_historical_r1i1p1f1_gn_19500101-20141231_v20191120.nc

 The final path for nc file exists ? True

After path_length, The final path for the nc file is: \\COWI.net\projects\A245000\A248363\CRVA\Datasets\precipitation\WTP_Mutua_EIB\historical\cesm2_fv2\1950-2014\Copernicus-CMIP6\pr_day_CESM2-FV2_historical_r1i1p1f1_gn_19500101-20141231_v20191120.nc

 The final path for nc file exists ? True

suppose to enter once

index_closest_lat
[0, 1, 0, 4]

len(index_closest_lat)
4

index_closest_lon
[20, 20, 19, 20]

len(index_closest_lon)
4

closest_value_lat
[-19.894736

2023-07-04 15:38:04,416 INFO Welcome to the CDS
2023-07-04 15:38:04,417 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/projections-cmip6



i = 0

closest_value_lat[i]-19.89473684210526

type(closest_value_lat[i])<class 'numpy.float64'>

Value were found for the period and the project tested

Test with model cesm2_waccm_fv2
path_for_file does not exist: the data may not have been downloaded
file_download does not exist: the data were not downloaded


2023-07-04 15:38:04,486 INFO Request is queued
2023-07-04 15:38:05,539 INFO Request is running
2023-07-04 15:38:12,794 INFO Request is failed
2023-07-04 15:38:12,794 ERROR Message: an internal error occurred processing your request
2023-07-04 15:38:12,794 ERROR Reason:  Process error: Resulting object does not have monotonic global indexes along dimension time
2023-07-04 15:38:12,795 ERROR   Traceback (most recent call last):
2023-07-04 15:38:12,795 ERROR     File "/usr/local/lib/python3.6/site-packages/rooki/results.py", line 33, in url
2023-07-04 15:38:12,796 ERROR       return self.response.get()[0]
2023-07-04 15:38:12,797 ERROR     File "/usr/local/lib/python3.6/site-packages/birdy/client/outputs.py", line 40, in get
2023-07-04 15:38:12,798 ERROR       raise ProcessFailed("Sorry, process failed.")
2023-07-04 15:38:12,799 ERROR   birdy.exceptions.ProcessFailed: Sorry, process failed.
2023-07-04 15:38:12,848 INFO Welcome to the CDS
2023-07-04 15:38:12,848 INFO Sending request to http

Some parameters are not matching

No value were found for the period and the project tested

Test with model cmcc_cm2_hr4
path_for_file does not exist: the data may not have been downloaded
file_download does not exist: the data were not downloaded


2023-07-04 15:38:13,957 INFO Request is running
2023-07-04 15:41:04,394 INFO Request is completed
2023-07-04 15:41:04,395 INFO Downloading https://download-0003-clone.copernicus-climate.eu/cache-compute-0003/cache/data9/adaptor.esgf_wps.retrieve-1688478055.9662063-19665-11-65e07e45-de4f-4188-8ac4-219abb8f4030.zip to download.zip (188M)
2023-07-04 15:41:43,479 INFO Download rate 4.8M/s                                                                      


The file has been download
Path for the file is created, did not existed before

 The downloaded file is extracted
The file has been extracted
path_for_file does exist Function copernicus search for nc
The file is in the path Function copernicus search for nc

Before path_length, The final path for the nc file is: \\COWI.net\projects\A245000\A248363\CRVA\Datasets\precipitation\WTP_Mutua_EIB\historical\cmcc_cm2_hr4\1950-2014\Copernicus-CMIP6\pr_day_CMCC-CM2-HR4_historical_r1i1p1f1_gn_19500101-20141231_v20200904.nc

 The final path for nc file exists ? True

After path_length, The final path for the nc file is: \\COWI.net\projects\A245000\A248363\CRVA\Datasets\precipitation\WTP_Mutua_EIB\historical\cmcc_cm2_hr4\1950-2014\Copernicus-CMIP6\pr_day_CMCC-CM2-HR4_historical_r1i1p1f1_gn_19500101-20141231_v20200904.nc

 The final path for nc file exists ? True



---------------  Path to nc file exists ?? ---------------

True


suppose to enter once

index_closest_lat
[2, 3, 2, 9]

len(index_cl

2023-07-04 15:43:07,800 INFO Welcome to the CDS
2023-07-04 15:43:07,802 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/projections-cmip6
2023-07-04 15:43:07,882 INFO Request is queued
2023-07-04 15:43:08,959 INFO Request is running
2023-07-04 15:45:59,749 INFO Request is completed
2023-07-04 15:45:59,750 INFO Downloading https://download-0019.copernicus-climate.eu/cache-compute-0019/cache/data2/adaptor.esgf_wps.retrieve-1688478311.7067168-30684-14-d1bf47af-0ab5-499c-bab0-6fc6aa09e91e.zip to download.zip (201.6M)
2023-07-04 15:46:42,896 INFO Download rate 4.7M/s                                                                      


The file has been download
Path for the file is created, did not existed before

 The downloaded file is extracted
The file has been extracted
path_for_file does exist Function copernicus search for nc
The file is in the path Function copernicus search for nc

Before path_length, The final path for the nc file is: \\COWI.net\projects\A245000\A248363\CRVA\Datasets\precipitation\WTP_Mutua_EIB\historical\cmcc_esm2\1950-2014\Copernicus-CMIP6\pr_day_CMCC-ESM2_historical_r1i1p1f1_gn_19500101-20141231_v20210114.nc

 The final path for nc file exists ? True

After path_length, The final path for the nc file is: \\COWI.net\projects\A245000\A248363\CRVA\Datasets\precipitation\WTP_Mutua_EIB\historical\cmcc_esm2\1950-2014\Copernicus-CMIP6\pr_day_CMCC-ESM2_historical_r1i1p1f1_gn_19500101-20141231_v20210114.nc

 The final path for nc file exists ? True



---------------  Path to nc file exists ?? ---------------

True


suppose to enter once

index_closest_lat
[2, 3, 2, 9]

len(index_closest_lat)
4

2023-07-04 15:48:14,344 INFO Welcome to the CDS
2023-07-04 15:48:14,345 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/projections-cmip6
2023-07-04 15:48:14,450 INFO Request is queued
2023-07-04 15:48:15,512 INFO Request is running
2023-07-04 16:00:35,230 INFO Request is completed
2023-07-04 16:00:35,231 INFO Downloading https://download-0017.copernicus-climate.eu/cache-compute-0017/cache/data3/adaptor.esgf_wps.retrieve-1688479048.1472487-5080-17-c8bd1d41-2823-4111-9846-51c3ad78fe26.zip to download.zip (907.5M)
2023-07-04 16:03:16,569 INFO Download rate 5.6M/s                                                                      


The file has been download
Path for the file is created, did not existed before

 The downloaded file is extracted
The file has been extracted
path_for_file does exist Function copernicus search for nc
The file is in the path Function copernicus search for nc

Before path_length, The final path for the nc file is: \\COWI.net\projects\A245000\A248363\CRVA\Datasets\precipitation\WTP_Mutua_EIB\historical\cnrm_cm6_1_hr\1950-2014\Copernicus-CMIP6\pr_day_CNRM-CM6-1-HR_historical_r1i1p1f2_gr_19500101-20080905_v20191021.nc

 The final path for nc file exists ? True

After path_length, The final path for the nc file is: \\COWI.net\projects\A245000\A248363\CRVA\Datasets\precipitation\WTP_Mutua_EIB\historical\cnrm_cm6_1_hr\1950-2014\Copernicus-CMIP6\pr_day_CNRM-CM6-1-HR_historical_r1i1p1f2_gr_19500101-20080905_v20191021.nc

 The final path for nc file exists ? True



---------------  Path to nc file exists ?? ---------------

True


suppose to enter once

index_closest_lat
[3, 5, 4, 17]

len(ind

IndexError: index 22704 is out of bounds for axis 0 with size 21448

In [None]:
# problem: the latitude and longitude closest to the project are not found! Why? Because the latitude in the file at this path is smaller than the one of the project

In [26]:
path = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\precipitation\WTP_Mutua_EIB\historical\cnrm_cm6_1_hr\1950-2014\Copernicus-CMIP6\pr_day_CNRM-CM6-1-HR_historical_r1i1p1f2_gr_19500101-20080905_v20191021.nc'

In [27]:
ds = xr.open_dataset(path)

In [28]:
ds.variables['lat'][0].values

array(-21.22050565)

In [38]:
type(ds.variables['time'].values[0])

numpy.datetime64

In [46]:
max(ds.indexes['time'].year)

2008

In [None]:
    (dates, index_dates)=date_copernicus(temporal_resolution,year_str) # create time vector depending on temporal resolution


In [None]:
index_dates

In [None]:
dates

In [None]:
ds.variables['time'].values[0]

In [None]:
ds.variables['time_bnds']

In [None]:
time_cft=ds.indexes['time'].to_datetimeindex()

In [None]:
type(time_cft[0].month)

In [None]:
len(time_cft)

In [None]:
# 'year-month-day' label (without zero padding) for every timestamp of time_cft
time_cft_to_comp = ['-'.join(map(str, (stamp.year, stamp.month, stamp.day))) for stamp in time_cft]

In [None]:
type(time_cft_to_comp)

In [60]:
ds.indexes['time'].year#.to_pydatetime()

Index([1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950,
       ...
       2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008, 2008],
      dtype='int32', name='time', length=21433)

In [57]:
test.dt.to_period('M')

AttributeError: 'numpy.ndarray' object has no attribute 'dt'

In [None]:
path2 = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\precipitation\WTP_Mutua_EIB\historical\access_cm2\1950-2014\Copernicus-CMIP6\pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_19500101-20141231_v20191108.nc'

In [None]:
ds2 = xr.open_dataset(path2)

In [None]:
ds2.variables['pr']

In [None]:
np.shape(ds2.variables['pr'].isel(lat=40,lon=10).values)

In [None]:
ds2.variables['time'].values[0]

In [None]:
np.shape(ds2.variables['pr'].isel(time=index_dates,lat=10,lon=10).values)

In [None]:
pd.to_datetime(ds2.variables['time'])[0].year

In [None]:
len(pd.to_datetime(ds2.variables['time']))

In [None]:
# 'year-month-day' label (without zero padding) for every timestamp of ds2.
# The time axis is converted once: converting it inside the comprehension
# re-parsed the whole axis three times for every single element.
time_to_compare = [str(stamp.year)+'-'+str(stamp.month)+'-'+str(stamp.day) for stamp in pd.to_datetime(ds2.variables['time'])]

In [None]:
type(time_to_compare)

In [None]:
list(set(time_to_compare) - set(time_cft_to_comp))

In [None]:
# source of the problem found!! leap years

In [None]:
len(ds2.variables['time'].values)

In [None]:
test=ds.variables['pr'].isel(lat=10,lon=10).values

In [None]:
len(test)

In [None]:
index=np.where((dates.month == 2) & (dates.day ==29))[0]

In [None]:
index[:]

In [None]:
# Insert one NaN per position listed in `index`, one position at a time.
# np.insert returns a new array, so `test` itself is left untouched.
# Each insertion is made in the array already lengthened by the previous ones,
# so `i` is a position in the growing array, not in the original `test`.
# NOTE(review): presumably `index` holds the 29 February positions of `dates`
# computed in an earlier cell - confirm before relying on the result.
test3 = test
for i in index:
    test3=np.insert(test3,i,np.nan)

In [None]:
test2 = test
test2[index]

In [None]:
test2=np.insert(test2,index,np.nan)

In [None]:
test2[index]

In [None]:
test[np.where((dates.month == 2) & (dates.day ==29))[0]]

In [None]:
np.shape(np.where((dates.month == 2) & (dates.day ==29))[0]+1)

In [None]:
len(np.shape(np.where((dates.month == 2) & (dates.day ==29))[0]+1))

In [None]:
a = np.empty((len(np.where((dates.month == 2) & (dates.day ==29))[0]+1)),)
a.fill(np.nan)
a

In [None]:
dates[0]

In [None]:
dates.month

In [None]:
dates.day

In [None]:
m = [1,1,2,2]

In [None]:
m[np.where(m==2)[0]]='nan'

In [None]:
np.where(m==2)

In [None]:
np.nan

In [None]:
val =[8,8,8,8,8,8]

In [None]:
np.insert(val,[2,4],9)