This notebook groups the downloads of CMIP6 climate projection data from the Copernicus Climate Data Store: https://cds.climate.copernicus.eu/cdsapp#!/dataset/projections-cmip6?tab=form

# Packages, functions and paths

In [None]:
import geopandas as gpd
import os
import os.path
from netCDF4 import Dataset
import numpy as np

In [None]:
def download_extract(path_file,path_for_file):
    """Extract every member of a zip archive into a folder.

    Parameters
    ----------
    path_file : str
        Folder that receives the extracted files.
    path_for_file : str
        Path of the zip archive to extract. A relative path is resolved
        against ``path_file``.

    Returns
    -------
    None

    Notes
    -----
    The extraction target is passed explicitly to ``extractall`` instead of
    changing the process working directory, so calling this function does
    not affect how relative paths are resolved in the rest of the notebook.
    """
    from zipfile import ZipFile

    # os.path.join returns path_for_file untouched when it is absolute and
    # anchors it in path_file when it is relative.
    archive_path = os.path.join(path_file, path_for_file)

    with ZipFile(archive_path, 'r') as zObject:
        print(zObject)
        # Extract all the members of the archive into the requested folder
        zObject.extractall(path_file)

    print('\n ----------------------------- The downloaded file is extracted in the indicated file -----------------------------')
    return

In [None]:
def path_length(str1):
    r"""Return a form of ``str1`` that Windows accepts even when it is very long.

    Python on Windows fails on paths longer than about 250 characters unless
    the extended-length prefix ``\\?\`` (or ``\\?\UNC\`` for network shares)
    is placed in front of the absolute path
    (source: https://stackoverflow.com/questions/29557760/long-paths-in-python-on-windows).

    Parameters
    ----------
    str1 : str
        Path to check.

    Returns
    -------
    str
        ``str1`` unchanged when it has at most 250 characters, when it
        already carries the extended-length prefix, or when the interpreter
        is not running on Windows; otherwise the absolute path with the
        extended-length prefix added.
    """
    extended_prefix = u"\\\\?\\"

    if len(str1) <= 250:
        # the path is short enough, nothing to do
        return str1

    if str1.startswith(extended_prefix):
        # already prefixed (e.g. built from a previous path_length result):
        # prefixing again would produce an invalid path
        return str1

    if os.name != 'nt':
        # the prefix only has a meaning on Windows
        return str1

    path = os.path.abspath(str1) # normalize path
    if path.startswith(u"\\\\"):
        # network share: \\server\share\... becomes \\?\UNC\server\share\...
        return u"\\\\?\\UNC\\" + path[2:]
    return extended_prefix + path

In [None]:
def search_for_nc(path_for_file):
    """Look in a folder for a file in .nc format and return its path.

    Parameters
    ----------
    path_for_file : str
        Folder to search. Only its direct content is examined.

    Returns
    -------
    str or None
        Path of the first entry returned by ``os.listdir`` whose name ends
        with ``.nc``, passed through ``path_length``. ``None`` when the
        folder holds no such entry (a message is printed in that case).
    """
    print('path_for_file does exist Function copernicus search for nc')
    for file in os.listdir(path_for_file):
        if not file.endswith(".nc"):
            continue

        final_path=os.path.join(path_for_file, file)

        print('The file is in the path Function copernicus search for nc\n')
        print('Before path_length, The final path for the nc file is: '+final_path)
        answer = str(os.path.isfile(final_path))
        print('\n The final path for nc file exists ? '+answer+'\n')
        final_path=path_length(final_path) # check if length of path is too long
        print('After path_length, The final path for the nc file is: '+final_path)
        answer = str(os.path.isfile(final_path))
        print('\n The final path for nc file exists ? '+answer+'\n')
        # the search stops at the first nc file found
        return final_path

    # the whole folder has been searched and there is no nc file in it
    print('Problem : No nc file was found Function copernicus Function copernicus search for nc')
    return None

In [None]:
# ---- Configuration: paths and spatial settings used by the cells below ----

# Project locations (shapefile with the location of the elements to evaluate)
data_folder=r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets'
#data_folder=os.path.join(os.pardir,'dataset')
project_location_path=os.path.join(data_folder,'Mozambique_PPIS/EIB_locations_few.shp')
project_id='Name' #name of column used as id

# Study boundary (described as optional, but read unconditionally when the shapefiles are loaded)
study_area_path=os.path.join(data_folder,'Mozambique_PPIS/mozambique.shp')

# Output folders
# NOTE(review): out_path points to a user-specific local folder and is reassigned
# later in the Temperature section; consider a single, configurable location.
#out_path=r'\\COWI.net\projects\A245000\A248363\CRVA\Scripts\outputs'
out_path=r'C:\Users\CLMRX\OneDrive - COWI\Documents\GitHub\CRVA_tool\outputs'
out_path_database=r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets'
# Base CRS to which the shapefiles are reprojected (default = 'EPSG:4326')
bCRS='EPSG:4326'

# Buffer for climate/grid variables
buffer=40000 #buffer in meters, 0 = no buffer is computed

# NOTE(review): EPSG:31983 appears to be a UTM zone defined for Brazil while the
# shapefiles above are for Mozambique; confirm that this metric CRS covers the
# project locations, otherwise the buffer in meters will be distorted.
mCRS='EPSG:31983' #metric CRS for buffer in meters (find relevant metric CRS for location!)

# User input
Only the part of interest for the user will be downloaded.

In [None]:
# Load the shapefiles and reproject them to the base CRS
projects = gpd.read_file(project_location_path).to_crs(bCRS)
study_area = gpd.read_file(study_area_path).to_crs(bCRS)

# Compute the buffer around the points/shapes.
# projects_buf is always defined so that the rest of the notebook also runs
# when buffer == 0 (in that case it is a plain copy of the project geometries).
if buffer != 0:
    projects_buf = projects.to_crs(mCRS)  # project to a CRS with metric units to get the buffer in meters
    projects_buf['geometry'] = projects_buf.buffer(buffer)  # assign the buffer as the new geometry
    projects_buf = projects_buf.to_crs(bCRS)  # project back to the original CRS
else:
    projects_buf = projects.copy()

# Plot the shapefiles (the buffer is only plotted when one was computed)
study_area.plot()
projects.plot()
if buffer != 0:
    projects_buf.plot()

# Register the geographic information concerning the projects.
# area_projects holds, in this order: lat min, lat max, lon min, lon max
# (one value per project in each element).
lon_projects = projects['geometry'].x
lat_projects = projects['geometry'].y
buffer_project = 0.1  # margin added around each project point, in the units of bCRS
area_projects = [
    lat_projects - buffer_project,
    lat_projects + buffer_project,
    lon_projects - buffer_project,
    lon_projects + buffer_project,
]  # list format

#### Determination of the geographical zone of interest
# A margin is added to the extreme latitudes and longitudes of the study area.
# The margin is larger on the longitude because the range of longitude is
# -180 to 180 while the range of latitude is -90 to 90.
lat_min_wanted = min(study_area['LAT'])-10
lat_max_wanted = max(study_area['LAT'])+10
lon_min_wanted = min(study_area['LON'])-20
lon_max_wanted = max(study_area['LON'])+20

# NOTE(review): the CDS API documents 'area' as [north, west, south, east];
# this list is ordered [lat min, lon min, lat max, lon max] - confirm the
# ordering wherever it is passed to a request.
area = [lat_min_wanted, lon_min_wanted, lat_max_wanted, lon_max_wanted]  # used to download from copernicus

# Show the table of project ids (last expression of the cell so that it is displayed)
projects[[project_id]]

# Temperature

The maximum monthly near-surface air temperature is needed for comparison with the CRU data used for downscaling.

**Unit conversion required:** the data are in kelvin (K); subtract 273.15 to obtain degrees Celsius.

In [None]:
# Variable and temporal resolution handled in this section; both values are
# also used to build the folder in which the downloaded files are stored.
name_variable = 'daily_maximum_near_surface_air_temperature'
temporal_resolution = 'monthly'

### Historical: from 1950 to 2013

In [None]:
# Root folder for the temperature downloads.
# NOTE: this reassigns out_path, replacing the value set in the configuration cell.
out_path = r'\\COWI.net\projects\A245000\A248363\CRVA\Datasets\temperature\Copernicus-CMIP6'

In [None]:
# Download the historical monthly data, for one model and one project first
import cdsapi

model = 'ec_earth3_cc'
first_year = 1950
last_year = 2013
project_position = 0  # position, in the projects table, of the project to download

# Name of the project, cleaned so that it can be used as a folder name
name_project = projects[project_id].iloc[project_position]
name_project = name_project.replace('/','').replace(' ','')

# Folder receiving the data and path of the temporary archive
period_folder = 'historical-{}-{}'.format(first_year, last_year)
path_file = os.path.join(out_path, name_variable, name_project, period_folder, temporal_resolution)
path_file_download = os.path.join(path_file,'download.zip')
path_file=path_length(path_file)
path_file_download=path_length(path_file_download)

# The folder must exist before the archive can be written into it
os.makedirs(path_file, exist_ok=True)

historical_years = [str(year) for year in range(first_year, last_year + 1)]
all_months = ['{:02d}'.format(month) for month in range(1, 13)]

# area_projects is ordered [lat min, lat max, lon min, lon max], whereas the
# CDS API expects 'area' as [north, west, south, east].
south = float(area_projects[0].iloc[project_position])
north = float(area_projects[1].iloc[project_position])
west = float(area_projects[2].iloc[project_position])
east = float(area_projects[3].iloc[project_position])

c = cdsapi.Client()

c.retrieve(
    'projections-cmip6',
    {
        'format': 'zip',
        'variable': name_variable,
        'temporal_resolution': temporal_resolution,
        'experiment': 'historical',
        'month': all_months,
        'year': historical_years,
        'model': model,
        'area': [north, west, south, east],
    },
    path_file_download)

# Unzip the archive next to it, then delete the archive
download_extract(path_file,path_file_download)
os.remove(path_file_download)