# CLINT WP5 : Define drivers, lags and where to find them
### 2023/08/21

## Adapted to clusters of new experiments
### 2023/10/23 - ongoing

In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
## DIRECTORIES
machine = 'juno'

if machine == 'laptop':
    metadatadir = '~/Documents/CMCC-GoogleDrive/ERA5/'
    
if machine == 'workstation':
    metadatadir = '~/Documents/CLINT-GoogleDrive/ERA5/'
    
if machine == 'juno':
    homedir = '/users-home/csp/as18623/'
    workmetadir = '/work/csp/as18623/CLINT_metadata/'
    metadatadir = '/data/csp/as18623/CLINT_metadata/'

In [3]:
experiment = '3low'

In [4]:
if experiment == '3low':


    ### Lags for drivers
    lag_tmax_EU1 = np.arange(0,16)  ## Central Europe
    lag_tmax_EU2 = np.arange(30,41) ## North Atlantic
    lag_mslp_EU1 = np.arange(0,8)   ## Mediterranean
    lag_mslp_EU2 = np.arange(70,86) ## Eastern Europe
    lag_mslp_WO2 = np.arange(40,61) ## Greenland
    lag_sm_EU1 = np.arange(0,26)    ## Eastern Europe
    lag_sic_AC1 = np.arange(30,51)  ## North Atlantic and North Pacific


    drivers = pd.DataFrame({'var':['tmax','tmax','mslp','mslp','mslp','sm','sic'],#name of variable
                           'era5_var':['mx2t','mx2t','msl','msl','msl','swvl1','sic'], #name of variable in ERA5
                           'cmip6_var':['tasmax','tasmax','psl','psl','psl','mrso','sic'], #name of variable in CMIP6
                           'test_nr':[3,3,3,3,3,3,3], #id number of experimen
                           'cluster':['EU1t','EU2t','EU1p','EU2p','WO2p','EU1sm','AC1sic'], #code of cluster
                           'cluster_info':['Central Europe','North Atlantic','Mediterranean','Eastern Europe',
                                           'Greenland','Eastern Europe','North Atlantic and North Pacific'], #name of cluster
                           'minlag':[0,30,0,70,40,0,30], #minimum lead time to be considered
                           'maxlag':[16,41,8,86,61,26,51], #maximum lead time
                           'cluster_centre_lon':[14,-6,4,32,-78,24,np.nan], #centroid of the cluster, longitude
                           'cluster_centre_lat':[53,42,36,54,80,56,np.nan], #centroid of the cluster, latitude
                           'cl_ortho_lon':[14,0,14,29,-56,30,np.nan], #figure param: centre of projection, lon
                           'cl_ortho_lat':[49,50,40,51,72,54,np.nan], # as above but lat
                           'cl_ext_W':[-2,-18,-16,12,-85,14,np.nan], #fig param: extremes of clusters
                           'cl_ext_E':[30,18,44,46,-20,46,np.nan],
                           'cl_ext_S':[38,28,25,44,48,46,np.nan],
                           'cl_ext_N':[60,72,50,63,90,62,np.nan],
                           'text_plot_lon':[28,20,np.nan,np.nan,-35,np.nan,np.nan],# fig param: where to plot the lead time
                           'text_plot_lat':[58,65,np.nan,np.nan,55,np.nan,np.nan],
                           'fig_width':[60,60,60,60,72,60,np.nan], #figure width
                           'ax_width':[12,12,20,12,12,15,np.nan], #width of the plots that compose the figures
                           'ax_height':[10,16,8,8,10,10,np.nan], #height of the plots that compose the figures
                           'clmask_test3':['labels7t2mEurope5.csv','labels7t2mEurope5.csv',
                                           'labels1mslpEurope5.csv','labels1mslpEurope5.csv',
                                           'labels2mslpWorld5.csv','labels3sm1Europe5.csv',np.nan], 
                            #name of the file with cluster details
                           'cl_nr':[1,2,1,2,2,1,1], #number of the cluster as it appears in the file above
                           'vmin':[-10,-10,-20,-20,-20,-0.2,-0.2], #minimum value for the maps
                           'vmax':[10,10,20,20,20,0.2,0.2], #maximum value for the maps
                           'empty':[np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan]}) 
    drivers['meanlag'] = np.floor(drivers[['minlag', 'maxlag']].mean(axis=1)).astype(int)
    drivers.to_csv(f'{workmetadir}drivers_Test3low.csv', index=False)
    #drivers.to_csv(f'{metadatadir}drivers.csv', index=False)

In [5]:
drivers

Unnamed: 0,var,era5_var,cmip6_var,test_nr,cluster,cluster_info,minlag,maxlag,cluster_centre_lon,cluster_centre_lat,...,text_plot_lat,fig_width,ax_width,ax_height,clmask_test3,cl_nr,vmin,vmax,empty,meanlag
0,tmax,mx2t,tasmax,3,EU1t,Central Europe,0,16,14.0,53.0,...,58.0,60.0,12.0,10.0,labels7t2mEurope5.csv,1,-10.0,10.0,,8
1,tmax,mx2t,tasmax,3,EU2t,North Atlantic,30,41,-6.0,42.0,...,65.0,60.0,12.0,16.0,labels7t2mEurope5.csv,2,-10.0,10.0,,35
2,mslp,msl,psl,3,EU1p,Mediterranean,0,8,4.0,36.0,...,,60.0,20.0,8.0,labels1mslpEurope5.csv,1,-20.0,20.0,,4
3,mslp,msl,psl,3,EU2p,Eastern Europe,70,86,32.0,54.0,...,,60.0,12.0,8.0,labels1mslpEurope5.csv,2,-20.0,20.0,,78
4,mslp,msl,psl,3,WO2p,Greenland,40,61,-78.0,80.0,...,55.0,72.0,12.0,10.0,labels2mslpWorld5.csv,2,-20.0,20.0,,50
5,sm,swvl1,mrso,3,EU1sm,Eastern Europe,0,26,24.0,56.0,...,,60.0,15.0,10.0,labels3sm1Europe5.csv,1,-0.2,0.2,,13
6,sic,sic,sic,3,AC1sic,North Atlantic and North Pacific,30,51,,,...,,,,,,1,-0.2,0.2,,40


In [6]:
experiment = '5low'

In [None]:
#if experiment == '5low':
      

In [7]:
   drivers_tmpl = pd.DataFrame({'var':[np.nan],#name of variable
                           'era5_var':[np.nan], #name of variable in ERA5
                           'cmip6_var':[np.nan], #name of variable in CMIP6
                           'exp':[np.nan], #id number of experimen
                           'domain':[np.nan], #Europe or World or North_Atlantic ot else
                           'cl_nr':[np.nan], #number of the cluster as it appears in the file above
                           'cl_code':[np.nan], #code of cluster
                           'cl_info':[np.nan], #name of cluster
                           'clmask_file':[np.nan], 
                            #name of the file with cluster details
                           'minlag':[np.nan], #minimum lead time to be considered
                           'maxlag':[np.nan], #maximum lead time
                           'cl_centroid_lon':[np.nan], #centroid of the cluster, longitude
                           'cl_centroid_lat':[np.nan], #centroid of the cluster, latitude
                           'cl_ortho_lon':[np.nan], #figure param: centre of projection, lon
                           'cl_ortho_lat':[np.nan], # as above but lat
                           'cl_ext_W':[np.nan], #fig param: extremes of clusters
                           'cl_ext_E':[np.nan],
                           'cl_ext_S':[np.nan],
                           'cl_ext_N':[np.nan],
                           'text_plot_lon':[np.nan],# fig param: where to plot the lead time
                           'text_plot_lat':[np.nan],
                           'fig_width':[np.nan], #figure width
                           'ax_width':[np.nan], #width of the plots that compose the figures
                           'ax_height':[np.nan], #height of the plots that compose the figures
                           'vmin':[np.nan], #minimum value for the maps
                           'vmax':[np.nan], #maximum value for the maps
                           'empty':[np.nan]}) 

In [8]:
drivers_tmpl

Unnamed: 0,var,era5_var,cmip6_var,test_nr,domain,cl_nr,cluster,cluster_info,clmask_test3,minlag,...,cl_ext_S,cl_ext_N,text_plot_lon,text_plot_lat,fig_width,ax_width,ax_height,vmin,vmax,empty
0,,,,,,,,,,,...,,,,,,,,,,


In [27]:
    #nr_cl = 3

In [28]:
    #empty = np.repeat(np.nan,nr_cl)

In [9]:
    ## these information should be read from a csv created by the algorithm
    drivers_input = pd.DataFrame({'var':['mslp'],
                                  'domain':['Europe'],
                                  'cl_code':['mslEurope_cluster3'],
                                  'cl_nr':[3],
                                  'minlag':[28],
                                  'maxlag':[48]})

In [10]:
    drivers_addrow = drivers_tmpl
    

In [13]:
    for clm in drivers_input.columns:
        drivers_addrow[clm] = drivers_input[clm]
        
    if drivers_addrow['var'][0] == 'mslp':
        drivers_addrow['era5_var'] = 'msl'
        drivers_addrow['cmip6_var'] = 'pls'
        drivers_addrow['vmin'] = -20
        drivers_addrow['vmax'] = 20
    
    drivers_addrow['']
    drivers_addrow['exp'] = experiment
        

In [12]:
drivers_addrow

Unnamed: 0,var,era5_var,cmip6_var,test_nr,domain,cl_nr,cluster,cluster_info,clmask_test3,minlag,...,cl_ext_S,cl_ext_N,text_plot_lon,text_plot_lat,fig_width,ax_width,ax_height,vmin,vmax,empty
0,mslp,msl,pls,,Europe,3,,,,28,...,,,,,,,,-20,20,


In [None]:
   
        

    
        drivers = pd.DataFrame({'var':['tmax','tmax','mslp','mslp','mslp','sm','sic'],#name of variable
                           'era5_var':['mx2t','mx2t','msl','msl','msl','swvl1','sic'], #name of variable in ERA5
                           'cmip6_var':['tasmax','tasmax','psl','psl','psl','mrso','sic'], #name of variable in CMIP6
                           'test_nr':[3,3,3,3,3,3,3], #id number of experimen
                           'cluster':['EU1t','EU2t','EU1p','EU2p','WO2p','EU1sm','AC1sic'], #code of cluster
                           'cluster_info':['Central Europe','North Atlantic','Mediterranean','Eastern Europe',
                                           'Greenland','Eastern Europe','North Atlantic and North Pacific'], #name of cluster
                           'minlag':[0,30,0,70,40,0,30], #minimum lead time to be considered
                           'maxlag':[16,41,8,86,61,26,51], #maximum lead time
                           'cluster_centre_lon':[14,-6,4,32,-78,24,np.nan], #centroid of the cluster, longitude
                           'cluster_centre_lat':[53,42,36,54,80,56,np.nan], #centroid of the cluster, latitude
                           'cl_ortho_lon':[14,0,14,29,-56,30,np.nan], #figure param: centre of projection, lon
                           'cl_ortho_lat':[49,50,40,51,72,54,np.nan], # as above but lat
                           'cl_ext_W':[-2,-18,-16,12,-85,14,np.nan], #fig param: extremes of clusters
                           'cl_ext_E':[30,18,44,46,-20,46,np.nan],
                           'cl_ext_S':[38,28,25,44,48,46,np.nan],
                           'cl_ext_N':[60,72,50,63,90,62,np.nan],
                           'text_plot_lon':[28,20,np.nan,np.nan,-35,np.nan,np.nan],# fig param: where to plot the lead time
                           'text_plot_lat':[58,65,np.nan,np.nan,55,np.nan,np.nan],
                           'fig_width':[60,60,60,60,72,60,np.nan], #figure width
                           'ax_width':[12,12,20,12,12,15,np.nan], #width of the plots that compose the figures
                           'ax_height':[10,16,8,8,10,10,np.nan], #height of the plots that compose the figures
                           'clmask_test3':['labels7t2mEurope5.csv','labels7t2mEurope5.csv',
                                           'labels1mslpEurope5.csv','labels1mslpEurope5.csv',
                                           'labels2mslpWorld5.csv','labels3sm1Europe5.csv',np.nan], 
                            #name of the file with cluster details
                           'cl_nr':[1,2,1,2,2,1,1], #number of the cluster as it appears in the file above
                           'vmin':[-10,-10,-20,-20,-20,-0.2,-0.2], #minimum value for the maps
                           'vmax':[10,10,20,20,20,0.2,0.2], #maximum value for the maps
                           'empty':[np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan]}) 

In [5]:
#if machine == 'juno':
#    drivers.to_csv(f'{workdir}drivers.csv', index=False)
#    os.rename(f'{workdir}drivers.csv',f'{metadatadir}drivers.csv')

OSError: [Errno 18] Invalid cross-device link: '/work/csp/as18623/drivers.csv' -> '/data/csp/as18623/CLINT_metadata/drivers.csv'

OSError: [Errno 18] Invalid cross-device link: '/work/csp/as18623/drivers.csv' -> '/data/csp/as18623/CLINT_metadata/drivers.csv'