In [1]:
import stageemi
import stageemi.dev.visu as dev
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import stageemi.dev.decorator_map as dm
import ipywidgets as widg 
import ipyleaflet as ipyl
import datetime as dt
import os 
from ipywidgets import Text, HTML
from ipyleaflet import WidgetControl
import pandas as pd
from datetime import datetime
import glob 

In [2]:
def choose_zone_timestep(dpt_nb,timestep=1):
    """Return the bounding box and subsampling settings for one department.

    Parameters
    ----------
    dpt_nb : str
        Department number; one of "34", "41", "38" or "29".
    timestep : int, optional
        Number of consecutive steps aggregated together. It is returned
        unchanged as ``subsampt``. Original author's note: keep this value
        odd, because the median taken in ``subset_ds()`` works with an odd
        number of steps.

    Returns
    -------
    tuple
        ``(latmin, latmax, lonmin, lonmax, subsampgeo, subsampt)``.

    Raises
    ------
    ValueError
        If ``dpt_nb`` is not one of the supported departments (the original
        if/elif chain fell through and failed with an UnboundLocalError).
    """
    # (latmin, latmax, lonmin, lonmax) per department number
    bounding_boxes = {
        "34": (43, 44, 2, 5),
        "41": (47, 48.5, 0, 3),
        "38": (44.5, 46, 4, 7),
        "29": (47.5, 49, -6, -3),
    }
    if dpt_nb not in bounding_boxes:
        raise ValueError(
            "Unknown department %r; expected one of %s"
            % (dpt_nb, sorted(bounding_boxes))
        )

    latmin, latmax, lonmin, lonmax = bounding_boxes[dpt_nb]
    subsampgeo = 1  # no spatial subsampling for any department
    subsampt = timestep

    return latmin,latmax,lonmin,lonmax,subsampgeo,subsampt

In [3]:
def subset_ds(fname,latmin,latmax,lonmin,lonmax,subsampgeo,subsampt):
    """Open a dataset file, crop it to a bounding box and coarsen it along ``step``.

    Parameters
    ----------
    fname : str
        Path of the file handed to ``xr.open_dataset``.
    latmin, latmax, lonmin, lonmax : float
        Bounding box. The latitude slice runs from ``latmax`` to ``latmin``
        (presumably because latitudes are stored in decreasing order —
        TODO confirm against the input files).
    subsampgeo : int
        Step of the latitude and longitude slices.
    subsampt : int
        Window size of the coarsening along ``step``; windows are reduced
        with the median and an incomplete trailing window is trimmed.

    Returns
    -------
    xarray.Dataset
        The cropped, coarsened dataset, fully loaded in memory, with
        latitude/longitude rounded to 5 decimals.
    """
    # The context manager closes the file as soon as the subset is in memory;
    # previously one file handle per call stayed open for the whole session.
    with xr.open_dataset(fname) as ds:  # ,chunks={"step":1})
        ds_sub = (
            ds.sel(latitude=slice(latmax, latmin, subsampgeo))
            .sel(longitude=slice(lonmin, lonmax, subsampgeo))
            .coarsen(step=subsampt, boundary="trim")
            .median()
            .load()  # materialise any lazily-read variable before closing
        )

    # Rounding works around the bug observed when multiplying the mask by the
    # dataset (the two grids must carry exactly equal coordinate values).
    ds_sub['latitude']=ds_sub['latitude'].round(5)
    ds_sub['longitude']=ds_sub['longitude'].round(5)
    return ds_sub

In [4]:
def make_mask(zone,**options):
    """Load the mask of a zone and return it with the centre of its grid.

    Parameters
    ----------
    zone : str
        Zone identifier; it is the file name (without ``.nc``) of the mask.
    **options
        ``action="sympo"`` reads the mask from the ZONE_SYMPO folder; any
        other value (or none) reads it from the department folder.

    Returns
    -------
    tuple
        ``(ds_mask, lat_center, lon_center)`` where the centres are the means
        of the latitude and longitude coordinate values of the mask file.
    """
    if options.get("action") == "sympo":
        fname_mask = '../../../StageEMI_masks/Masques_netcdf/ZONE_SYMPO/'+zone+'.nc'
    else:
        fname_mask = '../GeoData/nc_departement/'+zone+'.nc'

    # Load the mask in memory and close the file right away instead of
    # leaving an open handle behind for every zone.
    with xr.open_dataset(fname_mask) as ds:
        ds_mask = ds.load()

    # Rounding works around the bug observed when multiplying the mask by the
    # dataset (the two grids must carry exactly equal coordinate values).
    ds_mask["latitude"]=ds_mask["latitude"].round(5)
    ds_mask["longitude"]=ds_mask["longitude"].round(5)

    lat_center = ds_mask["latitude"].values.mean()
    lon_center = ds_mask["longitude"].values.mean()
    return ds_mask,lat_center,lon_center

In [5]:
def apply_mask(ds_sub,ds_mask):
    """Multiply a dataset by a zone mask and attach a ``time`` coordinate.

    The ``mask`` variable of ``ds_mask`` is squeezed along ``id`` before the
    multiplication. The ``time`` coordinate of the result is filled with the
    ``valid_time`` values of ``ds_sub``.
    """
    zone_mask = ds_mask["mask"].squeeze("id")
    valid_times = np.asarray(ds_sub['valid_time'])
    return (ds_sub * zone_mask).assign_coords({'time': valid_times})

In [20]:
def conversion(ds,name):
    """Convert the WWMF codes of ``ds.unknown`` into WME (compas) or W1 (agat) codes.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset holding the WWMF codes in its ``unknown`` variable. It is
        modified in place: the converted field is stored as ``wme_arr``
        (compas) or ``w1_arr`` (agat).
    name : str
        ``"compas"`` or ``"agat"``.

    Returns
    -------
    xarray.Dataset
        The same ``ds`` with the new variable added.

    Raises
    ------
    ValueError
        If ``name`` is neither ``"compas"`` nor ``"agat"`` (previously this
        surfaced later as an UnboundLocalError).
    """
    # name -> (variable created in ds, column of the code table to read)
    targets = {
        "compas": ("wme_arr", "Code WME"),
        "agat": ("w1_arr", "Code W1"),
    }
    if name not in targets:
        raise ValueError(
            "Unknown name %r; expected 'compas' or 'agat'" % (name,)
        )
    var_name, col_name = targets[name]

    file_CodesWWMF= '../utils/CodesWWMF.csv'
    df_WWMF = pd.read_csv(file_CodesWWMF,usecols = (0,1,2,3,6,7),sep=',')

    # Start from the WWMF field and replace each WWMF code by its counterpart.
    # The condition is always evaluated on the untouched WWMF field, so a
    # freshly written code can never be converted a second time.
    converted = ds.unknown
    wwmf_codes = df_WWMF["Code WWMF"].to_numpy()
    target_codes = df_WWMF[col_name].to_numpy()
    for wwmf, target in zip(wwmf_codes, target_codes):
        converted = converted.where(ds.unknown != wwmf, target)
    ds[var_name] = converted

    return ds

def distance(ds,name,**options):
    """Build, for every candidate code, the map of distances to the codes of the zone.

    For each code column ``w`` of the distance table a variable
    ``<prefix><w>`` is added to ``ds`` (prefix ``wme_c_`` for compas,
    ``w1_c_`` for agat). Each pixel whose code (``wme_arr`` / ``w1_arr``)
    equals a column header ``ww`` receives the table value at row ``ww``,
    column ``w``. Pixels matching no header keep their original code value.

    The table is read with its first column skipped. Rows are taken
    positionally, so they are assumed to be in the same order as the code
    columns — TODO confirm against the CSV files.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset already holding ``wme_arr`` (compas) or ``w1_arr`` (agat);
        modified in place.
    name : str
        ``"compas"`` or ``"agat"``.
    **options
        ``action="test"`` is a debug mode: only the first four codes are
        processed and the variables are named after the bare code, without
        prefix.

    Returns
    -------
    xarray.Dataset
        The same ``ds`` with the distance variables added.

    Raises
    ------
    ValueError
        If ``name`` is neither ``"compas"`` nor ``"agat"``.
    """
    # name -> (distance table, variable holding the codes, output prefix)
    tables = {
        "compas": ('../utils/distance_compas.csv', "wme_arr", "wme_c_"),
        "agat": ('../utils/distance_agat.csv', "w1_arr", "w1_c_"),
    }
    if name not in tables:
        raise ValueError(
            "Unknown name %r; expected 'compas' or 'agat'" % (name,)
        )
    fname_dist, var_name, varsh = tables[name]
    df_dist = pd.read_csv(fname_dist,sep=',')

    # Seed and test against the code field of the requested model. The
    # original always used ds.wme_arr here, which was wrong for "agat".
    source = ds[var_name]
    columns = list(df_dist.columns)
    ncodes = len(columns) - 1  # column 0 is not a code

    if options.get("action") == "test":
        # used to test over few codes only in debug mode
        nout = min(4, ncodes)
        prefix = ""
    else:
        nout = ncodes
        prefix = varsh

    for icol in range(1, nout + 1):
        dist_map = source
        for irow in range(1, ncodes + 1):
            # every pixel whose code is columns[irow] gets the distance
            # between that code and the candidate columns[icol]
            dist_map = dist_map.where(
                source != int(columns[irow]), df_dist.iloc[irow - 1, icol]
            )
        ds[prefix + columns[icol]] = dist_map

    return ds

def shortest_distance_temps_sensible(ds,name):
    """Pick, for every step, the candidate code with the smallest summed distance.

    The distance variables are those whose name starts with ``wme_c_``
    (compas) or ``w1_c_`` (agat). For each step every distance variable is
    summed, and the code of the variable with the smallest sum is retained.

    Results are written into ``ds.attrs``:

    * ``attrs[name]``: 1-D float array with the best code of each step;
    * ``attrs[name + " (step<i>)"]``: 1-D float array of the sums of step
      ``i``, in the order of the distance variables (kept for debugging).

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset holding the distance variables and a ``step`` dimension.
    name : str
        ``"compas"`` or ``"agat"``.

    Returns
    -------
    xarray.Dataset
        The same ``ds`` with the attributes added.

    Raises
    ------
    ValueError
        If ``name`` is unknown or ``ds`` holds no matching distance variable.
    """
    prefixes = {"compas": "wme_c_", "agat": "w1_c_"}
    if name not in prefixes:
        raise ValueError(
            "Unknown name %r; expected 'compas' or 'agat'" % (name,)
        )
    varsh = prefixes[name]

    # find all variables added by the distance calculation; a prefix match is
    # required because the code is read from what follows the prefix
    list_w = [
        var for var in ds.data_vars
        if isinstance(var, str) and var.startswith(varsh)
    ]
    if not list_w:
        raise ValueError(
            "No variable starting with %r in the dataset; run distance() first"
            % (varsh,)
        )

    nstep = ds.sizes["step"]
    best_w = np.full(nstep, np.nan)

    for istep in range(nstep):
        # the sums are computed once and reused for both the argmin and the
        # debug attribute
        dist_w = np.array(
            [float(np.sum(ds[w].isel(step=istep))) for w in list_w],
            dtype=float,
        )
        best_name = list_w[int(dist_w.argmin())]
        best_w[istep] = float(best_name[len(varsh):])

        ds.attrs[name+" (step"+str(istep)+")"]=dist_w

    # write the best code for each time step
    ds.attrs[name]=best_w

    return ds
    
def calculate_distance(ds,name):
    """Run the full distance pipeline for one model name.

    Applies, in order, ``conversion``, ``distance`` and
    ``shortest_distance_temps_sensible`` to ``ds`` with the given ``name``
    and returns the resulting dataset.
    """
    for stage in (conversion, distance, shortest_distance_temps_sensible):
        ds = stage(ds, name)
    return ds

In [7]:
def codes_legendes_wwmf():
    """Read the WWMF code table used to look up the dominant weather type.

    Returns
    -------
    tuple
        ``(code_WWMF, legende_WWMF, df)``: the 'Code WWMF' and 'Legende WWMF'
        columns as numpy arrays, followed by the table itself (columns
        0, 1, 2, 3, 6 and 7 of the CSV file).
    """
    table = pd.read_csv(
        '../utils/CodesWWMF.csv',
        usecols=(0, 1, 2, 3, 6, 7),
        sep=',',
    )
    codes = table['Code WWMF'].to_numpy()
    legends = table['Legende WWMF'].to_numpy()
    return codes, legends, table

In [8]:
def init_table_scores(ds_masked):
    """Create the empty score table, one row per time of ``ds_masked``.

    The index holds the times formatted as 'YYYY MM DD HH MM'. The columns
    are the three codes (WWMF, WME, W1) and the three coherency scores.
    """
    score_columns = [
        'WWMF',
        'WME',
        'W1',
        'WWMF vs WME',
        'WWMF vs W1',
        'WWMF vs (WME and W1)',
    ]
    time_labels = list(np.asarray(ds_masked.time.dt.strftime('%Y %m %d %H %M')))
    return pd.DataFrame(columns=score_columns, index=time_labels)

In [9]:
def store_results(df_scores,df,ds_masked,**options):
    """Fill ``df_scores`` with the codes and coherency scores of every step.

    For each step the dominant WWMF code of the zone is computed with
    ``majoritaire``. The best WME and W1 codes are read from
    ``ds_masked.attrs["compas"]`` and ``ds_masked.attrs["agat"]``. The row of
    ``df_scores`` matching the time of the step is then filled by
    ``codes_coherency``.

    Parameters
    ----------
    df_scores : pandas.DataFrame
        Score table indexed by time strings ('YYYY MM DD HH MM'); modified in
        place.
    df : pandas.DataFrame
        Code table with the columns 'Code WWMF', 'Legende WWMF', 'Code WME',
        'Legende WME', 'Code W1' and 'Legende W1'. The WWMF codes and legends
        are read from it rather than from notebook-level globals.
    ds_masked : xarray.Dataset
        Masked dataset with the ``unknown`` variable, a ``step`` dimension, a
        ``time`` coordinate and the ``compas`` / ``agat`` attributes.
    **options
        ``action="plot"`` additionally draws one map per step.

    Returns
    -------
    pandas.DataFrame
        ``df_scores``.
    """
    make_plot = options.get("action") == "plot"
    nstep = ds_masked.sizes['step']

    # Same arrays as the ones built by codes_legendes_wwmf(), but derived from
    # the argument so the function does not depend on hidden global state.
    wwmf_codes = df['Code WWMF'].to_numpy()
    wwmf_legends = df['Legende WWMF'].to_numpy()

    if make_plot:
        ncols = 2
        # at least the original 8 rows, more when there are over 16 steps
        # (the fixed 8x2 grid raised an IndexError in that case)
        nrows = max(8, (nstep + ncols - 1) // ncols)
        fig,axes = plt.subplots(nrows=nrows,ncols=ncols,figsize=(30,7.5*nrows))
        ax = axes.flat

    for istep in range(nstep):
        val = ds_masked.unknown.isel(step=istep).copy()
        code,leg = majoritaire(val.values, wwmf_codes,wwmf_legends)

        # Placeholders used in the plot title when a code has no legend;
        # without them the title raised a NameError or silently reused the
        # legend of the previous step.
        leg_WME = "unknown"
        leg_W1 = "unknown"

        code_WME=int(ds_masked.attrs["compas"][istep])
        if code_WME==17 or code_WME>19:
            print("Warning, one code WME is either equal to 17 or greater than the max 19: code WME=",code_WME,"in step:",istep)
        else:
            leg_WME=np.unique(df[df['Code WME']==code_WME]['Legende WME'])[0]

        code_W1=int(ds_masked.attrs["agat"][istep])
        # NOTE(review): the message mentions a max of 26 while the test
        # rejects 4, 12, 15, 25, 27 and anything above 28 — confirm which one
        # is intended.
        if code_W1 in (4, 12, 15, 25, 27) or code_W1>28:
            print("Warning, one code W1 is greater than the max 26: code W1=",code_W1,"in step:",istep)
        else:
            leg_W1=np.unique(df[df['Code W1']==code_W1]['Legende W1'])[0]

        time_step=str(np.asarray(ds_masked.time[istep].dt.strftime('%Y %m %d %H %M')))
        codes_coherency(df,df_scores,time_step,code,code_WME,code_W1)

        if make_plot:
            coherency = df_scores.loc[time_step, "WWMF vs (WME and W1)"]
            val.plot.imshow(ax=ax[istep],levels=range(0,90))
            ax[istep].set_title("WWMF:"+leg+' '+str(code)+" - WME:"+leg_WME+' '+str(code_WME)+"\nW1:"+leg_W1+' '+str(code_W1)+" Coherency= "+str(coherency),fontsize=30)
            ax[istep].set_xlabel('longitude',fontsize=20)
            ax[istep].set_ylabel('latitude',fontsize=20)

    if make_plot:
        plt.tight_layout()

    return df_scores

In [10]:
def majoritaire(data, code,legende):
    """Return the most frequent code of ``data`` together with its legend.

    ``code`` lists the candidate codes (WWMF or WME) and ``legende`` the
    legend of each candidate at the same position. Ties are resolved in
    favour of the first candidate (numpy ``argmax`` behaviour).
    """
    counts = np.zeros(code.size, dtype=int)
    for position in range(code.size):
        counts[position] = np.sum(data == code[position])
    winner = counts.argmax()
    return code[winner], legende[winner]

In [11]:
def codes_coherency(df,df_s,time_step,code,code_wme,code_w1):
    """Fill one row of the score table ``df_s`` with the codes and their coherency.

    Parameters
    ----------
    df : pandas.DataFrame
        Code table with the columns 'Code WWMF', 'Code WME' and 'Code W1'.
    df_s : pandas.DataFrame
        Score table, modified in place; ``time_step`` must be in its index.
    time_step : str
        Index label of the row to fill.
    code, code_wme, code_w1
        WWMF, WME and W1 codes of the step.

    The row receives the three codes and then 'WWMF vs WME' and 'WWMF vs W1'.
    Each of those is 1 when the given code equals the one listed in ``df``
    for the WWMF code (first matching row), else 0. 'WWMF vs (WME and W1)'
    is the sum of the two.

    Returns
    -------
    pandas.DataFrame
        ``df`` (the code table, unchanged), as in the original implementation.

    Raises
    ------
    KeyError
        If ``time_step`` is not in the index of ``df_s``.
    IndexError
        If ``code`` is not listed in ``df["Code WWMF"]``.
    """
    # .loc[row, col] would silently append a new row for an unknown label
    if time_step not in df_s.index:
        raise KeyError(time_step)

    # Single-step .loc[row, col] assignment: the former chained form
    # df_s.loc[row][col] = value writes to a temporary object when pandas
    # Copy-on-Write is active, leaving df_s untouched.
    df_s.loc[time_step, "WWMF"] = code
    df_s.loc[time_step, "WME"] = code_wme
    df_s.loc[time_step, "W1"] = code_w1

    wwmf_rows = df[df["Code WWMF"]==code]
    wme_match = int(code_wme == wwmf_rows["Code WME"].iloc[0])
    w1_match = int(code_w1 == wwmf_rows["Code W1"].iloc[0])

    df_s.loc[time_step, "WWMF vs WME"] = wme_match
    df_s.loc[time_step, "WWMF vs W1"] = w1_match
    df_s.loc[time_step, "WWMF vs (WME and W1)"] = wme_match + w1_match

    return df

In [12]:
def save_df(df_scores,res_path,zone):
    """Write ``df_scores`` to a CSV file in the scores folder.

    The file is named ``<res_path><zone>_scores_<first index label>.csv``,
    with the spaces of the first index label removed. The index and the
    header are both written.
    """
    first_stamp = df_scores.index[0].replace(" ", "")
    foutname = "".join((res_path, zone, "_", "scores_", first_stamp, ".csv"))
    print('Currently saving file: ' + foutname)
    df_scores.to_csv(foutname, index=True, header=True)

In [25]:
# Only the files whose name contains this string are processed
# (presumably the HHMMSS part of the timestamp in the file name — TODO confirm).
hr = "000000"
# Alternative data location:
# path = "/scratch/labia/lepapeb/StageEMI/WWMF/"
path = "../WWMF/"
files = sorted(glob.glob(f"{path}*{hr}*"))
files

# Dates of interest:
#   20200122 (storm Gloria)
#   20200105 (clear weather)
#   20191122 (rain)

['../WWMF/20200126000000__PG0PAROME__WWMF__EURW1S100______GRILLE____0_48_1__SOL____GRIB2.nc',
 '../WWMF/20200306000000__PG0PAROME__WWMF__EURW1S100______GRILLE____0_48_1__SOL____GRIB2_v2.nc']

In [15]:
# Department numbers and their zone identifiers, listed pairwise so that the
# two lists stay aligned (position i of dpt_nb goes with position i of zone_dpt).
dpt_nb, zone_dpt = (
    list(column)
    for column in zip(
        ("34", 'FRJ13'),  # Herault
        ("41", 'FRB05'),  # Loir-et-Cher
        ("29", 'FRH02'),  # Finistere
        ("38", 'FRK24'),  # Isere
    )
)
zone_dpt

['FRJ13', 'FRB05', 'FRH02', 'FRK24']

In [16]:
# Department whose sympo zones would be listed; dpt_choice is only used by the
# commented-out zone_sympo listing below.
dpt_choice=dpt_nb[0]
#zone_sympo=[zone.split("ZONE_SYMPO/")[1].split(".nc")[0] for i,zone in enumerate(sorted(glob.glob("../../../StageEMI_masks/Masques_netcdf/ZONE_SYMPO/"+dpt_choice+"*")))]
#zone_sympo

In [17]:
# Load the WWMF code table once: the code and legend arrays plus the full
# table (df), which is passed to store_results() in the processing loop.
code_WWMF,legende_WWMF,df=codes_legendes_wwmf()

In [26]:
# Zones to loop over: zone_dpt (departments) or zone_sympo (sympo zones).
# zone_sympo is only defined by the commented-out listing in an earlier cell,
# so that listing must be restored before selecting it here.
zone_to_process=zone_dpt

In [28]:
# Output folder of the score tables. It is created up front so that the CSV
# export cannot fail on a fresh checkout, and set once instead of on every file.
res_path="./scores/"
os.makedirs(res_path, exist_ok=True)

for izone, zone in enumerate(zone_to_process):

    # The mask and the bounding box depend on the zone only.
    # dpt_nb must list the departments in the same order as zone_to_process.
    ds_mask,lat_center,lon_center=make_mask(zone)
#     ds_mask,lat_center,lon_center=make_mask(zone,action="sympo") # if zone_sympo
    latmin,latmax,lonmin,lonmax,subsampgeo,subsampt=choose_zone_timestep(dpt_nb[izone],timestep=1)

    for ifile,fname in enumerate(files):

        ds_sub=subset_ds(fname,latmin,latmax,lonmin,lonmax,subsampgeo,subsampt)  # chunking does not seem to work properly
        ds_masked=apply_mask(ds_sub,ds_mask)

        # "compas" must run before "agat": both add variables and attributes
        # to the same dataset
        ds_masked=calculate_distance(ds_masked,"compas")
        ds_masked=calculate_distance(ds_masked,"agat")

        df_scores=init_table_scores(ds_masked)
        df_scores=store_results(df_scores,df,ds_masked)
        save_df(df_scores,res_path,zone)

        # release the per-file objects before loading the next file
        del ds_sub,ds_masked,df_scores

Currently saving file: ./scores/FRJ13_scores_202001260100.csv
Currently saving file: ./scores/FRJ13_scores_202003060100.csv
Currently saving file: ./scores/FRB05_scores_202001260100.csv
Currently saving file: ./scores/FRB05_scores_202003060100.csv
Currently saving file: ./scores/FRH02_scores_202001260100.csv
Currently saving file: ./scores/FRH02_scores_202003060100.csv
Currently saving file: ./scores/FRK24_scores_202001260100.csv
Currently saving file: ./scores/FRK24_scores_202003060100.csv
