## make dico path
Construct a pandas dataframe with paths for the data from eNATL
* dataframe is constructed with "gridS" files as a reference
* mapping dictionnaries to go from variable name to file name is provided as attributes
* see itidenatl.utils routines

In [1]:
from pathlib import Path
import itidenatl.utils as ut
import pandas as pd

### Create dataframe

In [2]:
filist = ut.make_list_files()

date = [k.name.rstrip(".nc")[-8:] for k in filist]

df = pd.DataFrame( {"date":date, "full_path":filist})\
        .sort_values("date").reset_index().drop(columns="index")
df

Unnamed: 0,date,full_path
0,20090630,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...
1,20090701,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...
2,20090702,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...
3,20090703,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...
4,20090704,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...
...,...,...
482,20101025,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...
483,20101026,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...
484,20101027,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...
485,20101028,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...


### Add attributes

In [3]:
df.attrs["data_path"] = ut.raw_data_dir
df.attrs["var_ref"] = "gridS"
df.attrs["mapname_filtovar"] = ut.vmapping
df.attrs["mapname_vartofil"] = {v:k for k,v in ut.vmapping.items()}

### Add various fields that may be useful

In [4]:
df["filename"] = df.full_path.apply(lambda x: x.name)
df["parent"] = df.full_path.apply(lambda x: x.parent.name)
df["runseg"] = df.full_path.apply(lambda x: x.parent.parent.name)
df

Unnamed: 0,date,full_path,filename,parent,runseg
0,20090630,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
1,20090701,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
2,20090702,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
3,20090703,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
4,20090704,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
...,...,...,...,...,...
482,20101025,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02X_1h_20101006_20101029_gridS_201...,01393201-01440721,eNATL60-BLBT02X-S
483,20101026,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02X_1h_20101006_20101029_gridS_201...,01393201-01440721,eNATL60-BLBT02X-S
484,20101027,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02X_1h_20101006_20101029_gridS_201...,01393201-01440721,eNATL60-BLBT02X-S
485,20101028,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02X_1h_20101006_20101029_gridS_201...,01393201-01440721,eNATL60-BLBT02X-S


### store result
N.B.: pathlib.path will be converted to str (column "full_path")

In [6]:
df.to_json(ut.dico_path, default_handler=str)

## end of notebook
now working on itidenatl.utils routines

In [2]:
df = pd.read_json(ut.dico_path)

In [4]:
df.iloc[100:110]

Unnamed: 0,date,full_path,filename,parent,runseg
100,20091008,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20091008_20091012_gridS_2009...,00604801-00615600,eNATL60-BLBT02-S
101,20091009,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20091008_20091012_gridS_2009...,00604801-00615600,eNATL60-BLBT02-S
102,20091010,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20091008_20091012_gridS_2009...,00604801-00615600,eNATL60-BLBT02-S
103,20091011,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20091008_20091012_gridS_2009...,00604801-00615600,eNATL60-BLBT02-S
104,20091012,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20091008_20091012_gridS_2009...,00604801-00615600,eNATL60-BLBT02-S
105,20091013,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20091013_20091017_gridS_2009...,00615601-00626400,eNATL60-BLBT02-S
106,20091014,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20091013_20091017_gridS_2009...,00615601-00626400,eNATL60-BLBT02-S
107,20091015,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20091013_20091017_gridS_2009...,00615601-00626400,eNATL60-BLBT02-S
108,20091016,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20091013_20091017_gridS_2009...,00615601-00626400,eNATL60-BLBT02-S
109,20091017,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20091013_20091017_gridS_2009...,00615601-00626400,eNATL60-BLBT02-S


In [4]:
ut.get_eNATL_path("votemper", [5,6])

[PosixPath('/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLBT02-S/00399601-00410400/eNATL60-BLBT02_1h_20090705_20090709_gridT_20090705-20090705.nc'),
 PosixPath('/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLBT02-S/00399601-00410400/eNATL60-BLBT02_1h_20090705_20090709_gridT_20090706-20090706.nc')]

In [17]:
### get list of subdirectories

df.runseg.unique()

array(['eNATL60-BLBT02-S', 'eNATL60-BLBT02X-S'], dtype=object)

In [3]:
df[df.runseg==df.runseg.unique()[0]]

Unnamed: 0,date,full_path,filename,parent,runseg
0,20090630,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
1,20090701,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
2,20090702,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
3,20090703,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
4,20090704,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
...,...,...,...,...,...
125,20091102,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20091102_20091106_gridS_2009...,00658801-00669600,eNATL60-BLBT02-S
126,20091103,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20091102_20091106_gridS_2009...,00658801-00669600,eNATL60-BLBT02-S
127,20091104,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20091102_20091106_gridS_2009...,00658801-00669600,eNATL60-BLBT02-S
128,20091105,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20091102_20091106_gridS_2009...,00658801-00669600,eNATL60-BLBT02-S


In [8]:
df[df.parent==df.parent.unique()[11]]

Unnamed: 0,date,full_path,filename,parent,runseg
55,20090824,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090824_20090828_gridS_2009...,00507601-00518400,eNATL60-BLBT02-S
56,20090825,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090824_20090828_gridS_2009...,00507601-00518400,eNATL60-BLBT02-S
57,20090826,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090824_20090828_gridS_2009...,00507601-00518400,eNATL60-BLBT02-S
58,20090827,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090824_20090828_gridS_2009...,00507601-00518400,eNATL60-BLBT02-S
59,20090828,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090824_20090828_gridS_2009...,00507601-00518400,eNATL60-BLBT02-S


In [10]:
df

Unnamed: 0,date,full_path,filename,parent,runseg
0,20090630,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
1,20090701,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
2,20090702,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
3,20090703,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
4,20090704,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02_1h_20090630_20090704_gridS_2009...,00388801-00399600,eNATL60-BLBT02-S
...,...,...,...,...,...
482,20101025,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02X_1h_20101006_20101029_gridS_201...,01393201-01440721,eNATL60-BLBT02X-S
483,20101026,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02X_1h_20101006_20101029_gridS_201...,01393201-01440721,eNATL60-BLBT02X-S
484,20101027,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02X_1h_20101006_20101029_gridS_201...,01393201-01440721,eNATL60-BLBT02X-S
485,20101028,/work/CT1/hmg2840/lbrodeau/eNATL60/eNATL60-BLB...,eNATL60-BLBT02X_1h_20101006_20101029_gridS_201...,01393201-01440721,eNATL60-BLBT02X-S


In [96]:
df.iloc[2]["date"].to_list()

AttributeError: 'numpy.int64' object has no attribute 'to_list'