Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Jan 5, 2022
1 parent e02d8c8 commit c2f7126
Show file tree
Hide file tree
Showing 20 changed files with 313 additions and 271 deletions.
4 changes: 1 addition & 3 deletions builders/cesm-stratus.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,7 @@ def build_catalog(fs, bucket='ncar-cesm-lens'):
S3_URL = 'https://stratus.ucar.edu'
# fs = fsspec.filesystem('s3', secret=os.environ['STRATUS_SECRET_KEY'], key=os.environ['STRATUS_ACCESS_KEY'],
# anon=False, client_kwargs={'endpoint_url':S3_URL})
fs = fsspec.filesystem(
's3', profile='stratus-cesm', anon=False, client_kwargs={'endpoint_url': S3_URL}
)
fs = fsspec.filesystem('s3', profile='stratus-cesm', anon=False, client_kwargs={'endpoint_url': S3_URL})

df = build_catalog(fs)
df.to_csv('../catalogs/stratus-cesm1-le.csv', index=False)
4 changes: 3 additions & 1 deletion builders/cesm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

component_streams = cesm2_cmip6_definitions['component_streams']
cesm2_cmip6_exps = CaseInsensitiveDict(cesm2_cmip6_definitions['experiments'])
date_str_regex = r'\d{4}\-\d{4}|\d{6}\-\d{6}|\d{8}\-\d{8}|\d{10}Z\-\d{10}Z|\d{12}Z\-\d{12}Z|\d{10}\-\d{10}|\d{12}\-\d{12}'
date_str_regex = (
r'\d{4}\-\d{4}|\d{6}\-\d{6}|\d{8}\-\d{8}|\d{10}Z\-\d{10}Z|\d{12}Z\-\d{12}Z|\d{10}\-\d{10}|\d{12}\-\d{12}'
)


def cesm2_cmip6_parser(filepath):
Expand Down
3 changes: 1 addition & 2 deletions builders/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ def __init__(self, columns, exclude_patterns=[]):

def _filter_func(self, filelist):
return not any(
fnmatch.fnmatch(filelist, pat=exclude_pattern)
for exclude_pattern in self.exclude_patterns
fnmatch.fnmatch(filelist, pat=exclude_pattern) for exclude_pattern in self.exclude_patterns
)

def _update_dict(self, entry):
Expand Down
3 changes: 2 additions & 1 deletion builders/notebooks/aws-cesm1-le_catalog_builder.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"import pandas as pd\n",
"from intake.source.utils import reverse_format\n",
"from tqdm.auto import tqdm\n",
"import s3fs "
"import s3fs"
]
},
{
Expand Down Expand Up @@ -161,6 +161,7 @@
"source": [
"template = \"s3://ncar-cesm-lens/{component}/{frequency}/cesmLE-{experiment}-{variable}.zarr\"\n",
"\n",
"\n",
"def get_attrs(store):\n",
" f = reverse_format(template, store)\n",
" f['path'] = store\n",
Expand Down
61 changes: 38 additions & 23 deletions builders/notebooks/aws-cordex_catalog_builder.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"import pandas as pd\n",
"from intake.source.utils import reverse_format\n",
"from tqdm.auto import tqdm\n",
"import s3fs \n",
"import s3fs\n",
"\n",
"import xarray as xr"
]
Expand Down Expand Up @@ -45,7 +45,7 @@
"outputs": [],
"source": [
"def get_file_list(fs):\n",
" \n",
"\n",
" # Remove 'catalogs' directory from top level listing to get frequencies\n",
" frequencies = [f for f in fs.ls(root) if 'catalogs' not in f]\n",
"\n",
Expand Down Expand Up @@ -125,9 +125,11 @@
"source": [
"def get_filename_attrs(store, fs=fs):\n",
"\n",
" template = \"s3://ncar-na-cordex/{frequency}/{variable}.{scenario}.{frequency}.{grid}.{bias_correction}.zarr\"\n",
" template = (\n",
" \"s3://ncar-na-cordex/{frequency}/{variable}.{scenario}.{frequency}.{grid}.{bias_correction}.zarr\"\n",
" )\n",
" attrs = reverse_format(template, store)\n",
" \n",
"\n",
" # Obtain spatial_resolution from grid; validate correct string values.\n",
" if attrs['grid'] == 'NAM-22i':\n",
" attrs['spatial_resolution'] = '0.25 deg'\n",
Expand All @@ -136,7 +138,7 @@
" else:\n",
" value = attrs['grid']\n",
" print(f'Unknown grid value: {value}')\n",
" assert(False)\n",
" assert False\n",
"\n",
" attrs['path'] = store\n",
" return attrs"
Expand All @@ -156,19 +158,19 @@
"\n",
" store = s3fs.S3Map(root=store_path, s3=fs)\n",
" ds = xr.open_zarr(store)\n",
" attrs = {'long_name': ds[var_name].attrs['long_name'], \n",
" 'units': ds[var_name].attrs['units'], \n",
" 'standard_name': ds[var_name].attrs['standard_name'], \n",
" 'spatial_domain': 'north_america', \n",
" 'vertical_levels': 1, \n",
" 'start_time': pd.to_datetime(str(ds['time'].values[0])).isoformat(), \n",
" 'end_time': pd.to_datetime(str(ds['time'].values[-1])).isoformat(), \n",
" 'na-cordex-models': list(ds.coords['member_id'].values)\n",
" }\n",
" attrs = {\n",
" 'long_name': ds[var_name].attrs['long_name'],\n",
" 'units': ds[var_name].attrs['units'],\n",
" 'standard_name': ds[var_name].attrs['standard_name'],\n",
" 'spatial_domain': 'north_america',\n",
" 'vertical_levels': 1,\n",
" 'start_time': pd.to_datetime(str(ds['time'].values[0])).isoformat(),\n",
" 'end_time': pd.to_datetime(str(ds['time'].values[-1])).isoformat(),\n",
" 'na-cordex-models': list(ds.coords['member_id'].values),\n",
" }\n",
"\n",
" attrs.update(metadata)\n",
" return attrs\n",
" "
" return attrs"
]
},
{
Expand Down Expand Up @@ -474,10 +476,23 @@
"df = pd.DataFrame(entries)\n",
"\n",
"# Reorder catalog columns\n",
"catalog_order = ['variable', 'standard_name', 'long_name', 'units', 'spatial_domain', \n",
" 'grid', 'spatial_resolution', 'scenario', 'start_time', 'end_time',\n",
" 'frequency', 'vertical_levels', 'bias_correction', 'na-cordex-models',\n",
" 'path']\n",
"catalog_order = [\n",
" 'variable',\n",
" 'standard_name',\n",
" 'long_name',\n",
" 'units',\n",
" 'spatial_domain',\n",
" 'grid',\n",
" 'spatial_resolution',\n",
" 'scenario',\n",
" 'start_time',\n",
" 'end_time',\n",
" 'frequency',\n",
" 'vertical_levels',\n",
" 'bias_correction',\n",
" 'na-cordex-models',\n",
" 'path',\n",
"]\n",
"df = df.reindex(columns=catalog_order)\n",
"\n",
"df.head()"
Expand All @@ -490,10 +505,10 @@
"outputs": [],
"source": [
"# Make 'path' the final column in the DataFrame\n",
"#path = df.pop('path')\n",
"#df['path'] = path\n",
"# path = df.pop('path')\n",
"# df['path'] = path\n",
"\n",
"#df.head()"
"# df.head()"
]
},
{
Expand Down
81 changes: 46 additions & 35 deletions builders/notebooks/aws-dart_catalog_builder.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
"import pandas as pd\n",
"from intake.source.utils import reverse_format\n",
"from tqdm.auto import tqdm\n",
"#import s3fs \n",
"\n",
"# import s3fs\n",
"\n",
"import os\n",
"from pathlib import Path\n",
Expand All @@ -37,11 +38,12 @@
"metadata": {},
"outputs": [],
"source": [
"variables = {'atm': {'PS', 'Q', 'T', 'US', 'VS', 'CLDLIQ', 'CLDICE'},\n",
" 'lnd': {'ER', 'HR', 'TSA', 'EFLX_LH_TOT'}}\n",
"variables = {\n",
" 'atm': {'PS', 'Q', 'T', 'US', 'VS', 'CLDLIQ', 'CLDICE'},\n",
" 'lnd': {'ER', 'HR', 'TSA', 'EFLX_LH_TOT'},\n",
"}\n",
"\n",
"frequencies = {'atm': 'weekly', \n",
" 'lnd': 'hourly6'}"
"frequencies = {'atm': 'weekly', 'lnd': 'hourly6'}"
]
},
{
Expand All @@ -65,7 +67,7 @@
"zarr_dir = Path('/glade/scratch/bonnland/DART/ds345.0/zarr-publish/')\n",
"\n",
"# Use if pulling Zarr metadata from AWS\n",
"#fs = s3fs.S3FileSystem(anon=True)"
"# fs = s3fs.S3FileSystem(anon=True)"
]
},
{
Expand All @@ -82,9 +84,9 @@
"outputs": [],
"source": [
"def get_file_list(store_path):\n",
" \n",
"\n",
" store_path_str = store_path.as_posix()\n",
" \n",
"\n",
" # Remove 'catalogs' directory from top level listing to get frequencies\n",
" frequencies = [f for f in os.listdir(store_path_str) if 'catalogs' not in f]\n",
"\n",
Expand Down Expand Up @@ -146,11 +148,11 @@
}
],
"source": [
"#stores = get_file_list(fs)\n",
"# stores = get_file_list(fs)\n",
"stores = get_file_list(zarr_dir)\n",
"\n",
"stores = [f\"{zarr_dir}/{store}\" for store in stores ]\n",
"#stores = [f\"{s3_root}{store}\" for store in stores]\n",
"stores = [f\"{zarr_dir}/{store}\" for store in stores]\n",
"# stores = [f\"{s3_root}{store}\" for store in stores]\n",
"stores"
]
},
Expand All @@ -167,14 +169,14 @@
"metadata": {},
"outputs": [],
"source": [
"#def get_filename_attrs(store, fs=fs):\n",
"# def get_filename_attrs(store, fs=fs):\n",
"def get_filename_attrs(store):\n",
"\n",
" relative_path = os.path.relpath(store, start=zarr_dir)\n",
" print(relative_path)\n",
" template = \"{frequency}/{variable}.zarr\"\n",
" attrs = reverse_format(template, relative_path)\n",
" \n",
"\n",
" # Obtain spatial_resolution from grid; validate correct string values.\n",
" # if attrs['grid'] == 'NAM-22i':\n",
" # attrs['spatial_resolution'] = '0.25 deg'\n",
Expand All @@ -201,24 +203,24 @@
" metadata = get_filename_attrs(store_path)\n",
" var_name = metadata['variable']\n",
"\n",
" #store = s3fs.S3Map(root=store_path, s3=fs)\n",
" #ds = xr.open_zarr(store)\n",
" # store = s3fs.S3Map(root=store_path, s3=fs)\n",
" # ds = xr.open_zarr(store)\n",
"\n",
" ds = xr.open_zarr(store_path)\n",
" \n",
" attrs = {'long_name': ds[var_name].attrs['long_name'], \n",
" 'units': ds[var_name].attrs['units'], \n",
" 'component': 'atm' if var_name in variables['atm'] else 'lnd',\n",
" 'standard_name': 'unspecified', \n",
" 'spatial_domain': 'global', \n",
" 'vertical_levels': 1 if ('lev' not in ds[var_name].dims) else ds.sizes['lev'], \n",
" 'start_time': pd.to_datetime(str(ds['time'].values[0])).isoformat(), \n",
" 'end_time': pd.to_datetime(str(ds['time'].values[-1])).isoformat(), \n",
" }\n",
"\n",
" attrs = {\n",
" 'long_name': ds[var_name].attrs['long_name'],\n",
" 'units': ds[var_name].attrs['units'],\n",
" 'component': 'atm' if var_name in variables['atm'] else 'lnd',\n",
" 'standard_name': 'unspecified',\n",
" 'spatial_domain': 'global',\n",
" 'vertical_levels': 1 if ('lev' not in ds[var_name].dims) else ds.sizes['lev'],\n",
" 'start_time': pd.to_datetime(str(ds['time'].values[0])).isoformat(),\n",
" 'end_time': pd.to_datetime(str(ds['time'].values[-1])).isoformat(),\n",
" }\n",
"\n",
" attrs.update(metadata)\n",
" return attrs\n",
" "
" return attrs"
]
},
{
Expand Down Expand Up @@ -269,7 +271,7 @@
],
"source": [
"# Check validity with first few stores\n",
"#stores = stores[0:5]\n",
"# stores = stores[0:5]\n",
"stores"
]
},
Expand Down Expand Up @@ -511,10 +513,19 @@
"df = pd.DataFrame(entries)\n",
"\n",
"# Reorder catalog columns\n",
"catalog_order = ['variable', 'long_name', 'units', 'standard_name', 'vertical_levels', \n",
" 'component', 'spatial_domain', \n",
" 'start_time', 'end_time',\n",
" 'frequency', 'path']\n",
"catalog_order = [\n",
" 'variable',\n",
" 'long_name',\n",
" 'units',\n",
" 'standard_name',\n",
" 'vertical_levels',\n",
" 'component',\n",
" 'spatial_domain',\n",
" 'start_time',\n",
" 'end_time',\n",
" 'frequency',\n",
" 'path',\n",
"]\n",
"df = df.reindex(columns=catalog_order)\n",
"\n",
"df.head()"
Expand All @@ -527,10 +538,10 @@
"outputs": [],
"source": [
"# Make 'path' the final column in the DataFrame\n",
"#path = df.pop('path')\n",
"#df['path'] = path\n",
"# path = df.pop('path')\n",
"# df['path'] = path\n",
"\n",
"#df.head()"
"# df.head()"
]
},
{
Expand Down
Loading

0 comments on commit c2f7126

Please sign in to comment.