# City Planner Application

The eventual goal is to have a GUI-based application that can produce a variety of plots that the climate impacts community would find useful. One of these plots is "Number of Days with Temperature over X Degrees" for some GPS location and time range.

The application should eventually allow choices for:
  * Data source (LENS, CMIP6, NA-CORDEX, etc.)
  * Climate Application (Extreme Heat Events, Growing Season, Extreme Precipitation Events, etc.)
  * GPS location if applicable
  * Time range
  * Units (Fahrenheit vs. Celsius, etc.)
  * Plot type (Scatter plots vs Box plots, etc.)

In [None]:
# Display output of plots directly in Notebook
%matplotlib inline

In [None]:
import xarray as xr
import numpy as np
import intake
from tqdm.auto import tqdm
import shutil 
import os
from functools import reduce
import pprint
import json
from operator import mul
import random
import yaml
from distributed.utils import format_bytes
import dask

In [None]:
# Shut down a cluster left over from an earlier run of this notebook, if any.
# On a fresh kernel `cluster` does not exist yet (it is created in the next
# cell), so a missing name is expected here and is ignored instead of
# aborting a top-to-bottom run with a NameError.
try:
    cluster.close()
except NameError:
    pass

In [None]:
# Create a Dask Gateway cluster for the expensive dataset operations below
from dask_gateway import Gateway
from dask.distributed import Client
gateway = Gateway()
cluster = gateway.new_cluster()
# Let the cluster scale adaptively, bounded to between 2 and 100 workers
cluster.adapt(minimum=2, maximum=100)
# Connect a client to the cluster
client = Client(cluster)
# Leave `cluster` as the last expression so the notebook renders it
# (intended to show the cluster dashboard URL)
cluster

### Starting Point:  Load max daily temperatures from CESM LENS Data

In [None]:

# Load the CESM LENS data catalog from its JSON description hosted on AWS S3.
# NOTE(review): this comment used to advise choosing a key field separator that
# does not conflict with strings in the catalog, but no separator is passed
# here, so the library default is used (the dataset keys below are '.'-joined).
intakeEsmUrl = 'https://ncar-cesm-lens.s3-us-west-2.amazonaws.com/catalogs/aws-cesm1-le.json'
col = intake.open_esm_datastore(intakeEsmUrl)

# List a few catalog entries for an idea of the catalog structure.
# Note: in general, each catalog has different columns.
col.df.head()

In [None]:
# Discover what is available for the daily maximum temperature at reference height (TREFHTMX):
# list the distinct experiment / component / frequency values among the matching catalog entries.
subset = col.search(variable='TREFHTMX')
subset.unique(columns=['experiment', 'component','frequency'])

In [None]:
# Let's limit the date range to [1850, 2100] by choosing a subset of experiments, and choose the "daily" output stream.
subset = col.search(variable='TREFHTMX', experiment=['20C','RCP85'], frequency='daily')
subset.df

### Create Xarray Objects from this data subset

Note that the operation that merges data into Xarray objects is relatively expensive, so we use the Dask cluster created above to process it quickly.

In [None]:
# Load catalog entries for subset into a dictionary of xarray datasets
# Load catalog entries for subset into a dictionary of xarray datasets.
# The Zarr stores are opened with consolidated metadata; storage is accessed with anon=True
# (presumably anonymous, credential-free S3 access, since the catalog URL is on AWS -- confirm).
dsets = subset.to_dataset_dict(zarr_kwargs={"consolidated": True}, storage_options={"anon": True})
# Show the generated keys; the cells below use them to pick out individual datasets.
print("\nDataset dictionary keys:\n", dsets.keys())

In [None]:
daily_temps_1850_2005 = dsets['atm.20C.daily']
daily_temps_2006_2100 = dsets['atm.RCP85.daily']

### Define cities and years of interest. 

In [51]:
# City name -> (latitude, longitude) of the location to sample.
# Longitudes appear to be in degrees east on a 0-360 scale rather than -180..180
# (e.g. Chicago: 360 - 87.63 = 272.37, as computed in a later cell).
cities = {"Boulder": (40.01, 244.8), "Austin": (30.27, 262.26), "Chicago": (41.88, 272.37)}

# Every tenth year: 1920..2000 for the past data, 2010..2100 for the future data.
# np.arange excludes its stop value, hence the 2001 and 2101 end points.
years_past = np.arange(1920, 2001, 10)
years_fut = np.arange(2010, 2101, 10)

In [55]:
# Produce CSV files for specific cities and years.

def _write_yearly_csvs(city, cell_temps, years):
    """Write one '<city>_TMAX_<year>.csv' file per requested year.

    Parameters
    ----------
    city : str
        City name, used as the file-name prefix.
    cell_temps : xarray.Dataset
        Dataset already reduced to a single grid cell; it must have a `time`
        coordinate and a `TREFHTMX` variable.
    years : iterable of int
        Calendar years to export; each is sliced from Jan 1 through Dec 31.

    Each file name is printed after it has been written.
    """
    for year in years:
        temps = cell_temps.sel(time=slice(f'{year}-01-01', f'{year}-12-31'))
        outfile = f'{city}_TMAX_{year}.csv'
        temps.TREFHTMX.to_dataframe().to_csv(outfile, index=True)
        print(outfile)


for city, (lat, lon) in cities.items():
    # Pick the grid cell nearest to the city in both the past and future datasets.
    cell_temps_past = daily_temps_1850_2005.sel(lat=lat, lon=lon, method='nearest')
    cell_temps_fut = daily_temps_2006_2100.sel(lat=lat, lon=lon, method='nearest')

    # Past years are written first, then future years, for each city in turn.
    _write_yearly_csvs(city, cell_temps_past, years_past)
    _write_yearly_csvs(city, cell_temps_fut, years_fut)

Boulder_TMAX_1920.csv
Boulder_TMAX_1930.csv
Boulder_TMAX_1940.csv
Boulder_TMAX_1950.csv
Boulder_TMAX_1960.csv
Boulder_TMAX_1970.csv
Boulder_TMAX_1980.csv
Boulder_TMAX_1990.csv
Boulder_TMAX_2000.csv
Boulder_TMAX_2010.csv
Boulder_TMAX_2020.csv
Boulder_TMAX_2030.csv
Boulder_TMAX_2040.csv
Boulder_TMAX_2050.csv
Boulder_TMAX_2060.csv
Boulder_TMAX_2070.csv
Boulder_TMAX_2080.csv
Boulder_TMAX_2090.csv
Boulder_TMAX_2100.csv
Austin_TMAX_1920.csv
Austin_TMAX_1930.csv
Austin_TMAX_1940.csv
Austin_TMAX_1950.csv
Austin_TMAX_1960.csv
Austin_TMAX_1970.csv
Austin_TMAX_1980.csv
Austin_TMAX_1990.csv
Austin_TMAX_2000.csv
Austin_TMAX_2010.csv
Austin_TMAX_2020.csv
Austin_TMAX_2030.csv
Austin_TMAX_2040.csv
Austin_TMAX_2050.csv
Austin_TMAX_2060.csv
Austin_TMAX_2070.csv
Austin_TMAX_2080.csv
Austin_TMAX_2090.csv
Austin_TMAX_2100.csv
Chicago_TMAX_1920.csv
Chicago_TMAX_1930.csv
Chicago_TMAX_1940.csv
Chicago_TMAX_1950.csv
Chicago_TMAX_1960.csv
Chicago_TMAX_1970.csv
Chicago_TMAX_1980.csv




Chicago_TMAX_1990.csv
Chicago_TMAX_2000.csv
Chicago_TMAX_2010.csv
Chicago_TMAX_2020.csv
Chicago_TMAX_2030.csv
Chicago_TMAX_2040.csv
Chicago_TMAX_2050.csv
Chicago_TMAX_2060.csv
Chicago_TMAX_2070.csv
Chicago_TMAX_2080.csv
Chicago_TMAX_2090.csv
Chicago_TMAX_2100.csv


In [None]:
# Select the Boulder grid cell explicitly.  Without this assignment, `lat`/`lon` are
# whatever the CSV export loop above left behind (the last city in `cities`), while the
# files written further below are named 'Boulder_TMAX_*.csv'.
lat, lon = cities["Boulder"]
cell_temps_past = daily_temps_1850_2005.sel(lat=lat, lon=lon, method='nearest')
cell_temps_fut = daily_temps_2006_2100.sel(lat=lat, lon=lon, method='nearest')

In [None]:
temps_past = cell_temps_past.sel(time=slice('1990-01-01', '1990-12-31'))
temps_fut = cell_temps_fut.sel(time=slice('2090-01-01', '2090-12-31'))

In [None]:
temps_past

In [None]:
temps_fut

In [None]:
df_1990 = temps_past.TREFHTMX.to_dataframe()

In [None]:
df_2090 = temps_fut.TREFHTMX.to_dataframe()

In [None]:
df_1990.to_csv('Boulder_TMAX_1990.csv',index=True)


In [None]:
df_2090.to_csv('Boulder_TMAX_2090.csv',index=True)

In [None]:
import matplotlib.pyplot as plt

In [None]:
times = temps_past.time.data
times[:3]

In [None]:
time_indexes = np.arange(1,times.size + 1)

In [None]:
temps = temps_past.TREFHTMX.data.transpose()
temps

In [None]:
ax = plt.axes()
ax.plot(time_indexes, temps)

In [None]:
ax = plt.axes()
ax.plot(time_indexes, temps)

In [None]:
cluster.close()

In [None]:
# Convert a west longitude (87.63 W) to the 0-360 degrees-east scale; the result, 272.37,
# matches the Chicago longitude used in `cities`.
360 - 87.63

### Test NA-CORDEX Catalog Behavior Here

In [None]:
import intake

In [None]:
# NOTE(review): hard-coded path into one user's home directory under /glade; this cell
# only works where that checkout exists.  Point it at your own copy of the catalog.
cordex_path = '/glade/u/home/bonnland/GitRepos/PullRequests/intake-esm-datastore-sethmcg/catalogs/glade-na-cordex.json'

In [None]:
col = intake.open_esm_datastore(cordex_path)

In [None]:
col

## Explore Zarr files

In [None]:
import xarray as xr


In [None]:
# Open a single Zarr store directly, using its consolidated metadata.
# NOTE(review): hard-coded path into one user's scratch space under /glade; adjust as needed.
ds = xr.open_zarr("/glade/scratch/bonnland/lens-aws/ocn/monthly/cesmLE-CTRL-SFWF.zarr", consolidated=True)

In [None]:
ds