# Processing GISTEMPv4 data

### Set up
#### Packages

In [1]:
import numpy as np
import xarray as xr
import pandas as pd
from scipy import stats
import warnings
warnings.filterwarnings('ignore')
import datetime as dt
from datetime import timedelta
xr.set_options(display_expand_data=False);
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client
from dask.diagnostics import ProgressBar
from Processing_functions import AddCyclic, FixLongitude

#### Filepaths & name variables

In [2]:
## Input file names: the monthly anomaly file and the 1991-2020 "ltm" file
filename1 = "air.2x2.1200.mon.anom.comb.nc"
filename2 = "air.2x2.1200.mon.1991-2020.ltm.comb.nc"

## Directories (inputs are read from, and outputs written to, the same place)
path_to_arch = "/glade/work/glydia/processed_GISTEMP_data/"
path_to_outdata = "/glade/work/glydia/processed_GISTEMP_data/"

## Component and variable selection
comp = "atm"
var_ind = 0

# Per component: [name read from the input files, name given to the output variable]
var_list = {"atm": [["air", "TREFHT"]]}
var, var2 = var_list[comp][var_ind]

# Per component: tag inserted into the output file name
h_ext = {"atm": [".h0."]}

In [3]:
# Dask PBS cluster start-up, currently disabled: uncomment this cell (and the
# `client` / `client.shutdown()` cells) to run the processing on a cluster.
# NOTE(review): the job name prefix 'piControl_' looks carried over from another
# workflow -- confirm before re-enabling.
# cluster = PBSCluster(cores    = 1,
#                      memory   = '25GiB',
#                      queue    = 'casper',
#                      walltime = '02:00:00',
#                      project  = 'UCUB0137',
#                      name='piControl_'+var)
# cluster.scale(4*9)
# client = Client(cluster)

In [4]:
# client

In [5]:
## Dask chunk sizes, keyed by component and then by dimension name
# NOTE(review): the processing cell indexes the latitude coordinate as `lat`;
# confirm the input files really name their dimensions 'latitude'/'longitude',
# otherwise these two entries may not take effect when the files are opened.
time_ch = 88
chunks = {
    component: {'time': time_ch, 'latitude': 45, 'longitude': 90}
    for component in ('atm', 'ice')
}

### Load & modify data
#### GISTEMP anomaly and climatology data

In [6]:
%%time

# Years to process, as strings so they can be used directly in date labels.
yr_range = np.array([str(i) for i in np.arange(1950,2024)])

## Load data
# Echo the two input paths, then open both files with the chunking chosen for `comp`.
print(path_to_arch+filename1)
print(path_to_arch+filename2)
ds_anom = xr.open_dataset(path_to_arch+filename1,chunks=chunks[comp])
ds_mean = xr.open_dataset(path_to_arch+filename2,chunks=chunks[comp])

# Add the per-calendar-month mean of the second file to each month of the
# anomaly series, then offset by 273.15 (presumably degC -> K; confirm the
# units of the input files).
dsv = ds_anom[var].groupby('time.month')+ds_mean[var].groupby('time.month').mean('time')+273.15

## Process one calendar year at a time
processed_list = []
for yr in yr_range:
    # The '-12-17' upper bound is kept from the original workflow; it assumes
    # December's timestamp falls on or before the 17th -- TODO confirm.
    ann_slice = dsv.sel(time=slice(f'{yr}-01-01', f'{yr}-12-17'))
    print(f'sliced {yr}-01-01 to {yr}-12-17')

    # Project-local helper (see Processing_functions), then rename the
    # variable from `var` to `var2`.
    fixedcoord_data = FixLongitude(ann_slice, False)
    fixedname_data = fixedcoord_data.rename(var2)
    print('   fixed coordinate and variable names')

    # Project-local helper (see Processing_functions). It can be wrapped in
    # dask.delayed and gathered with dask.compute(*processed_list) when a
    # cluster is in use.
    addcyc_data = AddCyclic(fixedname_data)

    # Reverse the order of the latitude coordinate.
    latfix_data = addcyc_data.reindex(lat=addcyc_data.lat.values[::-1])

    processed_list.append(latfix_data)

# Message retained from the dask.delayed variant of this cell; no explicit
# dask.compute call happens here.
print('computed list')

## Combine the yearly pieces along time and re-chunk
processed_out = xr.concat(processed_list,dim='time').chunk({'time': time_ch})
print('concatenated data')

## Write to disk
# The date-range label is derived from yr_range so the file name always
# matches the years actually processed (currently '195001-202312').
date_label = f'{yr_range[0]}01-{yr_range[-1]}12'
out_file = f'{path_to_outdata}GISTEMP{h_ext[comp][0]}{var2}.{date_label}.nc'
processed_out.to_netcdf(out_file,
                        format='NETCDF4',
                        encoding={var2: {"zlib": True, "complevel": 1}})
print('wrote data to disk')

/glade/work/glydia/processed_GISTEMP_data/air.2x2.1200.mon.anom.comb.nc
/glade/work/glydia/processed_GISTEMP_data/air.2x2.1200.mon.1991-2020.ltm.comb.nc
sliced 1950-01-01 to 1950-12-17
   fixed coordinate and variable names
sliced 1951-01-01 to 1951-12-17
   fixed coordinate and variable names
sliced 1952-01-01 to 1952-12-17
   fixed coordinate and variable names
sliced 1953-01-01 to 1953-12-17
   fixed coordinate and variable names
sliced 1954-01-01 to 1954-12-17
   fixed coordinate and variable names
sliced 1955-01-01 to 1955-12-17
   fixed coordinate and variable names
sliced 1956-01-01 to 1956-12-17
   fixed coordinate and variable names
sliced 1957-01-01 to 1957-12-17
   fixed coordinate and variable names
sliced 1958-01-01 to 1958-12-17
   fixed coordinate and variable names
sliced 1959-01-01 to 1959-12-17
   fixed coordinate and variable names
sliced 1960-01-01 to 1960-12-17
   fixed coordinate and variable names
sliced 1961-01-01 to 1961-12-17
   fixed coordinate and variable names

In [7]:
# client.shutdown()