# Script for Extracting MRB Data
**Description:** Reads in the raw ERA5 and CESM data and writes out only the temperature data for the MRB, for use in Func4ModelComparison  
**Input Data:** Raw ERA5 and CESM Data  
**Output Data:** Temperature-only data for ERA5 and CESM over the MRB  
**Date:** June 2022  
**Creator:** Emma Perkins  

In [1]:
# Import relevant Packages
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import glob
import itertools
import scipy.stats as stats
from math import sqrt

In [2]:
# cesm full data, long step
# Opens the two daily CESM TREFHT time-series files as a single dataset,
# prints a summary, and writes just the TREFHT variable to a NetCDF file.
cesm_path = '/glade/campaign/cesm/collections/cesmLE/CESM-CAM5-BGC-LE/atm/proc/tseries/daily/TREFHT/'
cesm_file1 = cesm_path+'b.e11.B1850C5CN.f09_g16.005.cam.h1.TREFHT.19000101-19991231.nc'  # TREFHT = temp at reference height (2-meter air temp)
cesm_file2 = cesm_path+'b.e11.B1850C5CN.f09_g16.005.cam.h1.TREFHT.20000101-20991231.nc'
cesm_files = [cesm_file1, cesm_file2]
outpath = '/glade/work/eperkins/CESM_MRB_TempData.nc'

with xr.open_mfdataset(cesm_files, concat_dim=None) as cesm_full:
    print(cesm_full)

    # Select the temperature variable; the other variables in the files are dropped.
    cesm_temp = cesm_full.TREFHT

    # NOTE(review): no lat/lon selection is applied here, so the full grid shown in
    # the printed summary is written even though the output file is named "MRB".
    # Confirm whether the spatial subset is meant to happen downstream.

    # The data are lazily loaded (dask-backed), so the write must happen while the
    # source files are still open, i.e. inside this `with` block, rather than
    # after the context manager has closed them.
    cesm_temp.to_netcdf(outpath)

<xarray.Dataset>
Dimensions:       (lev: 30, time: 73000, ilev: 31, lat: 192, lon: 288, slat: 191, slon: 288, nbnd: 2)
Coordinates:
  * lev           (lev) float64 3.643 7.595 14.36 24.61 ... 957.5 976.3 992.6
  * ilev          (ilev) float64 2.255 5.032 10.16 18.56 ... 967.5 985.1 1e+03
  * time          (time) object 1900-01-02 00:00:00 ... 2100-01-01 00:00:00
  * lat           (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 88.12 89.06 90.0
  * lon           (lon) float64 0.0 1.25 2.5 3.75 ... 355.0 356.2 357.5 358.8
  * slat          (slat) float64 -89.53 -88.59 -87.64 ... 87.64 88.59 89.53
  * slon          (slon) float64 -0.625 0.625 1.875 3.125 ... 355.6 356.9 358.1
Dimensions without coordinates: nbnd
Data variables: (12/32)
    hyam          (time, lev) float64 dask.array<chunksize=(36500, 30), meta=np.ndarray>
    hybm          (time, lev) float64 dask.array<chunksize=(36500, 30), meta=np.ndarray>
    hyai          (time, ilev) float64 dask.array<chunksize=(36500, 31), meta=np.n

In [3]:
# ERA5 data (need to combine 2 datasets), long step
# Opens the two ERA5 temperature files, joins them along time, averages to
# daily means, and writes the t2m variable to a NetCDF file.
era5_path = '/glade/work/eperkins/'
era5_file1 = era5_path+'ERA5_hourly_temp_1950_1978.nc'
era5_file2 = era5_path+'ERA5_hourly_temp_1979_2014.nc'
era5_files = [era5_file1, era5_file2]
outpath = '/glade/work/eperkins/ERA5_MRB_TempData.nc'

# Keep both files open for the whole pipeline: the variables are loaded lazily,
# so combining/resampling/writing must happen before the context manager
# closes the underlying files.
with xr.open_dataset(era5_file1) as era5_data1, xr.open_dataset(era5_file2) as era5_data2:
    print(era5_data1)
    print(era5_data2)

    # The two files cover consecutive, non-overlapping periods (see the printed
    # time coordinates), so append them along time instead of using xr.merge,
    # which would first reindex each dataset onto the union time axis.
    era5_combined = xr.concat([era5_data1, era5_data2], dim='time')

    # Average the sub-daily samples into one value per calendar day.
    era5_data = era5_combined.resample(time='1D').mean('time')
    era5_temp = era5_data.t2m

    era5_temp.to_netcdf(outpath)

<xarray.Dataset>
Dimensions:    (longitude: 153, latitude: 89, time: 42368)
Coordinates:
  * longitude  (longitude) float32 -140.0 -139.8 -139.5 ... -102.5 -102.2 -102.0
  * latitude   (latitude) float32 72.0 71.75 71.5 71.25 ... 50.5 50.25 50.0
  * time       (time) datetime64[ns] 1950-01-01 ... 1978-12-31T18:00:00
Data variables:
    t2m        (time, latitude, longitude) float32 ...
Attributes:
    Conventions:  CF-1.6
    history:      2022-06-16 00:18:40 GMT by grib_to_netcdf-2.24.3: /opt/ecmw...
<xarray.Dataset>
Dimensions:    (longitude: 153, latitude: 89, time: 52596)
Coordinates:
  * longitude  (longitude) float32 -140.0 -139.8 -139.5 ... -102.5 -102.2 -102.0
  * latitude   (latitude) float32 72.0 71.75 71.5 71.25 ... 50.5 50.25 50.0
  * time       (time) datetime64[ns] 1979-01-01 ... 2014-12-31T18:00:00
Data variables:
    t2m        (time, latitude, longitude) float32 ...
Attributes:
    Conventions:  CF-1.6
    history:      2022-06-15 20:37:40 GMT by grib_to_netcdf-2.24.3: