In [5]:
import os
import numpy as np
from netCDF4 import Dataset
import datetime as dt
import sys
from glob import glob
from scipy.interpolate import RectSphereBivariateSpline # this is the interpolation method package





Below: `regrid_ncfile` is the path to the dataset which contains the latitude and longitude vectors you want to interpolate your data to. Alternatively, you may define your own lat and lon by defining `regrid_lat` and `regrid_lon` as some other numpy array 

In [7]:
regrid_ncfile = Dataset('/home/datasets/aos112_2021/AOS112_Lab_1_Projection_monthly_data/cmip5/pr_Amon_ACCESS1-0_historical_r1i1p1_185001-200512_2.5x2.5regrid.nc','r')
regrid_lat = regrid_ncfile.variables['lat'][:]
regrid_lon = regrid_ncfile.variables['lon'][:]

FileNotFoundError: [Errno 2] No such file or directory: b'/home/datasets/aos112_2021/AOS112_Lab_1_Projection_monthly_data/cmip5/pr_Amon_ACCESS1-0_historical_r1i1p1_185001-200512_2.5x2.5regrid.nc'

In [46]:
### The interpolating scheme works for radian coordinates of which lat is positive from 0 to pi
new_lats,new_lons  = np.meshgrid(np.deg2rad(regrid_lat)+np.pi/2,np.deg2rad(regrid_lon))


The `data_directory` should be the file path to the data. The data is organized as follows __/data_directory/model_name/hist/tas_hist_files__ and __/data_directory/model_name/ssp585/tas_ssp_files__ . You should have a target directory with subdirectories which are named __model_name__. Within each model subdirectory, you should have two folders, __hist__ and __ssp585__ which contain all the respective __.nc__ data files. 

The scripts below will loop through each subdirectory and compile all the __hist__ into a single __.nc__ file and the __ssp585__ files into a single file __.nc__ file which will be saved into the user-specfied `target_directory` file path. 

In [47]:
data_directory = '/home/twemmen/other_data/'
target_directory = '/home/twemmen/other_data/'


`f` is a list of file path to the model subdirectories. 

`model_names` should be the name of the model subdirectory, listed to check and so that you can copy and paste into other notebooks

In [48]:
f = sorted(glob(data_directory +'*'))
model_names = [x[len(data_directory):] for x in f]
[print('\'' + x + '\',') for x in model_names]

'CESM2',


[None]

### User-specified variables below will be used as nested loops, for pointing to files, pulling out variables, and saving the file. You may have more than one type of variable in a file, just put it in the list `variables`. 

In [49]:
variables = ['siconca','tos']

# Run the cell below for combining netcdf files *without* interpolation
### files will be created in the `target_directory`  , may take awhile 

In [50]:
for a,i in enumerate(f):
    print(model_names[a])
    for b,j in enumerate(variables):
        for c,k in enumerate(['hist','ssp585']):
            g = sorted(glob(i +'/' + k + '/'+ j + '*'))
            lat = Dataset(g[0],'r').variables['lat'][:]
            lon = Dataset(g[0],'r').variables['lon'][:]
            model_time = []
            global_var = np.zeros(((2017-1850)*12,len(lat),len(lon))) 
            global_var[:] = np.nan
            time_count = 0
            time_str = ['185001-201412','201501-210001']
            for d,l in enumerate(g):
                ncfile = Dataset(l,'r')
                file_time = ncfile.variables['time'][:]
                model_time = np.append(model_time,file_time)
                global_var[time_count:time_count+len(file_time),:,:] = ncfile.variables[j][:]
                time_count = time_count+len(file_time)
            
            
            joint_netcdf = Dataset(target_directory + '/'+ j + '_Amon_' + model_names[a] + '_'+ k + '_'+ time_str[c]+ '_'+str(np.round(np.diff(lat)[0],2)) +'x'+ str(np.round(np.diff(lon)[0],2))+ '.nc','w',format='NETCDF4')

            joint_netcdf.createDimension('time',size = len(model_time))
            joint_netcdf.createDimension('lat',size = len(lat))
            joint_netcdf.createDimension('lon',size = len(lon))





            time_val = joint_netcdf.createVariable('time',np.float64,('time',))
            time_val[:] = model_time
            time_val.units = ncfile.variables['time'].units
            time_val.calendar = ncfile.variables['time'].calendar

            lon_val= joint_netcdf.createVariable('lon',np.float64,('lon',))
            lon_val.units="degree"
            lon_val[:]=lon

            lat_val= joint_netcdf.createVariable('lat',np.float64,('lat',))
            lat_val.units="degree"
            lat_val[:]=lat

            var_val = joint_netcdf.createVariable(j,np.float64,('time','lat','lon'))
            var_val[:] = global_var[0:len(model_time),:,:]


            joint_netcdf.close()


CESM2


cannot be safely cast to variable data type


In [43]:
Dataset(g[0],'r').variables

{'tosga': <class 'netCDF4._netCDF4.Variable'>
 float32 tosga(time)
     _FillValue: 1e+20
     cell_methods: area: mean where sea time: mean
     comment: POP_surf_mean(KMT,TAREA,TEMP[:,0,:,:])
     coordinates: time
     description: This may differ from "surface temperature" in regions of sea ice or floating ice shelves. For models using conservative temperature as the prognostic field, they should report the top ocean layer as surface potential temperature, which is the same as surface in situ temperature.
     frequency: mon
     id: tosga
     long_name: Global Average Sea Surface Temperature
     mipTable: Omon
     missing_value: 1e+20
     out_name: tosga
     prov: Omon ((isd.003))
     realm: ocean
     standard_name: sea_surface_temperature
     time: time
     time_label: time-mean
     time_title: Temporal mean
     title: Global Average Sea Surface Temperature
     type: real
     units: degC
     variable_id: tosga
 unlimited dimensions: time
 current shape = (600,)
 fil

# Run the cell below for combining netcdf files *with* interpolation
### files will be created in the `target_directory` 

In [123]:
for a,i in enumerate(f):
    print(model_names[a])
    for b,j in enumerate(['pr','tas']):
        for c,k in enumerate(['hist','ssp585']):
            g = sorted(glob(i +'/' + k + '/'+ j + '*'))
            model_lat = Dataset(g[0],'r').variables['lat'][:]
            model_lon = Dataset(g[0],'r').variables['lon'][:]                
            lats      = np.deg2rad(model_lat)+np.pi/2
            lons      = np.deg2rad(model_lon)  
            model_time = []
            global_var = np.zeros(((2017-1850)*12,len(regrid_lat),len(regrid_lon)))
            global_var[:] = np.nan
            time_count = 0
            time_str = ['185001-201412','201501-210001']
            for d,l in enumerate(g):
                ncfile = Dataset(l,'r')
                file_time = ncfile.variables['time'][:]
                model_time = np.append(model_time,file_time)
                temp_data = ncfile.variables[j][:]
                
                for m in np.arange(len(file_time)):

                    if np.sum(np.logical_or(model_lat>=90,model_lat<=-90))>0:
                        valid_lats = np.squeeze(np.where(np.logical_and(model_lat<90,model_lat>-90)))
                        interp_cmip6    = RectSphereBivariateSpline(lats[valid_lats], lons, temp_data[m,valid_lats,:]) 
                        regrid_cmip6    = interp_cmip6.ev(new_lats.ravel(),new_lons.ravel()).reshape((len(regrid_lon),len(regrid_lat))).T

                    else:       
                        interp_cmip6    = RectSphereBivariateSpline(lats, lons, temp_data[m]) 
                        regrid_cmip6    = interp_cmip6.ev(new_lats.ravel(),new_lons.ravel()).reshape((len(regrid_lon),len(regrid_lat))).T

                    global_var[time_count+m,:,:] = regrid_cmip6
                
                time_count = time_count+len(file_time)




            joint_netcdf = Dataset(target_directory + '/'+ j + '_Amon_' + model_names[a] + '_'+ k + '_'+ time_str[c]+ '_regrid.nc','w',format='NETCDF4')

            joint_netcdf.createDimension('time',size = len(model_time))
            joint_netcdf.createDimension('lat',size = len(regrid_lat))
            joint_netcdf.createDimension('lon',size = len(regrid_lon))





            time_val = joint_netcdf.createVariable('time',np.float64,('time',))
            time_val[:] = model_time
            time_val.units = ncfile.variables['time'].units
            time_val.calendar = ncfile.variables['time'].calendar

            lon_val= joint_netcdf.createVariable('lon',np.float64,('lon',))
            lon_val.units="degree"
            lon_val[:]=regrid_lon

            lat_val= joint_netcdf.createVariable('lat',np.float64,('lat',))
            lat_val.units="degree"
            lat_val[:]=regrid_lat

            var_val = joint_netcdf.createVariable(j,np.float64,('time','lat','lon'))
            var_val[:] = global_var[0:len(model_time),:,:]


            joint_netcdf.close()

CESM2


cannot be safely cast to variable data type


IPSL-CM6A-LR
GISS-E2-1-G
MPI-ESM1-2-LR
GFDL-ESM4
FGOALS-G3
