Datasets:
- https://psl.noaa.gov/repository/entry/show?entryid=d3422997-979a-4abb-9bf2-503a80bb647f
- https://psl.noaa.gov/repository/entry/show/PSD+Climate+Data+Repository/Public/PSD+Datasets/PSD+Gridded+Datasets/ncep.reanalysis.derived/surface/slp.mon.mean.nc?entryid=synth%3Ae570c8f9-ec09-4e89-93b4-babd5651e7a9%3AL25jZXAucmVhbmFseXNpcy5kZXJpdmVkL3N1cmZhY2Uvc2xwLm1vbi5tZWFuLm5j&output=html.info

In [1]:
import sys
# Install the netcdf4 conda package into the environment rooted at
# sys.prefix (the prefix of the interpreter running this kernel), rather
# than whichever environment the `conda` executable on PATH defaults to.
# `--yes` answers conda's confirmation prompt so the cell does not block.
!conda install --yes --prefix {sys.prefix} netcdf4

Channels:
 - defaults
 - conda-forge
 - jetbrains
Platform: linux-64
Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.



In [2]:
import datetime as dt

import netCDF4
import pandas as pd

## Prepare 'air' data

In [3]:
# Open the monthly-mean air dataset read-only; the bare expression on the
# last line displays the dataset's metadata summary as the cell output.
air_nc = netCDF4.Dataset("../data/air.mon.mean.nc", mode="r")
air_nc

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format HDF5):
    description:  Data from NCEP initialized reanalysis (4x/day).  These are interpolated to pressure surfaces from model (sigma) surfaces.
    platform: Model
    Conventions: COARDS
    NCO: 20121012
    history: Mon Jul  5 21:45:36 1999: ncrcat air.mon.mean.nc /Datasets/ncep.reanalysis.derived/pressure/air.mon.mean.nc /dm/dmwork/nmc.rean.ingest/combinedMMs/air.mon.mean.nc
/home/hoop/crdc/cpreanjuke2farm/cpreanjuke2farm Tue Oct 17 20:07:08 1995 from air.85.nc
created 95/02/06 by Hoop (netCDF2.3)
Converted to chunked, deflated non-packed NetCDF4 2014/09
    title: monthly mean air from the NCEP Reanalysis
    dataset_title: NCEP-NCAR Reanalysis 1
    References: http://www.psl.noaa.gov/data/gridded/data.ncep.reanalysis.derived.html
    dimensions(sizes): level(17), lat(73), lon(144), time(917)
    variables(dimensions): float32 level(level), float32 lat(lat), float32 lon(lon), float64 time(ti

In [4]:
# 'time' and 'level' are kept as netCDF variable handles (not sliced here):
# the time handle is needed for its `units` attribute, and the level handle
# is sliced later when the level mask is built.
air_time = air_nc.variables["time"]
air_level = air_nc.variables["level"]

# Coordinate axes and the data variable are read fully into memory via [:].
air_lon = air_nc.variables["lon"][:]
air_lat = air_nc.variables["lat"][:]
air_var = air_nc.variables["air"][:]

# Decode the numeric time offsets into date objects using the file's units.
air_dtime = netCDF4.num2date(air_time[:], units=air_time.units)

In [5]:
# Boolean masks picking the time step stamped 1998-01-01 and the level
# whose coordinate value is 850 (presumably hPa -- confirm against the
# variable's units attribute).
air_level_slice = air_level[:] == 850
air_datetime_slice = air_dtime[:] == dt.datetime.strptime("1998-01-01", "%Y-%m-%d")

# Applying both masks leaves a leading axis of matches; [0] takes the first
# (lat, lon) grid. Stacking turns that grid into one row per (lat, lon) pair.
air_df = (
    pd.DataFrame(
        air_var[air_datetime_slice, air_level_slice, :, :][0],
        index=air_lat,
        columns=air_lon,
    )
    .stack()
    .reset_index()
    .set_axis(["lat", "lon", "air"], axis=1)
)
air_df

Unnamed: 0,lat,lon,air
0,90.0,0.0,-20.349998
1,90.0,2.5,-20.349998
2,90.0,5.0,-20.349998
3,90.0,7.5,-20.349998
4,90.0,10.0,-20.349998
...,...,...,...
10507,-90.0,347.5,-12.859993
10508,-90.0,350.0,-12.859993
10509,-90.0,352.5,-12.859993
10510,-90.0,355.0,-12.859993


In [6]:
# Write the long-format table to CSV; index=False omits the row-number column.
air_df.to_csv("../data/air.csv", encoding="utf-8", index=False)

## Prepare 'slp' data

In [7]:
# Open the monthly-mean slp dataset read-only; the bare expression on the
# last line displays the dataset's metadata summary as the cell output.
slp_nc = netCDF4.Dataset("../data/slp.mon.mean.nc", mode="r")
slp_nc

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format HDF5):
    description: Data is from NMC initialized reanalysis
(4x/day).  These are the 0.9950 sigma level values.
    platform: Model
    Conventions: COARDS
    NCO: 20121012
    history: Thu May  4 18:12:35 2000: ncrcat -d time,0,622 /Datasets/ncep.reanalysis.derived/surface/slp.mon.mean.nc ./surface/slp.mon.mean.nc
Mon Jul  5 23:22:35 1999: ncrcat slp.mon.mean.nc /Datasets/ncep.reanalysis.derived/surface/slp.mon.mean.nc /dm/dmwork/nmc.rean.ingest/combinedMMs/slp.mon.mean.nc
/home/hoop/crdc/cpreanjuke2farm/cpreanjuke2farm Thu Oct 26 23:42:16 1995 from pre.sig995.85.nc
created 95/02/06 by Hoop (netCDF2.3)
Converted to chunked, deflated non-packed NetCDF4 2014/09
    title: monthly mean slp from the NCEP Reanalysis
    dataset_title: NCEP-NCAR Reanalysis 1
    References: http://www.psl.noaa.gov/data/gridded/data.ncep.reanalysis.derived.html
    dimensions(sizes): lat(73), lon(144), time(917)
    v

In [8]:
# 'time' is kept as a netCDF variable handle so its `units` attribute can
# be used when decoding the time axis below.
slp_time = slp_nc.variables["time"]

# Coordinate axes and the data variable are read fully into memory via [:].
slp_lon = slp_nc.variables["lon"][:]
slp_lat = slp_nc.variables["lat"][:]
slp_var = slp_nc.variables["slp"][:]

# Decode the numeric time offsets into date objects using the file's units.
slp_dtime = netCDF4.num2date(slp_time[:], units=slp_time.units)

In [9]:
# Boolean mask picking the time step stamped 1998-01-01.
slp_datetime_slice = slp_dtime[:] == dt.datetime.strptime("1998-01-01", "%Y-%m-%d")

# Applying the mask leaves a leading axis of matches; [0] takes the first
# (lat, lon) grid. Stacking turns that grid into one row per (lat, lon) pair.
slp_df = (
    pd.DataFrame(
        slp_var[slp_datetime_slice, :, :][0],
        index=slp_lat,
        columns=slp_lon,
    )
    .stack()
    .reset_index()
    .set_axis(["lat", "lon", "slp"], axis=1)
)
slp_df

Unnamed: 0,lat,lon,slp
0,90.0,0.0,1022.198059
1,90.0,2.5,1022.198059
2,90.0,5.0,1022.198059
3,90.0,7.5,1022.198059
4,90.0,10.0,1022.198059
...,...,...,...
10507,-90.0,347.5,999.845459
10508,-90.0,350.0,999.845459
10509,-90.0,352.5,999.845459
10510,-90.0,355.0,999.845459


In [10]:
# Write the long-format table to CSV; index=False omits the row-number column.
slp_df.to_csv("../data/slp.csv", encoding="utf-8", index=False)