In [1]:
!pip install rasterio rioxarray geopandas netcdf4 h5netcdf xarray shapely scikit-learn opencv-python imageio folium pyproj



You should consider upgrading via the 'C:\Users\Cindy\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [2]:
import os, glob, sys
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import imageio
from scipy.ndimage import zoom
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Open the GFS NetCDF file with xarray and print its text summary
# (dimensions, coordinates and data variables).
gfs = xr.open_dataset(filename_or_obj='data/gfs.nc')
print(gfs)

<xarray.Dataset> Size: 7MB
Dimensions:                (time: 85, lat: 37, lon: 81, ntb: 2)
Coordinates:
  * time                   (time) datetime64[ns] 680B 2019-01-11 ... 2019-02-01
  * lat                    (lat) float32 148B 50.0 49.75 49.5 ... 41.25 41.0
  * lon                    (lon) float32 324B 266.0 266.2 266.5 ... 285.8 286.0
Dimensions without coordinates: ntb
Data variables:
    valid_date_time        (time) |S10 850B ...
    ref_date_time          (time) |S10 850B ...
    forecast_hour          (time) timedelta64[ns] 680B ...
    TMP_L103               (time, lat, lon) float32 1MB ...
    R_H_L103               (time, lat, lon) float32 1MB ...
    U_GRD_L103             (time, lat, lon) float32 1MB ...
    V_GRD_L103             (time, lat, lon) float32 1MB ...
    PRES_L1                (time, lat, lon) float32 1MB ...
    PRATE_L1_Avg_1         (time, lat, lon) float32 1MB ...
    time_bnds              (time, ntb) datetime64[ns] 1kB ...
    valid_date_time_range  (ti

In [5]:
# Open the HRRR NetCDF file with xarray and print its text summary.
# NOTE(review): the recorded summary reports a total size of about 4GB —
# be careful about loading every variable into memory at once.
hrrr = xr.open_dataset(filename_or_obj='data/hrrr.nc')
print(hrrr)

<xarray.Dataset> Size: 4GB
Dimensions:             (time: 696, y: 365, x: 604)
Coordinates:
    latitude            (y, x) float64 2MB ...
    longitude           (y, x) float64 2MB ...
  * time                (time) datetime64[ns] 6kB 2019-01-11 ... 2019-02-08T2...
  * x                   (x) float64 5kB 0.0 3e+03 6e+03 ... 1.806e+06 1.809e+06
  * y                   (y) float64 3kB 0.0 3e+03 6e+03 ... 1.089e+06 1.092e+06
Data variables:
    PRATE_surface       (time, y, x) float32 614MB ...
    air_temp            (time, y, x) float32 614MB ...
    pressure_sea_level  (time, y, x) float32 614MB ...
    rel_humidty         (time, y, x) float32 614MB ...
    sky_cover           (time, y, x) float32 614MB ...
    windu               (time, y, x) float32 614MB ...
    windv               (time, y, x) float32 614MB ...
Attributes:
    Conventions:          CF-1.0
    GRIB2_grid_template:  30
    History:              Mon Nov  2 21:22:14 2020: ncks -O --deflate 1 nc_fi...
    NCO:         

In [6]:
# Open the temperature NetCDF file with xarray and print its text summary.
# In the recorded output the `temp` variable is indexed by (time, nx, ny),
# with two-dimensional `lat`/`lon` coordinates on (nx, ny).
temp = xr.open_dataset(filename_or_obj='data/temp.nc')
print(temp)

<xarray.Dataset> Size: 96MB
Dimensions:      (time: 21, nx: 1024, ny: 1024)
Coordinates:
    lat          (nx, ny) float32 4MB ...
    lon          (nx, ny) float32 4MB ...
  * time         (time) datetime64[ns] 168B 2019-01-11 2019-01-12 ... 2019-01-31
Dimensions without coordinates: nx, ny
Data variables:
    day_of_year  (time) int32 84B ...
    temp         (time, nx, ny) float32 88MB ...
    year         (time) int32 84B ...
Attributes:
    institution:    NOAA/GLERL/COASTWATCH
    comment:        Great Lakes GLSEA & NIC Ice
    author:         gregory.lang@noaa.gov
    references:     https://coastwatch.glerl.noaa.gov/
    creation_date:  Mon Jan  6 15:05:13 2020 GMT
    disclaimer:     https://www.glerl.noaa.gov/home/notice.html
    history:        Mon Jan  6 15:08:49 2020: /opt/local/bin/ncks -O --deflat...
    NCO:            netCDF Operators version 4.8.1 (Homepage = http://nco.sf....


In [7]:
# Load the ice-classification data: one xarray Dataset per NetCDF file,
# with the files ordered by name.
iceclass_files = sorted(glob.glob('data/iceclass/*.nc'))

# Fail fast when the pattern matches nothing (e.g. the notebook was started
# from a different working directory) instead of silently carrying an empty
# list into later cells.
if not iceclass_files:
    raise FileNotFoundError("No NetCDF files found matching 'data/iceclass/*.nc'")

iceclass_datasets = [xr.open_dataset(f) for f in iceclass_files]

# Prints the summary of every dataset in the list.
print(iceclass_datasets)

[<xarray.Dataset> Size: 472MB
Dimensions:                 (y: 3362, x: 3512, xfit: 6)
Coordinates:
    longitude               (y, x) float32 47MB ...
    latitude                (y, x) float32 47MB ...
Dimensions without coordinates: y, x, xfit
Data variables: (12/59)
    acquisition_time        datetime64[ns] 8B ...
    nx                      int32 4B ...
    ny                      int32 4B ...
    nx0                     int32 4B ...
    ny0                     int32 4B ...
    nx00                    int32 4B ...
    ...                      ...
    incid_coef              (xfit) float64 48B ...
    incid_xexp              (xfit) float32 24B ...
    incid_yexp              (xfit) float32 24B ...
    rlook_coef              (xfit) float64 48B ...
    rlook_xexp              (xfit) float32 24B ...
    rlook_yexp              (xfit) float32 24B ...
Attributes: (12/38)
    title:                                  SAR winds at 10-m height neutral ...
    institution:                   

In [8]:
# Open the GFS file for the test period and print its text summary.
# In the recorded output the `time` coordinate has dtype object with dates in
# the year 1500, unlike the datetime64 times of the training file.
# NOTE(review): presumably a placeholder period — confirm before aligning
# these times with the training data.
gfs_test = xr.open_dataset(filename_or_obj='test/gfs_weather_test_period.nc')
print(gfs_test)

<xarray.Dataset> Size: 1MB
Dimensions:         (time: 17, lat: 37, lon: 81, ntb: 2)
Coordinates:
  * lat             (lat) float32 148B 50.0 49.75 49.5 49.25 ... 41.5 41.25 41.0
  * lon             (lon) float32 324B 266.0 266.2 266.5 ... 285.5 285.8 286.0
  * time            (time) object 136B 1500-07-02 00:00:00 ... 1500-07-06 00:...
Dimensions without coordinates: ntb
Data variables:
    PRATE_L1_Avg_1  (time, lat, lon) float32 204kB ...
    PRES_L1         (time, lat, lon) float32 204kB ...
    R_H_L103        (time, lat, lon) float32 204kB ...
    TMP_L103        (time, lat, lon) float32 204kB ...
    T_CDC_L10       (time, lat, lon) float32 204kB ...
    U_GRD_L103      (time, lat, lon) float32 204kB ...
    V_GRD_L103      (time, lat, lon) float32 204kB ...
    forecast_hour   (time) timedelta64[ns] 136B ...
    ref_date_time   (time) |S10 170B ...
    time_bnds       (time, ntb) object 272B ...
Attributes:
    Creation date and time:  2025-11-02 09:18:27 -0700
    Conventions: 

In [9]:
# Open the HRRR file for the test period and print its text summary.
# In the recorded output the `time` coordinate has dtype object with dates in
# the year 1500 rather than datetime64 values.
# NOTE(review): presumably a placeholder period — confirm how it maps to real dates.
hrrr_test = xr.open_dataset(filename_or_obj='test/hrrr_weather_test_period.nc')
print(hrrr_test)

<xarray.Dataset> Size: 602MB
Dimensions:             (time: 97, y: 365, x: 604)
Coordinates:
    latitude            (y, x) float64 2MB ...
    longitude           (y, x) float64 2MB ...
  * time                (time) object 776B 1500-07-01 01:00:00 ... 1500-07-05...
  * x                   (x) float64 5kB 0.0 3e+03 6e+03 ... 1.806e+06 1.809e+06
  * y                   (y) float64 3kB 0.0 3e+03 6e+03 ... 1.089e+06 1.092e+06
Data variables:
    PRATE_surface       (time, y, x) float32 86MB ...
    air_temp            (time, y, x) float32 86MB ...
    pressure_sea_level  (time, y, x) float32 86MB ...
    rel_humidty         (time, y, x) float32 86MB ...
    sky_cover           (time, y, x) float32 86MB ...
    windu               (time, y, x) float32 86MB ...
    windv               (time, y, x) float32 86MB ...
Attributes:
    Conventions:               CF-1.0
    GRIB2_grid_template:       30
    NCO:                       netCDF Operators version 5.3.4 (Homepage = htt...
    nco_openm

In [10]:
# Open the initial-condition file for the test period and print its text summary.
# The recorded output shows a single `sst` variable on one-dimensional
# `lat`/`lon` coordinates.
initial_conditions = xr.open_dataset(filename_or_obj='test/glsea_ice_test_initial_condition.nc')
print(initial_conditions)

<xarray.Dataset> Size: 4MB
Dimensions:  (lon: 1024, lat: 1024)
Coordinates:
  * lon      (lon) float64 8kB -92.41 -92.39 -92.38 ... -75.9 -75.89 -75.87
  * lat      (lat) float64 8kB 38.87 38.89 38.9 38.91 ... 50.57 50.58 50.59 50.6
Data variables:
    sst      (lat, lon) float32 4MB ...
