https://usicecenter.gov/Products/ImsHome

https://nsidc.org/data/user-resources/help-center/how-access-data-using-ftp-client-command-line-wget-or-python

https://www.itsonlyamodel.us/

In [None]:
from ftplib import FTP
import os
from os.path import join
from datetime import datetime
import rioxarray as rio
import numpy as np
import pandas as pd
import rioxarray as rxa
import urllib.request
import gzip
import datetime
import shutil
import pandas as pd
import xarray as xr
import rioxarray as rxa
from pyproj import Transformer

def decompress(infile, tofile):
    """
    Decompress a gzip file to a new file on disk.

    The data is streamed in chunks so a large archive is never held fully
    in memory.

    Args:
    infile: Path of the gzip-compressed input file.
    tofile: Path the decompressed bytes are written to (overwritten if it exists).

    Returns:
    tofile, unchanged, so the call can be used inline.
    """
    with gzip.open(infile, 'rb') as inf, open(tofile, 'wb') as ouf:
        shutil.copyfileobj(inf, ouf)

    return tofile

def get_ims_day_data(year: str, doy: str, tmp_dir: str, clean: bool = True) -> xr.DataArray:
    """
    Download and decompress one days worth of IMS data.

    Args:
    year: Year of the data you want. It is only interpolated into the URL, so
    an int works as well as a str.
    doy: Calendar day of year you want in 'DDD' format. Range is 001 - 366.
    tmp_dir: Directory the archive is downloaded to and decompressed in. It is
    created if it does not exist.
    clean: If True the whole tmp_dir is deleted before returning [default: True]

    Returns:
    The IMS data for that day as opened by rioxarray.
    """
    os.makedirs(tmp_dir, exist_ok = True)
    # Write into tmp_dir through explicit paths instead of os.chdir(tmp_dir):
    # changing the working directory leaks out of this function, and with a
    # relative tmp_dir the next call would create tmp_dir inside tmp_dir.
    gz_name = f'ims{year}{doy}_1km_v1.3.nc.gz'
    local_fp, _ = urllib.request.urlretrieve(f'ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02156/netcdf/1km/{year}/{gz_name}', os.path.join(tmp_dir, gz_name))
    # Strip only the trailing '.gz' so a '.gz' elsewhere in the path is untouched.
    out_file = decompress(local_fp, local_fp[:-len('.gz')])
    ims = rxa.open_rasterio(out_file, decode_times = False)

    if clean:
        # open_rasterio reads lazily, so pull the values into memory and release
        # the file handle before the file underneath it is deleted.
        # NOTE(review): assumes open_rasterio returned a single DataArray/Dataset.
        ims = ims.load()
        ims.close()
        shutil.rmtree(tmp_dir)
    return ims

def download_snow_cover(dataset: xr.Dataset, dates: (str, str), tmp_dir = './tmp') -> xr.Dataset:
    """
    Download IMS snow-cover images and merge them into a dataset.

    Args:
    dataset: Full dataset to add IMS data to
    dates: Tuple of (start, end) dates to get data between (YYYY-MM-DD). Both
    the start and the end day are included.
    tmp_dir: filepath to save temporary downloads to [default: './tmp']

    Returns:
    The dataset with one IMS image merged in for every day that could be
    downloaded. Days missing on the server are reported and skipped.

    Raises:
    ValueError: if the start date is not before the end date.
    """
    # Imported locally on purpose: elsewhere in this file `import datetime`
    # rebinds the name `datetime` to the module (so `datetime.strptime` fails),
    # and URLError is never imported at file level.
    from datetime import datetime
    from urllib.error import URLError

    start, end = (datetime.strptime(d, '%Y-%m-%d') for d in dates)
    if not start < end:
        raise ValueError(f'Start date {dates[0]} must be before end date {dates[1]}')

    # Walking real calendar days handles leap years (day 366), ranges inside a
    # single year and ranges spanning several years without special cases.
    for date in pd.date_range(start, end, freq = 'D'):
        print(date.date())
        try:
            ims = get_ims_day_data(date.year, f'{date.dayofyear:03}', tmp_dir = tmp_dir, clean = False) #revert to clean = True at somepoint
        except URLError as e:
            print(e)
            print(f'Missing {date.date()}')
            continue
        dataset = add_ims_data(dataset, ims, date)

    return dataset

def add_ims_data(dataset: xr.Dataset, ims: xr.DataArray, date: pd.Timestamp) -> xr.Dataset:
    """
    Add xarray dataArray of IMS data to a larger xarray Dataset.

    The IMS image is clipped to the footprint of dataset['s1'], reprojected
    onto the 's1' grid, stamped with `date` and merged in as variable 'ims'.

    Args:
    dataset: Xarray dataset to add IMS data to. Must contain an 's1' variable.
    ims: IMS dataArray for one days worth of data
    date: Date of IMS retrieval

    Returns:
    A new dataset holding the contents of `dataset` plus the 'ims' variable.
    """
    target = dataset['s1']
    # Take both CRSs from the rasters themselves. EPSG:9001 is the code of the
    # unit 'metre', not of a coordinate reference system.
    transformer = Transformer.from_crs(target.rio.crs, ims.rio.crs, always_xy=True)
    # transform_bounds, not transform: transform() would read the four bounds
    # as one (x, y, z, t) point and leave maxx/maxy untransformed.
    polar_bounds = transformer.transform_bounds(*target.rio.bounds())
    ims = ims.rio.clip_box(*polar_bounds)
    ims = ims.rio.reproject_match(target)
    ims = ims.assign_coords(time = [date])
    dataset = xr.merge([dataset, ims.rename('ims')])

    return dataset

In [None]:
import pickle
# NOTE: pickle.load can execute arbitrary code; only open pickles you created yourself.
with open('/Users/zachkeskinen/Documents/spicy-snow/tests/test_data/s1_da.pkl', 'rb') as f:
    da = pickle.load(f)
ds = da.to_dataset(name = 's1', promote_attrs = True)
days = [pd.to_datetime(d) for d in ds.time.values]
tmp_dir= '/Users/zachkeskinen/Documents/spicy-snow/data/tmp'
for day in days:
    print(day)
    # IMS files are named by day of year (001-366), not by day of month.
    ims = get_ims_day_data(day.year, f'{day.dayofyear:03}', tmp_dir = tmp_dir, clean = False) #revert to clean = True at somepoint
    ds = add_ims_data(ds, ims, day)

In [None]:
import gzip
import os
import shutil
import tempfile

import netCDF4

def open_netcdf(fname):
    """
    Open a netCDF file, transparently decompressing it first if it is gzipped.

    Args:
    fname: Path to a netCDF file. A name ending in '.gz' is decompressed into
    a temporary file before being opened.

    Returns:
    The opened netCDF4.Dataset.
    """
    if not fname.endswith(".gz"):
        return netCDF4.Dataset(fname)

    tmp = tempfile.NamedTemporaryFile(delete=False)
    try:
        # Both handles are closed by the with block even if decompression fails.
        with tmp, gzip.open(fname, 'rb') as infile:
            shutil.copyfileobj(infile, tmp)
        return netCDF4.Dataset(tmp.name)
    finally:
        # Always remove the temporary copy, including on errors.
        # NOTE(review): unlinking a file that is still open presumably only
        # works on POSIX systems - confirm if Windows support matters.
        os.unlink(tmp.name)

In [None]:
import gzip
import shutil

def parse_lines(file_path):
    """
    Read a text file of 'key: value' lines into a dictionary.

    Each line is split on its first colon only; key and value are stripped of
    surrounding whitespace. Lines without a colon are ignored, and a repeated
    key keeps the value of its last occurrence.
    """
    parsed = {}
    with open(file_path) as handle:
        for raw in handle:
            key, colon, value = raw.strip().partition(':')
            if colon:
                parsed[key.strip()] = value.strip()
    return parsed

def read_data_1km_compressed(filename, nx=8000):
    """
    Read a fixed-width text grid in which every character is one cell.

    Args:
    filename: Path (or anything pandas.read_fwf accepts) of the text grid.
    nx: Number of one-character columns per row [default: 8000]

    Returns:
    2-D numpy array with one row per line of the file and nx columns.
    """
    # One width-1 field per grid column.
    widths = [1] * nx
    data = pd.read_fwf(filename, widths=widths, lineterminator='\n', header=None).values
    return data

def read_data_1km(year=2017, doy=300):
    """
    Download one day of 1 km IMS netCDF data and save it decompressed.

    Args:
    year: Year of the file to fetch [default: 2017]
    doy: Day of year of the file to fetch, as an int or a digit string. It is
    zero-padded to three digits for the file name [default: 300]

    Returns:
    None. The decompressed file is written to the hard-coded ims_tmp.nc path.
    """
    url = ("ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02156/netcdf/1km/%s/ims%s%03d_1km_v1.3.nc.gz" %
           (year, year, int(doy)))
    print(url)
    # gzip.open only accepts local paths or file objects, so fetch the URL
    # first and decompress the response stream on the fly.
    with urllib.request.urlopen(url) as response:
        with gzip.GzipFile(fileobj=response) as f_in:
            with open('/Users/zachkeskinen/Documents/spicy-snow/data/ims_tmp.nc', 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)

In [None]:
# Fetch the compressed IMS netCDF for one day into the current directory.
year, doy = 2017, '001'
ims_gz_name = f'ims{year}{doy}_1km_v1.3.nc.gz'
local_fp, _ = urllib.request.urlretrieve(
    f'ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02156/netcdf/1km/{year}/{ims_gz_name}',
    ims_gz_name,
)

In [None]:
# Open a previously downloaded IMS netCDF to inspect it.
ims_nc_path = '/Users/zachkeskinen/Documents/spicy-snow/data/tmp/ims2019363_1km_v1.3.nc'
rxa.open_rasterio(ims_nc_path)

In [None]:
import urllib.request
import gzip
import datetime
import shutil
import pandas as pd
import xarray as xr
import rioxarray as rxa
from pyproj import Transformer

def decompress(infile, tofile):
    """
    Decompress a gzip file to a new file on disk.

    The data is streamed in chunks so a large archive is never held fully
    in memory.

    Args:
    infile: Path of the gzip-compressed input file.
    tofile: Path the decompressed bytes are written to (overwritten if it exists).

    Returns:
    tofile, unchanged, so the call can be used inline.
    """
    with gzip.open(infile, 'rb') as inf, open(tofile, 'wb') as ouf:
        shutil.copyfileobj(inf, ouf)

    return tofile

def get_ims_data(year, doy):
    """
    Download one day of 1 km IMS netCDF data into the current directory.

    Args:
    year: Year of the file to fetch.
    doy: Day of year exactly as it appears in the file name (e.g. '033').

    Returns:
    Path of the decompressed netCDF file.
    """
    archive_name = f'ims{year}{doy}_1km_v1.3.nc.gz'
    source_url = f'ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02156/netcdf/1km/{year}/{archive_name}'
    downloaded, _headers = urllib.request.urlretrieve(source_url, archive_name)
    return decompress(downloaded, downloaded.replace('.gz',''))

def add_ims_data(dataset, ims, date):
    """
    Add IMS data for one day to a georeferenced xarray object.

    The IMS image is clipped to the footprint of `dataset`, reprojected onto
    its grid, stamped with `date` and merged in under the name 'ims'.

    Args:
    dataset: Georeferenced xarray object to add IMS data to
    ims: IMS dataArray for one days worth of data
    date: Date of IMS retrieval

    Returns:
    The result of merging `dataset` with the reprojected 'ims' data.
    """
    # Take both CRSs from the rasters themselves. EPSG:9001 is the code of the
    # unit 'metre', not of a coordinate reference system.
    transformer = Transformer.from_crs(dataset.rio.crs, ims.rio.crs, always_xy=True)
    # transform_bounds, not transform: transform() would read the four bounds
    # as one (x, y, z, t) point and leave maxx/maxy untransformed.
    polar_bounds = transformer.transform_bounds(*dataset.rio.bounds())
    ims = ims.rio.clip_box(*polar_bounds)
    ims = ims.rio.reproject_match(dataset)
    ims = ims.assign_coords(time = [date])
    dataset = xr.merge([dataset, ims.rename('ims')])

    return dataset

# import pickle
# with open('/Users/zachkeskinen/Documents/spicy-snow/tests/test_data/s1_da.pkl', 'rb') as f:
#     da = pickle.load(f)
# ds = da.to_dataset(name = 's1', promote_attrs = True)
# out_ims = get_ims_data(2017, '033')
# ims = rxa.open_rasterio(out_ims)
# doy = int('032')
# date = pd.to_datetime(f'{2017} {doy}', format = '%Y %j')
# ds = add_ims_data(ds, ims, date)

In [None]:
def get_ims_day_data(year: str, doy: str, tmp_dir: str, clean: bool = True) -> xr.DataArray:
    """
    Download and decompress one days worth of IMS data.

    Args:
    year: Year of the data you want. It is only interpolated into the URL, so
    an int works as well as a str.
    doy: Calendar day of year you want in 'DDD' format. Range is 001 - 366.
    tmp_dir: Directory the archive is downloaded to and decompressed in. It is
    created if it does not exist.
    clean: If True the whole tmp_dir is deleted before returning [default: True]

    Returns:
    The IMS data for that day as opened by rioxarray.
    """
    os.makedirs(tmp_dir, exist_ok = True)
    # Write into tmp_dir through explicit paths instead of os.chdir(tmp_dir):
    # changing the working directory leaks out of this function, and with a
    # relative tmp_dir the next call would create tmp_dir inside tmp_dir.
    gz_name = f'ims{year}{doy}_1km_v1.3.nc.gz'
    local_fp, _ = urllib.request.urlretrieve(f'ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02156/netcdf/1km/{year}/{gz_name}', os.path.join(tmp_dir, gz_name))
    # Strip only the trailing '.gz' so a '.gz' elsewhere in the path is untouched.
    out_file = decompress(local_fp, local_fp[:-len('.gz')])
    ims = rxa.open_rasterio(out_file, decode_times = False)

    if clean:
        # open_rasterio reads lazily, so pull the values into memory and release
        # the file handle before the file underneath it is deleted.
        # NOTE(review): assumes open_rasterio returned a single DataArray/Dataset.
        ims = ims.load()
        ims.close()
        shutil.rmtree(tmp_dir)
    return ims

def download_snow_cover(dataset: xr.Dataset, dates: (str, str), tmp_dir = './tmp') -> xr.Dataset:
    """
    Download IMS snow-cover images and merge them into a dataset.

    Args:
    dataset: Full dataset to add IMS data to
    dates: Tuple of (start, end) dates to get data between (YYYY-MM-DD). Both
    the start and the end day are included.
    tmp_dir: filepath to save temporary downloads to [default: './tmp']

    Returns:
    The dataset with one IMS image merged in for every day that could be
    downloaded. Days missing on the server are reported and skipped.

    Raises:
    ValueError: if the start date is not before the end date.
    """
    # Imported locally on purpose: elsewhere in this file `import datetime`
    # rebinds the name `datetime` to the module (so `datetime.strptime` fails),
    # and URLError is never imported at file level.
    from datetime import datetime
    from urllib.error import URLError

    start, end = (datetime.strptime(d, '%Y-%m-%d') for d in dates)
    if not start < end:
        raise ValueError(f'Start date {dates[0]} must be before end date {dates[1]}')

    # Walking real calendar days handles leap years (day 366), ranges inside a
    # single year and ranges spanning several years without special cases.
    for date in pd.date_range(start, end, freq = 'D'):
        print(date.date())
        try:
            ims = get_ims_day_data(date.year, f'{date.dayofyear:03}', tmp_dir = tmp_dir, clean = False) #revert to clean = True at somepoint
        except URLError as e:
            print(e)
            print(f'Missing {date.date()}')
            continue
        dataset = add_ims_data(dataset, ims, date)

    return dataset

def add_ims_data(dataset: xr.Dataset, ims: xr.DataArray, date: pd.Timestamp) -> xr.Dataset:
    """
    Add xarray dataArray of IMS data to a larger xarray Dataset.

    The IMS image is clipped to the footprint of dataset['s1'], reprojected
    onto the 's1' grid, stamped with `date` and merged in as variable 'ims'.
    Progress is printed before each step.

    Args:
    dataset: Xarray dataset to add IMS data to. Must contain an 's1' variable.
    ims: IMS dataArray for one days worth of data
    date: Date of IMS retrieval

    Returns:
    A new dataset holding the contents of `dataset` plus the 'ims' variable.
    """
    target = dataset['s1']
    print('Transforming coords')
    # Take both CRSs from the rasters themselves. EPSG:9001 is the code of the
    # unit 'metre', not of a coordinate reference system.
    transformer = Transformer.from_crs(target.rio.crs, ims.rio.crs, always_xy=True)
    # transform_bounds, not transform: transform() would read the four bounds
    # as one (x, y, z, t) point and leave maxx/maxy untransformed.
    polar_bounds = transformer.transform_bounds(*target.rio.bounds())
    print('Clipping')
    ims = ims.rio.clip_box(*polar_bounds)
    print('Reprojecting')
    ims = ims.rio.reproject_match(target)
    ims = ims.assign_coords(time = [date])
    print('adding')
    dataset = xr.merge([dataset, ims.rename('ims')])

    return dataset

In [None]:
import pickle
# NOTE: pickle.load can execute arbitrary code; only open pickles you created yourself.
with open('/Users/zachkeskinen/Documents/spicy-snow/tests/test_data/s1_da.pkl', 'rb') as f:
    da = pickle.load(f)
ds = da.to_dataset(name = 's1', promote_attrs = True)
# Convert every time step; indexing time.values[0] inside the comprehension
# repeated the first date once per time step.
days = [pd.to_datetime(d) for d in ds.time.values]
tmp_dir= '/Users/zachkeskinen/Documents/spicy-snow/data/tmp'
for day in days:
    print(day)
    # IMS files are named by day of year (001-366), not by day of month.
    ims = get_ims_day_data(day.year, f'{day.dayofyear:03}', tmp_dir = tmp_dir, clean = False) #revert to clean = True at somepoint
    ds = add_ims_data(ds, ims, day)

In [None]:
# Load a previously pickled dataset into `ds`.
# NOTE: pickle.load can execute arbitrary code; only open pickles you created yourself.
with open('/Users/zachkeskinen/Documents/spicy-snow/data/ims_v1.pkl', 'rb') as f:
    ds = pickle.load(f)

In [None]:
# Display the 'ims' variable of ds.
ds['ims']

In [None]:
# NOTE(review): EPSG:9001 is the code of the unit 'metre', not of a coordinate
# reference system - confirm the intended IMS CRS before relying on this cell.
transformer = Transformer.from_crs(4326, 9001, always_xy=True)
# transform_bounds, not transform: transform() would read the four bounds as
# one (x, y, z, t) point and leave maxx/maxy untransformed.
polar_bounds = transformer.transform_bounds(*ds.rio.bounds())

In [None]:
import urllib
import io
from zipfile import ZipFile

# urllib.urlopen only exists in Python 2; Python 3 moved it to urllib.request.
import gzip
import urllib.request

with urllib.request.urlopen('ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02156/netcdf/1km/2017/ims2017001_1km_v1.3.nc.gz') as mysock:
    memfile = io.BytesIO(mysock.read())
# The download is a single gzip stream, not a zip archive with named members,
# so read it through GzipFile; `f` yields the decompressed netCDF bytes.
f = gzip.GzipFile(fileobj=memfile, mode='rb')

In [None]:
def fpt_download(domain = "ftp://sidads.colorado.edu/pub/DATASETS/NOAA/G02156/netcdf/1km/"):
    """
    Placeholder for an FTP download helper rooted at `domain`.

    The definition had no body, which is a syntax error. Until the download
    logic is written, calling it fails loudly instead.

    Args:
    domain: Base FTP URL to download from.

    Raises:
    NotImplementedError: always.
    """
    raise NotImplementedError('fpt_download is not implemented yet')

In [None]:
# Run the download helper with its defaults (year=2017, doy=300).
read_data_1km()

In [None]:
# Imported here because `import datetime` elsewhere in this file rebinds the
# name `datetime` to the module, which has no strptime.
from datetime import datetime

### The following 4 variables can be changed ###
# 1. Set the directory you would like to download the files to
destdir='/Users/zachkeskinen/Documents/c-snow/contrib/keskinen/data'

# 2. Set the path to the FTP directory that contains the data you wish to download.
# This one points at the 1 km files of NOAA data set G02156 (IMS)
# https://nsidc.org/data/g02156
directory = '/DATASETS/NOAA/G02156/1km/'

# 3. Set the password which will be your email address
password = 'zachkeskinen@gmail.com'

# 4. Set dates to get IMS from
dates = ('2019-12-20', '2020-01-10')
# Parse each date once and keep the pieces the download loop needs.
dates = [{'date': parsed, 'year': parsed.year, 'doy': parsed.timetuple().tm_yday} for parsed in (datetime.strptime(d, '%Y-%m-%d') for d in dates)]

In [None]:
from datetime import datetime
# Start and end of the period, each expanded into the parsed date, its year
# and its day of year.
dates = ('2019-12-20', '2020-01-10')
dates = [
    {'date': parsed, 'year': int(text.split('-')[0]), 'doy': parsed.timetuple().tm_yday}
    for text, parsed in ((text, datetime.strptime(text, '%Y-%m-%d')) for text in dates)
]

In [None]:
############################################
### Don't need to change this code below ###
############################################
import posixpath
import re

# FTP server
ftpdir = 'sidads.colorado.edu'

#Change to the destination directory on own computer where you want to save the files
os.chdir(destdir)

if not dates[0]['date'] < dates[1]['date']:
    raise ValueError('The start date must be before the end date')

# File names look like ims<YYYY><DDD>_...; the group captures the day of year.
ims_name = re.compile(r'ims\d{4}(\d{3})_')

all_files = []
#Connect and log in to the FTP; the with block closes the connection even if a download fails
print('Logging in')
with FTP(ftpdir) as ftp:
    ftp.login('anonymous',password)

    # Change to the directory where the files are on the FTP
    for year in range(dates[0]['year'], dates[1]['year']+1):
        # FTP paths always use '/', so build them with posixpath rather than os.path
        year_dir = posixpath.join(directory, str(year))
        print('Changing to '+ year_dir)
        ftp.cwd(year_dir)

        # Get a list of the files in the FTP directory. Keep only names that
        # look like IMS files instead of blindly dropping the first two entries.
        files = []
        for name in ftp.nlst():
            found = ims_name.match(posixpath.basename(name))
            if found is None:
                continue
            doy = int(found.group(1))
            # Two independent checks so a range inside a single year is clipped
            # at both ends; the start and end days themselves are downloaded.
            if year == dates[0]['year'] and doy < dates[0]['doy']:
                continue
            if year == dates[1]['year'] and doy > dates[1]['doy']:
                continue
            files.append(name)

        # #Download all the files within the FTP directory
        for f in files:
            print('Downloading...' + f)
            with open(f, 'wb') as local_file:
                ftp.retrbinary('RETR ' + f, local_file.write)
        all_files.extend(files)

In [None]:
# Quick check: the 03 format spec zero-pads the number to three characters.
f'{1:03}'

In [None]:
import numpy as np
# Read the header fields and the grid of one IMS ASCII file.
asc_path = '/Users/zachkeskinen/Documents/c-snow/contrib/keskinen/data/ims2019355_00UTC_1km_v1.3.asc'
desc = parse_lines(asc_path)
# The 'Dimensions' header holds two whitespace-separated integers.
rows, cols = (int(n) for n in desc['Dimensions'].split())
# The first 30 lines are skipped before the grid values are read.
ascii_grid = np.loadtxt(asc_path, skiprows=30)

In [None]:
# NOTE(review): incomplete expression (no attribute name after the dot); this cell does not parse as written.
np.

In [None]:
# Parse the grid as one integer per character (after 30 skipped lines) and show its shape.
asc_grid_path = "/Users/zachkeskinen/Documents/c-snow/contrib/keskinen/data/ims2019355_00UTC_1km_v1.3.asc"
np.genfromtxt(asc_grid_path, skip_header=30, delimiter=1, dtype=int).shape