In [None]:
ToDo:
    - Tests
        - Fehlermedlung der API einbauen
        - Dask
        - Docker
    - STAC
    - SST Main

In [13]:
import netCDF4 as net
import numpy   as np
import pandas  as pd
import xarray  as xr
import matplotlib.pyplot as plt
import shutil
import urllib.request as request
from contextlib import closing
from ftplib import FTP 
from datetime import datetime
import os

In [14]:
def download_file(year, directory):
    '''
    Downloads the sst data file for the given year
    
    Parameters:
        year (int): The year the sst is needed
   '''
    
    start = datetime.now()
    ftp = FTP('ftp.cdc.noaa.gov')
    ftp.login()
    ftp.cwd('/Projects/Datasets/noaa.oisst.v2.highres/')

    files = ftp.nlst()
    counter = 0

    for file in files:
        if file == 'sst.day.mean.' + str(year) + '.nc':
            print("Downloading..." + file)
            ftp.retrbinary("RETR " + file, open(directory + file, 'wb').write)      
            ftp.close()
            end = datetime.now()
            diff = end - start
            print('All files downloaded for ' + str(diff.seconds) + 's')
        else: counter += 1
    
        if counter == len(files):
            print('No matching dataset found for this year')

In [15]:
def merge_datacubes(ds_merge):
    '''
    Merges datacubes by coordinates
    
    Parameters:
        ds_merge (xArray Dataset[]): Array of datasets to be merged
        
    Returns: 
        ds1 (xArray Dataset): A single datacube with all merged datacubes
    '''
    
    start = datetime.now()
    if len(ds_merge) == 0:
        print("Error: No datacubes to merge")
        return
    if len(ds_merge) == 1:
        return ds_merge[0]
    else:
        print('Start merging')
        ds1 = ds_merge[0]
        count = 1
        while count < len(ds_merge):
            start1 = datetime.now()
            ds1 =  xr.combine_by_coords([ds1, ds_merge[count]])
            count += 1
            print("Succesfully merged cube nr " + str(count) + " to the base cube ")
            end = datetime.now()
            diff = end - start1
            print('All cubes merged for ' + str(diff.seconds) + 's')
        end = datetime.now()
        diff = end - start
        print('All cubes merged for ' + str(diff.seconds) + 's')
        return ds1

In [16]:
def delete(path):
    '''
    Deletes the file/directory with the given path

    Parameters:
        path (str): Path to the file/directory
    '''

    if os.path.exists(path):
        os.remove(path)
        print("File deleted: " + path)
    else:
        print("The file does not exist")

In [17]:
def timeframe(ds, start, end):
    '''
    Slices Datacube down to given timeframe
      
    Parameters:
        ds (ds): Sourcedataset
        start (str): Start of the timeframe eg '2018-07-13'
        end (str): End of the timeframe eg '2018-08-23'
       
    Returns:
        ds_selected (ds): Dataset sliced to timeframe
    '''
    
    if start > end:
        print("Start and end of the timeframe are not compatible!")
    else:    
        ds_selected = ds.sel(time = slice(start, end))
        return ds_selected    

In [18]:
def safe_datacube(ds, name, directory):
    '''
    Saves the Datacube as NetCDF (.nc)
      
    Parameters:
        ds (ds): Sourcedataset
        name (str): Name or timeframe for saving eg '2017', '2015_2019'
    '''
    
    print("Start saving")
    if type(name) != str:
        name = str(name)
    ds.to_netcdf(directory + "sst.day.mean." + name + ".nc")
    print("Done saving")

In [19]:
def main (yearBegin, yearEnd, directory, timeframeBegin, timeframeEnd, name):
    '''
    The main function to download, merge and safe the datacubes
      
    Parameters:
        yearBegin (int): First year to download
        yearEnd (int): Last year to download
        directory (str): Pathlike string to the directory
        timeframeBegin (datetime 64 [ns]): Day to begin the sclice
        timeframeEnd (datetime 64 [ns]): Day to end the sclice
        name (str): Name or timeframe for saving eg 'datacube', '2015_2019'
    '''
    
    if yearBegin > yearEnd:
        print("Wrong years")
    else:
        i = yearBegin
        j = 0
        while i <= yearEnd:
            download_file(i, directory)
            i = i + 1
        if (yearEnd - yearBegin) == 1 or yearBegin == yearEnd:
                print("Nothing to merge")
        else:
                while j <= len(os.listdir(directory)):
                    for filename in os.listdir(directory):
                        cube = xr.open_dataset(filename)
                        ds_merge[j] = cube
                        j = j + 1
                merge_datacubes(ds_merge)
                for filename in os.listdir(directory):
                    filename.close()
                    if filename == "datacube":
                        continue
                    else:
                        delete(os.path.join(directory, filename))
    ds = xr.open_dataset(directory + "datacube")
    ds_sliced = timeframe(ds, timeframeBegin, timeframeEnd)
    safe_datacube(ds_sliced, name, directory)

Execution

In [20]:
yearBegin = 2016
yearEnd = 2018
directory = 'D:/Tatjana/Documents/Studium/Semester 5 - Abgaben/Geosoftware 2/Code/SST_Data/'
timeframeBegin = '2016-01-01'
timeframeEnd = '2018-12-31'
name = "datacube"

main(yearBegin, yearEnd, directory, timeframeBegin, timeframeEnd, name)

Downloading...sst.day.mean.2016.nc
All files downloaded for 61s
Downloading...sst.day.mean.2017.nc
All files downloaded for 78s
Downloading...sst.day.mean.2018.nc
All files downloaded for 50s


FileNotFoundError: [Errno 2] No such file or directory: b'D:\\Tatjana\\Documents\\Studium\\Semester 5 - Abgaben\\Geosoftware 2\\Code\\sst.day.mean.2016.nc'

Example

In [6]:
download_file (2016)
download_file (2017)
ds_2016 = xr.open_dataset(systempath+"sst.day.mean.2016.nc")
ds_2017 = xr.open_dataset(systempath+"sst.day.mean.2017.nc")
ds_merge = [ds_2016,ds_2017]
ds_2016_2017 = merge_datacubes(ds_merge)
ds_sliced = timeframe(ds_2016_2017,'2016-07-01','2017-08-01')
safe_datacube(ds_sliced,'Juli16_Juli17')

Downloading...sst.day.mean.2016.nc
All files downloaded for 55s
Downloading...sst.day.mean.2017.nc
All files downloaded for 53s
start merging
succesfully merged cube nr 2 to the base cube 
All cubes merged for 79s
result: 
<xarray.Dataset>
Dimensions:  (lat: 720, lon: 1440, time: 731)
Coordinates:
  * time     (time) datetime64[ns] 2016-01-01 2016-01-02 ... 2017-12-31
  * lat      (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88
  * lon      (lon) float32 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9
Data variables:
    sst      (time, lat, lon) float32 nan nan nan nan ... -1.35 -1.8 -1.8 -1.8
Attributes:
    Conventions:    CF-1.5
    title:          NOAA/NCEI 1/4 Degree Daily Optimum Interpolation Sea Surf...
    institution:    NOAA/National Centers for Environmental Information
    source:         NOAA/NCEI https://www.ncei.noaa.gov/data/sea-surface-temp...
    References:     https://www.psl.noaa.gov/data/gridded/data.noaa.oisst.v2....
    dataset_title:  NOAA