In [30]:
import netCDF4 as net
import numpy   as np
import pandas  as pd
import xarray  as xr
import matplotlib.pyplot as plt
import shutil
import urllib.request as request
from contextlib import closing
from ftplib import FTP 
from datetime import datetime
import os


systempath ="C:/Users/adria/Desktop/Uni/Semester5/Geosoft2/Code/Notebooks/SST/Data/"

In [31]:
def download_file(year):
    '''
    Downloads the sst data file for the given year
    
    Parameters:
        year (int): The year the sst is needed
   '''
    start = datetime.now()
    ftp = FTP('ftp.cdc.noaa.gov')
    ftp.login()
    ftp.cwd('/Projects/Datasets/noaa.oisst.v2.highres/')

    files = ftp.nlst()
    counter = 0

    for file in files:
        if file == 'sst.day.mean.'+ str(year)+'.nc':
            print("Downloading..." + file)
            ftp.retrbinary("RETR " + file, open(systempath + file, 'wb').write)      
            ftp.close()
            end = datetime.now()
            diff = end - start
            print('All files downloaded for ' + str(diff.seconds) + 's')
        else: counter += 1
    
        if counter == len(files):
            print('No matching dataset found for this year')

In [32]:
def merge_datacubes(ds_merge):
    '''
    merges datacubes by coordinates
    
    Parameters:
        ds_merge (array): array of datasets to be mearched
        
    Returns: 
        ds1 (ds): A single datacube with all merged datacubes
        - Error, if no Datacubes given
    '''
    start = datetime.now()
    if len(ds_merge) == 0:
        print("error")
        return
    if len(ds_merge) == 1:
        return ds_merge[0]
    else:
        print('start merging')
        ds1 = ds_merge[0]
        count = 1
        while count < len(ds_merge):
            start1 = datetime.now()
            ds1 =  xr.combine_by_coords([ds1,ds_merge[count]])
            count+=1
            print("succesfully merged cube nr "+ str(count)+" to the base cube ")
            end = datetime.now()
            diff = end - start1
            print('All cubes merged for ' + str(diff.seconds) + 's')
        print("result: ")
        print(ds1)
        end = datetime.now()
        diff = end - start
        print('All cubes merged for ' + str(diff.seconds) + 's')
        return ds1

In [34]:
def timeframe(ds,start,end):
    '''
    Slices Datacube down to given timeframe
      
    Parameters:
        ds (ds): source dataset
        start (str): start of the timeframe eg '2018-07-13'
        end (str): end of the timeframe eg '2018-08-23'
       
    Returns:
        ds_selected (ds): dataset sliced to timeframe
    '''
    if start>end:
        print("start and end of the timeframe do are not compatible!")
    else:    
        ds_selected = ds.sel(time = slice(start,end))
        print(ds_selected)
        return ds_selected    

In [35]:
def safe_datacube(ds, timeframe):
    '''
    Saves the Datacube as NetCDF (.nc)
      
    Parameters:
        ds (ds): source dataset
        timeframe (str): timeframe eg '2017', '2015_2019'
    '''
    print("start saving")
    if type(timeframe) != str:
        timeframe=str(timeframe)
    ds.to_netcdf(systempath+ "sst.day.mean."+timeframe+".nc")
    print("done saving")

Example

In [36]:
download_file (2018)
download_file (2019)
ds_2018 = xr.open_dataset(systempath+"sst.day.mean.2018.nc")
ds_2019 = xr.open_dataset(systempath+"sst.day.mean.2019.nc")
ds_merge = [ds_2018,ds_2019]
ds_2018_2019 = merge_datacubes(ds_merge)
ds_sliced = timeframe(ds_2018_2019,'2018-07-01','2019-08-01')
safe_datacube(ds_sliced,'Juli18_Juli19')

Downloading...sst.day.mean.2018.nc
All files downloaded for 114s
Downloading...sst.day.mean.2019.nc
All files downloaded for 104s
start merging


MemoryError: Unable to allocate 1.41 GiB for an array with shape (365, 720, 1440) and data type float32