# Downloading Extended AVHRR Polar Pathfinder CDR data

In [1]:
import requests
import datetime
import urllib
import re
import xarray as xr
import os
import pathlib

In [8]:
start_date = datetime.date(1990, 1, 1)
end_date = datetime.date(2019, 12, 31)
delta = datetime.timedelta(days=365.25)
download_dir = os.getcwd() + '/download/'
data_dir = os.getcwd() + '/data/'

In [None]:
while start_date <= end_date:
    '''
    Loop over webpages for each year between start_date and end_date until both dates are equal. 
    Afterwards find all .nc files starting with 'Polar-App-X_' string and save the filenames to
    an array => files.
    '''
    year = str(start_date.year)
    month = str(start_date.month).zfill(2)
    day = str(start_date.day).zfill(2)
    
    url = f'https://www.ncei.noaa.gov/data/avhrr-polar-pathfinder-extended/access/shem/{year}/'
    content = urllib.request.urlopen(url).read().decode('utf-8')
    
    files = re.findall('"(Polar-APP-X_.*?.nc)"', content)
    
    file_count = 0
    
    for file in files:
        '''
        Loop over all files from array and save the file to /data/ directory.
        '''
        try:
            file_url = url + file
            r = requests.get(file_url, allow_redirects=True)
            open(download_dir + str(file), 'wb').write(r.content)
            #print(f'Download finished: {file}')
        except:
            print(f'Download failed: {file}')
        
        nc_file = download_dir + file
        
        ds  = xr.open_dataset(nc_file)
        albedo = ds['cdr_surface_albedo']
        temperature = ds['cdr_surface_temperature']
        
        try:
            data = xr.merge([albedo, temperature], compat='identical')
        except:
            print(f'Merge failed: {file}')
            
        merged_dataset_name = data_dir + str(file)
        
        try:
            data.to_netcdf(merged_dataset_name, mode='w')
        except:
            print(f'File couldnt be saved: {merged_dataset_name}')
        
        try:
            os.remove(download_dir + str(file))
        except:
            print(f'File couldnt be deleted: {file}')
            
        file_count += 1

    start_date += delta
    print(f'NEXT YEAR: {start_date}')
print(f'ALL DONE! Last date: {start_date}')
print(f'{file_count} files were downloaded!')

NEXT YEAR: 2011-01-01
NEXT YEAR: 2015-12-31
NEXT YEAR: 2016-12-30
NEXT YEAR: 2019-12-30
