# Fetch PROMICE data
This code block downloads the relevant weather data from https://promice.org/PromiceDataPortal/#Automaticweatherstations (a station is skipped if its output file is already present in the data folder).  
The script is based on the following thread: https://stackoverflow.com/questions/51455112/python-script-to-download-data-from-noaa
Note that the script takes a while to run (30+ minutes).

In [1]:
import errno
import os
import wget # conda install: https://anaconda.org/anaconda/pywget
from pathlib import Path
import pandas as pd
import numpy as np

# Folder that receives the per-station CSV files and the combined PROMICE.csv.
path = "data/PROMICE"

# Stations available at the PROMICE official site: https://promice.org/WeatherStations.html
# Each entry is (station name, (latitude, longitude), elevation).
# Longitudes are written as negative numbers.
# NOTE(review): KAN_L previously carried EGP's longitude (-35.9748), which looks
# like a copy-paste slip; the value below should be double-checked against the
# PROMICE station table.
PROMICE_stations = [('EGP', (75.6247, -35.9748), 2660),
                    ('KAN_B', (67.1252, -50.1832), 350),
                    ('KAN_L', (67.0955, -49.9513), 670),
                    ('KAN_M', (67.0670, -48.8355), 1270),
                    ('KAN_U', (67.0003, -47.0253), 1840),
                    ('KPC_L', (79.9108, -24.0828), 370),
                    ('KPC_U', (79.8347, -25.1662), 870),
                    ('MIT', (65.6922, -37.8280), 440),
                    ('NUK_K', (64.1623, -51.3587), 710),
                    ('NUK_L', (64.4822, -49.5358), 530),
                    ('NUK_U', (64.5108, -49.2692), 1120),
                    ('QAS_L', (61.0308, -46.8493), 280),
                    ('QAS_M', (61.0998, -46.8330), 630),
                    ('QAS_U', (61.1753, -46.8195), 900),
                    ('SCO_L', (72.2230, -26.8182), 460),
                    ('SCO_U', (72.3933, -27.2333), 970),
                    ('TAS_A', (65.7790, -38.8995), 890),
                    ('TAS_L', (65.6402, -38.8987), 250),
                    ('THU_L', (76.3998, -68.2665), 570),
                    ('THU_U', (76.4197, -68.1463), 760),
                    ('UPE_L', (72.8932, -54.2955), 220),
                    ('UPE_U', (72.8878, -53.5783), 940)]

# Create a directory (including missing parents), tolerating one that already exists.
def mkdir_p(path):
    """Create directory *path* and report whether it had to be created.

    Returns 'Path created.' when the directory was newly made and
    'Path already exists!' when it was already present as a directory.
    Any other OSError (for example, *path* exists but is a regular file)
    is re-raised unchanged.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        already_a_directory = err.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_directory:
            raise
        return 'Path already exists!'
    else:
        return 'Path created.'
            
# Make sure the data folder exists before anything is downloaded into it.
mkdir_p(path)


# Go through each station, download its hourly data and store the selected,
# cleaned columns as "<station>.csv" in the data folder.
for ws in PROMICE_stations:
    url = f'https://promice.org/PromiceDataPortal/api/download/f24019f7-d586-4465-8181-d4965421e6eb/v03/hourly/csv/{ws[0]}_hour_v03.txt'
    file_x = path + f'/{ws[0]}'
    if Path(file_x + '.csv').is_file():
        # This station was already processed in an earlier run.
        continue

    # Read from the path wget reports instead of re-deriving it.
    # NOTE(review): presumably the returned name can differ from `out` when a
    # stale file with that name is in the way - confirm against the wget package.
    filename = wget.download(url, out=file_x + '.txt')

    # Turn the whitespace-separated file into a comma-separated one
    # (https://stackoverflow.com/a/19759515). Every line gets a trailing comma;
    # the extra empty column it produces is dropped by the column selection below.
    with open(filename) as infile, open(file_x + '_fixed.txt', 'w') as outfile:
        for line in infile:
            outfile.write(",".join(line.split()) + ",\n")

    df = pd.read_csv(file_x + '_fixed.txt')
    df = df[['Year','MonthOfYear','DayOfYear','HourOfDay(UTC)','CloudCover','TiltToEast(d)','TiltToNorth(d)','Albedo_theta<70d']]
    # Explicit copy so the column assignments below modify an independent frame
    # rather than a slice of the full table.
    df = df[df.Year > 2015].copy()
    # Optional filters, currently disabled:
    #df = df[df.MonthOfYear > 4]
    #df = df[df.MonthOfYear < 9]
    #df = df[df['HourOfDay(UTC)'] > 9] #
    #df = df[df['HourOfDay(UTC)'] < 21]
    df = df.round({'CloudCover': 2, 'TiltToEast(d)': 2, 'TiltToNorth(d)': 2, 'Albedo_theta<70d': 2})
    #df = df[df['Albedo_theta<70d'] < 1.00]
    #df = df[df['Albedo_theta<70d'] > 0.00]
    df['station_name'] = ws[0]
    df['latitude N'] = ws[1][0]
    df['longitude W'] = ws[1][1]
    df['elevation'] = float(ws[2])

    # Treat -999.0 as missing and fill it from the nearest valid reading.
    # The result is assigned back to the column: an in-place replace on
    # df[col] is chained assignment and is not guaranteed to reach df.
    for tilt_col in ('TiltToEast(d)', 'TiltToNorth(d)'):
        tilt = df[tilt_col].replace(-999.0, np.nan)
        df[tilt_col] = tilt.interpolate(method='nearest', limit_direction='both')

    df.to_csv(file_x + '.csv', index=None)

# Clean-up step: the downloaded and comma-converted ".txt" files are only
# intermediates, so every ".txt" entry found in the data folder is deleted.
filelist = list(filter(lambda name: name.endswith(".txt"), os.listdir(path)))
for f in filelist:
    os.remove(os.path.join(path, f))

# Create one big file "PROMICE.csv" with the data of all stations
# (skipped when that file already exists).
if not Path(f'{path}/PROMICE.csv').is_file():
    # Sorted so the row order of the combined file is reproducible;
    # os.listdir returns entries in arbitrary order.
    filelist = sorted(f for f in os.listdir(path) if f.endswith(".csv"))
    if not filelist:
        raise FileNotFoundError(f'No station CSV files found in {path}')

    # A single concat replaces the repeated DataFrame.append calls
    # (DataFrame.append is no longer available in pandas 2.x).
    PROMICE = pd.concat([pd.read_csv(f'{path}/{f}') for f in filelist])

    # Collapse the two tilt components into one column holding the larger
    # absolute value, then drop the components.
    tilt_columns = ['TiltToEast(d)', 'TiltToNorth(d)']
    PROMICE['tilt'] = PROMICE[tilt_columns].abs().values.max(1)
    PROMICE = PROMICE.drop(columns=tilt_columns)

    PROMICE = PROMICE.rename(columns={'Year': 'year',
                                      'MonthOfYear': 'month',
                                      'DayOfYear': 'dayofyear',
                                      'HourOfDay(UTC)': 'hour',
                                      'CloudCover': 'cloud',
                                      'station_name': 'station',
                                      'Albedo_theta<70d': 'PROMICE_alb'})
    # Reading the coordinates back from CSV can introduce float noise;
    # round them to the four decimals used in the station table.
    PROMICE = PROMICE.round({'latitude N': 4, 'longitude W': 4})
    PROMICE.to_csv(f'{path}/PROMICE.csv', index=None)

ModuleNotFoundError: No module named 'wget'