In [123]:
import datetime
import os
import shutil
import subprocess

import pandas as pd
import requests



# Goal:
# Write file with all recorded parameters as columns. Each row is a timestamp.
# TIMESTAMP            PARAM_1   PARAM_2   ...   PARAM_n
# 2020-11-13T00:00:00Z 0.0000    0.0000    ...   0.0000
# 2020-11-13T01:00:00Z 0.0000    0.0000    ...   0.0000
# 2020-11-13T02:00:00Z 0.0000    0.0000    ...   0.0000
#          ...           ...       ...     ...     ...
# 2020-11-13T23:00:00Z 0.0000    0.0000    ...   0.0000
#
# 
# Example:
# points = [{'id': '149', 'lat': 65.59405, 'lon': 19.26423}, {'id': '171', 'lat': 65.81389, 'lon': 21.63525}]
# start_date = datetime.date(2020, 9, 1)
# end_date = datetime.date(2020, 9, 7)
# GRIB_to_CSV(points, start_date, end_date)
#
# @params points: list of dicts with station id, lat and lon.
#                 example: {'id': '149', 'lat': 65.59405, 'lon': 19.26423}
#         start_date: date object. Fetched data will include start_date.
#                     example: datetime.date(2020, 9, 1)
#         end_date: date object. Fetched data will include end_date.
#                   example: datetime.date(2020, 9, 7)
def _files_with_suffix(directory, suffix):
    # Return the sorted names of the regular files directly inside
    # `directory` whose name ends with `suffix`. Sorting keeps the column
    # order of the resulting CSV files independent of the file system.
    return sorted(
        name for name in os.listdir(directory)
        if name.endswith(suffix) and os.path.isfile(os.path.join(directory, name))
    )


def _remove_files(directory, suffix):
    # Delete every file directly inside `directory` whose name ends with `suffix`.
    for name in _files_with_suffix(directory, suffix):
        os.remove(os.path.join(directory, name))


def _print_progress(msg, width, final):
    # Print `msg` padded to `width`. Intermediate messages end with a carriage
    # return so the next one overwrites them; the final one ends the line.
    print(msg + ' ' * (width - len(msg)), end='\n' if final else '\r')


def _run_tool(cmd, stdout=None):
    # Run an external command given as an argument list (no shell involved),
    # report a non-zero exit status to the user and return that status.
    result = subprocess.run(cmd, stdout=stdout)
    if result.returncode != 0:
        print('WARNING: ' + cmd[0] + ' exited with status ' + str(result.returncode) + '.')
    return result.returncode


def _download_day(current_date, raw_path, msg_length, timeout):
    # Download the 24 hourly MESAN files of `current_date` into `raw_path`
    # (concatenated) and return the timestamps that were actually fetched.
    date_str = current_date.strftime('%Y-%m-%d')
    base_url = ('https://opendata-download-grid-archive.smhi.se/data/6/'
                + current_date.strftime('%Y%m')
                + '/MESAN_' + current_date.strftime('%Y%m%d'))
    fetched = []

    # 'wb' (not 'ab'): a file left behind by an interrupted run must be
    # replaced, not appended to.
    with open(raw_path, 'wb') as grib_file:
        for hour in range(24):
            hour_str = '{:02d}'.format(hour)
            url = base_url + hour_str + '00+000H00M'
            _print_progress('Accessing ' + url, msg_length, final=(hour == 23))

            try:
                response = requests.get(url, allow_redirects=True, timeout=timeout)
                # HTTP errors (404, 500, ...) do not raise by themselves;
                # without this check an error page would be written into the
                # GRIB file and the hour flagged as good.
                response.raise_for_status()
            except requests.exceptions.RequestException as err:
                print('REQUESTS EXCEPTION!!!')
                print(err)
                continue

            grib_file.write(response.content)
            fetched.append(date_str + 'T' + hour_str + ':00:00Z')

    return fetched


def GRIB_to_CSV(points, start_date, end_date):
    """Download MESAN GRIB data and write one CSV per station and day.

    For every date from start_date to end_date (both included) the 24 hourly
    files are downloaded from the SMHI grid archive and concatenated into one
    raw GRIB file. That file is split per parameter with the external
    ``grib_filter`` tool, and for every point the value of each parameter is
    extracted with the external ``grib_get`` tool. The result is written to
    ``MESAN_CSV/<id>/MESAN_<YYYY-MM-DD>.csv`` with a ``Timestamp`` column
    followed by one column per parameter.

    Dates for which every point already has a CSV file are skipped. Dates for
    which no hour could be downloaded are skipped without writing a CSV, so
    that they are retried on the next run.

    Working directories ``GRIBS/`` and ``TABS/`` are created in the current
    working directory and removed again when the function finishes;
    ``filter.txt`` and ``MESAN_CSV/`` are kept.

    Args:
        points: list of dicts with the keys 'id' (str), 'lat' and 'lon',
            e.g. {'id': '149', 'lat': 65.59405, 'lon': 19.26423}.
        start_date: datetime.date, first date to fetch (inclusive).
        end_date: datetime.date, last date to fetch (inclusive).

    Raises:
        FileNotFoundError: if ``grib_filter`` or ``grib_get`` cannot be
            found when a date actually has to be processed.
    """
    # Define directories.
    gribs_dir = 'GRIBS/'
    gribs_raw_dir = gribs_dir + 'Raw/'
    gribs_selected_dir = gribs_dir + 'Selected/'
    tabs_selected_dir = 'TABS/'
    csv_dir = 'MESAN_CSV/'
    msg_length = 100
    request_timeout = 60  # Seconds; keeps a stalled connection from hanging the run.

    # Create necessary directories.
    if os.path.isdir(gribs_dir):
        print(gribs_dir + ' exists.')
    else:
        print('Creating ' + gribs_dir + '.')
        os.mkdir(gribs_dir)
    # The subfolders may be missing even when GRIBS/ itself exists.
    for sub_dir in (gribs_raw_dir, gribs_selected_dir):
        if not os.path.isdir(sub_dir):
            print('Creating ' + sub_dir + '.')
            os.mkdir(sub_dir)

    for out_dir in (tabs_selected_dir, csv_dir):
        if os.path.isdir(out_dir):
            print(out_dir + ' exists.')
        else:
            print('Creating ' + out_dir)
            os.mkdir(out_dir)

    # Write filter file: grib_filter writes one file per parameter/level type.
    with open('filter.txt', 'w') as filter_file:
        filter_file.write('write "' + gribs_selected_dir
                          + 'selected_[shortName]_[levelType].grib";')

    # Loop over each day (include end_date).
    n_days = (end_date - start_date).days + 1
    for day_offset in range(n_days):
        current_date = start_date + datetime.timedelta(days=day_offset)

        # Message to user.
        date_str = current_date.strftime('%Y-%m-%d')
        msg = '=== WORKING ON DATE ' + date_str + ' '
        print(msg + '=' * (msg_length - len(msg)))

        # If the CSV file already exists for every station, skip this date.
        csv_name = 'MESAN_' + date_str + '.csv'
        if all(os.path.isfile(csv_dir + point['id'] + '/' + csv_name)
               for point in points):
            print('.csv files already written for all stations for ' + date_str + '.')
            continue

        # Guarantee that all selected/tabular files belong to current_date,
        # even if a previous run was interrupted before its cleanup.
        _remove_files(gribs_selected_dir, '.grib')
        _remove_files(tabs_selected_dir, '.tabular')

        # Build the raw GRIB file with 1h resolution for current_date.
        raw_grib_path = gribs_raw_dir + 'MESAN_' + date_str + '.grib'
        print('Building GRIB-file.')
        hours = _download_day(current_date, raw_grib_path, msg_length, request_timeout)

        if not hours:
            # Writing an empty CSV would make later runs skip this date.
            print('No data could be fetched for ' + date_str + '. Skipping date.')
            os.remove(raw_grib_path)
            continue

        # Split the raw file into one GRIB file per parameter.
        _run_tool(['grib_filter', 'filter.txt', raw_grib_path])

        # Find all files in GRIBS/Selected/.
        grib_files = _files_with_suffix(gribs_selected_dir, '.grib')

        for point in points:

            # Message to user.
            msg = '------ Extracting parameter data for point ' + point['id'] + ' '
            print(msg + '-' * (msg_length - len(msg)))

            # For every selected parameter file, extract the values at the
            # point's lat/lon into a whitespace-separated tabular file.
            location = '{},{},1'.format(point['lat'], point['lon'])
            for grib_name in grib_files:
                stem = grib_name[:-len('.grib')]
                _print_progress('Creating ' + stem + '.tabular...', msg_length,
                                final=(grib_name == grib_files[-1]))
                with open(tabs_selected_dir + stem + '.tabular', 'w') as tab_file:
                    _run_tool(['grib_get', '-l', location, '-W', '50',
                               '-p', 'dataDate', gribs_selected_dir + grib_name],
                              stdout=tab_file)

            # Find all selected tabular files.
            tab_files = _files_with_suffix(tabs_selected_dir, '.tabular')

            # Create the combined dataframe, starting with the timestamps of
            # the hours that were actually fetched.
            comb_df = pd.DataFrame({'Timestamp': hours})

            for tab_name in tab_files:

                # Message to user.
                _print_progress('Reading ' + tab_name + '...', msg_length,
                                final=(tab_name == tab_files[-1]))

                # Column name is the file name without 'selected_' and '.tabular'.
                stem = tab_name[:-len('.tabular')]
                param = stem[len('selected_'):] if stem.startswith('selected_') else stem

                try:
                    df = pd.read_csv(tabs_selected_dir + tab_name, header=None,
                                     names=['Date', 'Param'], sep=r'\s+')
                except pd.errors.EmptyDataError:
                    # grib_get produced no output for this parameter.
                    print('No data in ' + tab_name + '. Skipping parameter.')
                    continue

                # NOTE(review): values are matched to timestamps by row
                # position; this assumes each parameter file has one row per
                # fetched hour - confirm.
                comb_df.insert(len(comb_df.columns), param, df['Param'],
                               allow_duplicates=False)

            # Save table as MESAN_DATE.csv in station folder.
            station_dir = csv_dir + point['id'] + '/'
            if not os.path.isdir(station_dir):
                print('Creating ' + station_dir + 'directory.')
                os.mkdir(station_dir)
            print('Saving ' + csv_name)
            comb_df.to_csv(station_dir + csv_name, index=False)

        # Remove the temporary files of this date.
        print('Removing temporary .tabular files.')
        _remove_files(tabs_selected_dir, '.tabular')

        print('Removing temporary .grib files.')
        _remove_files(gribs_selected_dir, '.grib')

        # Remove raw grib file.
        os.remove(raw_grib_path)

    # Cleanup.
    shutil.rmtree(gribs_dir)
    shutil.rmtree(tabs_selected_dir)


In [121]:
# Example: fetch one week (2020-09-01 to 2020-09-07, both included)
# of data for two stations.
points = [
    {'id': '149', 'lat': 65.59405, 'lon': 19.26423},
    {'id': '171', 'lat': 65.81389, 'lon': 21.63525},
]
start_date = datetime.date(year=2020, month=9, day=1)
end_date = datetime.date(year=2020, month=9, day=7)
GRIB_to_CSV(points=points, start_date=start_date, end_date=end_date)

GRIBS/ exists.
TABS/ exists.
METCOMP_CSV/ exists.
.csv files already written for all stations for 2020-09-01.
.csv files already written for all stations for 2020-09-02.
Building GRIB-file.
Accessing https://opendata-download-grid-archive.smhi.se/data/6/202009/MESAN_202009032300+000H00M   
------ Extracting parameter data for point 149 -----------------------------------------------------
Creating selected_c_sigfr_sfc.tabular...                                                            
Reading selected_prsort_sfc.tabular...                                                              
Saving MESAN_2020-09-03.csv
------ Extracting parameter data for point 171 -----------------------------------------------------
Creating selected_c_sigfr_sfc.tabular...                                                            
Reading selected_prsort_sfc.tabular...                                                              
Saving MESAN_2020-09-03.csv
Removing temporary .tabular files.
Removing tem