# File Retrieval

This script downloads the required meteorological files from the Copernicus Climate Data Store (CDS) using the following parameters:
 - Center: Met Office
 - System: 12
 - Variables: 2m Air Temperature, Mean Sea Level Pressure, Total Precipitation
 - Leadtime: 720-1440 hours (24 hour increments)

In [1]:
from tqdm.notebook import tqdm
from datetime import date, timedelta
from dateutil.relativedelta import relativedelta
import numpy as np
import xarray as xr
import cdsapi
import os

First, decide where the downloaded files should be saved, and define the meteorological variables to request.

In [2]:
# Variable names sent in the 'variable' field of every forecast request.
meteo_vars = [
    '2m_temperature',
    'mean_sea_level_pressure',
    'total_precipitation',
]
# Folder that receives the downloaded GRIB forecast files.
output_folder = "E:/31-12-2020/forecast-data/"

In [4]:
def get_max_min_leadtimes(fmonth, fyear, leadtime):
    """ Computes the leadtime window, in hours, that covers one target calendar month.
        The forecast is taken to start on the 1st of fmonth/fyear and the target month is the
        calendar month 'leadtime' months after that start.
        Parameters:
            fmonth - Month the forecast starts from.
            fyear - Year the forecast starts from.
            leadtime - Number of whole months between the forecast start and the target month.
        Returns: (min_lt, max_lt) - hours from the forecast start to the first day of the target month
            and to its last day (inclusive). Note the minimum comes first, despite the function name."""
    forecast_start = date(fyear, fmonth, 1)
    target_first_day = forecast_start + relativedelta(months=leadtime)
    # Jump to the 1st of the following month, then step back one day to reach the
    # last day of the target month (handles 28/29/30/31-day months).
    following_month = target_first_day + relativedelta(months=1)
    target_last_day = following_month - relativedelta(days=1)
    day_offsets = [(day - forecast_start).days for day in (target_first_day, target_last_day)]
    return day_offsets[0] * 24, day_offsets[1] * 24

def download_files(meteo_vars, output_folder, leadtime=1):
    """ Downloads the meteorological data in monthly files with the given leadtime. Each file is
        named '<month>-<year>.grib' after the date the forecast starts from, and files that are
        already present in 'output_folder' are not downloaded again.
        Parameters:
            meteo_vars - List of variable names to request.
            output_folder - Folder the GRIB files are written to (created if missing).
            leadtime - Number of months between the forecast start and the month being forecasted."""
    # makedirs also creates missing parent folders and does nothing if the folder already exists.
    os.makedirs(output_folder, exist_ok=True)
    for y in tqdm(range(1993, 2016)):
        for m in range(1, 13):
            if (y == 1993) and (m == 1):
                # NOTE(review): the very first month is deliberately skipped - presumably it is
                # not available from the data store; confirm.
                continue
            label = "{}-{}".format(m, y)
            # os.path.join keeps the file inside the folder even without a trailing slash.
            file_name = os.path.join(output_folder, label + ".grib")
            if os.path.exists(file_name):
                print("Already exists: {}".format(label))
                continue
            print("Downloading {}".format(label))
            # Daily (24 hour) steps covering every day of the forecasted month, last day included.
            mi, ma = get_max_min_leadtimes(m, y, leadtime)
            leadtimes = list(range(mi, ma + 1, 24))
            download_year(meteo_vars, m, y, leadtimes, file_name)
def download_year(meteo_vars, month, year, leadtimes, output_file, system='12'):
    """ Requests a single forecast from the CDS API and stores the GRIB result in 'output_file'.
        Parameters:
            meteo_vars - Variable names to request.
            month, year - Start date of the forecast (the day is always '01').
            leadtimes - Leadtime hours to request.
            output_file - Path the retrieved file is written to.
            system - Forecast system identifier passed to the API."""
    # Bounding box of the requested region.
    # NOTE(review): presumably ordered north, west, south, east - confirm against the CDS API docs.
    region = [
        70, -100, 10,
        20,
    ]
    request = {
        'originating_centre': 'ukmo',
        'system': system,
        'variable': meteo_vars,
        'year': year,
        'month': month,
        'day': '01',
        'leadtime_hour': leadtimes,
        'area': region,
        'format': 'grib',
    }
    client = cdsapi.Client()
    client.retrieve('seasonal-original-single-levels', request, output_file)
    
# Download every forecast month that is not already present in the output folder.
download_files(meteo_vars, output_folder)

HBox(children=(FloatProgress(value=0.0, max=23.0), HTML(value='')))

Already exists: 2-1993
Already exists: 3-1993
Already exists: 4-1993
Already exists: 5-1993
Already exists: 6-1993
Already exists: 7-1993
Already exists: 8-1993
Already exists: 9-1993
Already exists: 10-1993
Already exists: 11-1993
Already exists: 12-1993
Already exists: 1-1994
Already exists: 2-1994
Already exists: 3-1994
Already exists: 4-1994
Already exists: 5-1994
Already exists: 6-1994
Already exists: 7-1994
Already exists: 8-1994
Already exists: 9-1994
Already exists: 10-1994
Already exists: 11-1994
Already exists: 12-1994
Already exists: 1-1995
Already exists: 2-1995
Already exists: 3-1995
Already exists: 4-1995
Already exists: 5-1995
Already exists: 6-1995
Already exists: 7-1995
Already exists: 8-1995
Already exists: 9-1995
Already exists: 10-1995
Already exists: 11-1995
Already exists: 12-1995
Already exists: 1-1996
Already exists: 2-1996
Already exists: 3-1996
Already exists: 4-1996
Already exists: 5-1996
Already exists: 6-1996
Already exists: 7-1996
Already exists: 8-1996
Al

## Extract Relevant Data

Next, we extract the forecasted meteorological data into files named after the month being forecasted. For example, the forecast issued in 01-2020 with a leadtime of 720-1440 hours is stored as 02-2020.

In [6]:
def extract_data(meteo_folder, output_folder, mvars=('msl', 't2m', 'tp')):
    """ Extracts the meteorological data from the GRIB files in 'meteo_folder' and stores each variable
        as '<output_folder>/<variable>/<grib name>.npy'. Files that already exist are left untouched.
        A GRIB file that cannot be read or processed is reported and skipped, so one bad file does
        not stop the whole run.
        Parameters:
            meteo_folder - Folder containing the '.grib' files (only its top level is scanned).
            output_folder - Folder under which one sub-folder per variable is written (created if missing).
            mvars - Names of the dataset variables to extract.
        Returns: A list with one [max, min, shape, grib file name] entry per array that was saved."""
    _, _, files = next(os.walk(meteo_folder))
    data_safety = []
    for f in tqdm(files):
        if not f.endswith(".grib"):
            continue
        input_fname = os.path.join(meteo_folder, f)
        try:
            # The context manager closes the dataset only if it was actually opened; the previous
            # 'finally: ds.close()' failed on an unbound (or stale) 'ds' when opening raised.
            with xr.open_dataset(input_fname, engine='cfgrib') as ds:
                for v in mvars:
                    var_folder = os.path.join(output_folder, v)
                    # np.save does not create folders, so make sure the target exists.
                    os.makedirs(var_folder, exist_ok=True)
                    output_fname = os.path.join(var_folder, f[:-5] + ".npy")
                    if os.path.exists(output_fname):
                        continue
                    if len(ds[v].shape) > 3:
                        # Collapse the leading axis by averaging.
                        # NOTE(review): presumably the ensemble-member axis - confirm.
                        data = np.mean(ds[v], axis=0)
                        if len(data.shape) == 4:
                            # Still four axes after averaging: report the file and keep index 0 only.
                            print(output_fname)
                            data = data[0, :, :, :]
                    else:
                        data = np.array(ds[v])
                    data_safety.append([np.max(data), np.min(data), data.shape, f])
                    np.save(output_fname, data)
        except Exception as e:
            # Deliberately broad: this is a best-effort batch job, report and move to the next file.
            print("Failed to open file: {}".format(input_fname))
            print(e)
    return data_safety
# Convert the downloaded GRIB files into per-variable .npy files; keep the returned summary rows for inspection.
safety = extract_data("E:/31-12-2020/forecast-data/", "E:/31-12-2020/forecastee-data/")

HBox(children=(FloatProgress(value=0.0, max=550.0), HTML(value='')))




In [7]:
def rename_data(data_folder, leadtime=1):
    """ Moves the '<month>-<year>' files found directly in 'data_folder' into its 'forecasted-months'
        sub-folder, renamed after the month they actually describe (forecast date plus 'leadtime' months).
        A file is left where it is if the target name already exists.
        Parameters:
            data_folder - Folder holding the files named after the forecast date.
            leadtime - Number of months between the forecast date and the forecasted month."""
    _, _, files = next(os.walk(data_folder))
    target_folder = os.path.join(data_folder, "forecasted-months")
    # os.rename does not create folders, so make sure the destination exists.
    os.makedirs(target_folder, exist_ok=True)
    for f in tqdm(files):
        try:
            name_parts = f.split('-')
            date_of_forecast = date(int(name_parts[1].split('.')[0]), int(name_parts[0]), 1)
        except (IndexError, ValueError):
            # Not a '<month>-<year>.<ext>' file: report it instead of aborting the whole run.
            print("Skipping file with unexpected name: {}".format(f))
            continue
        # Use the requested leadtime (previously hard-coded to one month, ignoring the parameter).
        date_forecasted = date_of_forecast + relativedelta(months=leadtime)
        output_file = os.path.join(
            target_folder, "{}-{}.npy".format(date_forecasted.month, date_forecasted.year))
        if not os.path.exists(output_file):
            os.rename(os.path.join(data_folder, f), output_file)
# Move each variable's files into its 'forecasted-months' sub-folder, renamed after the forecasted month.
rename_data("E:/31-12-2020/forecastee-data/msl/")
rename_data("E:/31-12-2020/forecastee-data/t2m/")
rename_data("E:/31-12-2020/forecastee-data/tp/")

HBox(children=(FloatProgress(value=0.0, max=275.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=275.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=275.0), HTML(value='')))


