# Estimation of recurrence probabilities

### Preparation

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import os
# Make the project's module folder (below the user's home directory) the
# working directory. Presumably this is what lets the project imports in the
# next cell resolve; 'config.yml' is opened relative to it below.
home_directory = os.path.expanduser('~')
os.chdir(f"{home_directory}/DS_Project/modules")
import warnings
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")
import pickle
import yaml
# Load the project configuration from the YAML file in the working directory.
config_path = 'config.yml'
with open(config_path) as f:
    config = yaml.load(f, Loader=yaml.FullLoader)
# Directory holding the DWD files, and its 'app/' subfolder used for the
# pickled outputs written later in the notebook.
path = f"{config['data']['data']}/DWD/"
path_app = f"{path}app/"

In [2]:
from data_retrieval.DWD.DWDScraper import *
from models.heatwaves.HeatwaveM import *

### Data import

In [3]:
# Create the DWD scraper and replace its bounding boxes with the Munich entry
# from the configuration.
S = DWDScraper()
S.bounding_boxes = config['bboxes']['munich']
# Ask the scraper for all stations between 2014-01-01 and 2022-12-31
# (presumably this writes the CSV named below -- confirm in DWDScraper),
# then read the station table from the configured DWD meta directory.
all_stations_csv = "all-stations.csv"
S.get_all_stations(all_stations_csv, "2014-01-01", "2022-12-31")
s = pd.read_csv(f"{config['data']['dwd']}/meta/{all_stations_csv}")
# Ids of the stations the scraper reports as relevant for the station table.
munich_ids = S.get_relevant_station_ids(s)
print(munich_ids)

[1262, 3379, 7431]


In [4]:
# Keep only the metadata rows whose STATIONS_ID is one of the selected ids,
# then display the result.
is_munich_station = s['STATIONS_ID'].isin(munich_ids)
station_meta = s.loc[is_munich_station]
station_meta

Unnamed: 0,STATIONS_ID,VON_DATUM,BIS_DATUM,STATIONSHOEHE,GEOBREITE,GEOLAENGE,STATIONSNAME,BUNDESLAND
88,1262,1992-05-17,2023-07-08,446,48.3477,11.8134,München-Flughafen,Bayern
243,3379,1997-07-01,2023-07-08,515,48.1632,11.5429,München-Stadt,Bayern
473,7431,2007-11-01,2023-07-08,604,48.013,11.5524,Oberhaching-Laufzorn,Bayern


In [5]:
# Save the station metadata as a pickle in the app output directory.
# The directory is created first (no-op when it already exists) so this cell
# does not fail with FileNotFoundError on a fresh checkout.
os.makedirs(path_app, exist_ok=True)
with open(path_app + 'station_meta.pkl', 'wb') as file:
    pickle.dump(station_meta, file)

In [6]:
# Run the scraper for the selected stations from 2014-01-01 to 2023-06-30
# (presumably this writes the CSV named below -- confirm in DWDScraper),
# then load that CSV and parse the 'YYYY-MM-DD HH' timestamps into datetimes.
munich_csv = "munich-all.csv"
S.scrape(munich_csv, "2014-01-01", "2023-06-30", munich_ids)
munich_dwd = pd.read_csv(path + munich_csv).assign(
    MESS_DATUM=lambda raw: pd.to_datetime(raw['MESS_DATUM'], format='%Y-%m-%d %H')
)

In [7]:
# Preview the first hourly records of the second station in munich_ids.
second_station_mask = munich_dwd['STATIONS_ID'].eq(munich_ids[1])
h = munich_dwd.loc[second_station_mask]
h.head()

Unnamed: 0,STATIONS_ID,MESS_DATUM,TT_TU,RF_TU
83209,3379.0,2014-01-01 00:00:00,-4.1,96.0
83210,3379.0,2014-01-01 01:00:00,-4.9,96.0
83211,3379.0,2014-01-01 02:00:00,-4.9,95.0
83212,3379.0,2014-01-01 03:00:00,-4.8,97.0
83213,3379.0,2014-01-01 04:00:00,-5.1,97.0


In [8]:
# Years handed to the heatwave detection: 2014 through 2022 inclusive.
year_range = np.arange(2014,2023,1).tolist()

# Collect the per-station frames in lists and concatenate once after the loop.
# Growing a DataFrame with pd.concat inside the loop re-copies all previous
# rows on every iteration, and seeding it with an empty DataFrame() relies on
# pandas silently dropping empty inputs.
hourly_frames = []
daily_frames = []

for loc in munich_ids:

    # Wrap this station's records and run the heatwave detection on them.
    # NOTE(review): t_max / t_min look like temperature thresholds -- confirm
    # their meaning and units in HeatwaveM.get_heatwaves_ky.
    w = HeatwaveM(munich_dwd[munich_dwd.STATIONS_ID == loc])
    w.get_heatwaves_ky(station_id=loc,year=year_range,t_max=30,t_min=25)

    # One row per station and date: daily max/min temperature plus the
    # per-day maximum of the HEATWAVE and IND columns.
    sub = w.groupby(['STATION_ID', 'DATE'], as_index=False).agg({
        'TEMP': ['max', 'min'],
        'HEATWAVE': 'max',
        'IND': 'max'
    }).reset_index(drop=True)
    # Flatten the two-level column index produced by the dict aggregation.
    sub.columns = ['STATION_ID', 'DATE', 'MAX_TEMP', 'MIN_TEMP', 'HEATWAVE', 'IND']

    hourly_frames.append(w[['STATION_ID','TIME','DATE','TEMP','HUMID','HEATWAVE','IND']])
    daily_frames.append(sub)

# pd.concat raises on an empty list, so fall back to an empty frame, which is
# what the original loop produced when there were no stations.
hourly = pd.concat(hourly_frames) if hourly_frames else pd.DataFrame()
daily = pd.concat(daily_frames) if daily_frames else pd.DataFrame()

In [9]:
hourly.head()

Unnamed: 0,STATION_ID,TIME,DATE,TEMP,HUMID,HEATWAVE,IND
0,1262.0,2014-01-01 00:00:00,2014-01-01,-3.2,100.0,0.0,
1,1262.0,2014-01-01 01:00:00,2014-01-01,-3.1,100.0,0.0,
2,1262.0,2014-01-01 02:00:00,2014-01-01,-3.4,100.0,0.0,
3,1262.0,2014-01-01 03:00:00,2014-01-01,-3.6,100.0,0.0,
4,1262.0,2014-01-01 04:00:00,2014-01-01,-3.9,100.0,0.0,


In [10]:
daily.head()

Unnamed: 0,STATION_ID,DATE,MAX_TEMP,MIN_TEMP,HEATWAVE,IND
0,1262.0,2014-01-01,1.8,-5.6,0.0,
1,1262.0,2014-01-02,5.2,-3.6,0.0,
2,1262.0,2014-01-03,7.2,-3.5,0.0,
3,1262.0,2014-01-04,5.6,1.1,0.0,
4,1262.0,2014-01-05,5.6,0.3,0.0,


In [11]:
# Pickle the daily and the hourly table (in that order) into the path_app
# directory.
for pickle_name, frame in (('daily.pkl', daily), ('hourly.pkl', hourly)):
    with open(path_app + pickle_name, 'wb') as file:
        pickle.dump(frame, file)