##### **Collect data from KNMI using knmy package**

This notebook was previously not used by the codebase, but I now use it to download the data manually from the KNMI website, because the datasets have been updated.

In [5]:
from knmy_updated import knmy
from datetime import datetime

In [8]:
# knmy source code:   https://github.com/barthoekstra/knmy/tree/master
# knmy documentation: https://knmy.readthedocs.io/en/latest/

# Download window inside each year, given as (month, day) pairs.
SM, SD = 7, 1    # starting month, starting day
EM, ED = 12, 30  # ending month, ending day

# Station numbers.
# Groningen stations
Lauwersoog, Eelde, NieuwBeerta = 277, 280, 286
# Utrecht station(s)
DeBilt = 260

# Stations and variable groups used by the requests in this notebook.
# Alternative explicit selection: ['WIND', 'TEMP', 'SUNR', 'PRCP']
req_stations = [DeBilt]
req_variables = ['ALL']

# City label -> list of station numbers to download for that city.
cities = dict(Utrecht=[DeBilt])

In [9]:

import time
from requests.exceptions import HTTPError

# Years to download: 2016 up to and including 2023.
years = list(range(2016, 2024))

# Variable groups of interest. This rebinds the earlier ['ALL'] setting.
# NOTE(review): fetch_with_retry below does not pass this list (its
# `variables=` argument is commented out) — confirm that is intended.
req_variables = [
    'WIND',
    'TEMP',
    'SUNR',
    'PRCP',
    'PRES',
    'VICL',
    'MSTR',
]

def fetch_with_retry(year, city, stations, max_retries=3):
    """Download one year of hourly KNMI data, retrying on HTTP errors.

    The request window runs from ``datetime(year, SM, SD, 0)`` to
    ``datetime(year, EM, ED, 23)``, using the module-level SM/SD/EM/ED
    settings.

    Args:
        year: Calendar year to download.
        city: Label used only in the failure message.
        stations: Station identifiers forwarded to ``knmy.get_knmi_data``.
        max_retries: Maximum number of attempts; must be at least 1.

    Returns:
        The ``data`` element (fourth item) of the tuple returned by
        ``knmy.get_knmi_data``.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1.
        HTTPError: Re-raised when the final attempt also fails.
    """
    if max_retries < 1:
        # Without this guard the loop body never runs and the function
        # silently returns None, which only fails later in the caller.
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    # The window is identical for every attempt, so build it once.
    s_moment = datetime(year, SM, SD, 0)
    e_moment = datetime(year, EM, ED, 23)

    for attempt in range(max_retries):
        try:
            # Only the data is needed; the other return values get throwaway
            # names so the `stations` parameter is never rebound.
            _disclaimer, _station_info, _variables, data = knmy.get_knmi_data(
                type='hourly',
                stations=stations,
                start=s_moment,
                # variables=req_variables,
                end=e_moment,
                inseason=False,
                parse=True
            )
            return data

        except HTTPError as e:
            if attempt == max_retries - 1:  # Last attempt
                print(f"Failed to fetch data for {city} {year} after {max_retries} attempts: {e}")
                raise
            print(f"Attempt {attempt + 1} failed, retrying after delay...")
            time.sleep(5 * (attempt + 1))  # Delay grows by 5 s per failed attempt
            
# Download every configured year for every configured city and store each
# result as its own CSV file.
for year in years:
    for city, stations in cities.items():
        print(f"Processing {city}...")
        try:
            data = fetch_with_retry(year, city, stations)

            # Keep only the first occurrence of each column label.
            is_first_occurrence = ~data.columns.duplicated()
            data = data.loc[:, is_first_occurrence]

            csv_kwargs = dict(index=True, sep=';', decimal='.', encoding='utf-8')
            data.to_csv(f"../../data/data_raw/{year}_meteo_{city}.csv", **csv_kwargs)
            print(f"Saved {city} data with shape {data.shape} of year {year}")

            # Short pause after a successful save.
            time.sleep(2)
        except Exception as exc:
            # Best effort: report the problem and move on to the next
            # city/year combination without the extra pause below.
            print(f"Error processing {city} for {year}: {str(exc)}")
        else:
            # Extra pause after each successful city/year combination.
            time.sleep(1)


Processing Utrecht...
Saved Utrecht data with shape (4416, 25) of year 2016
Processing Utrecht...
Saved Utrecht data with shape (4416, 25) of year 2017
Processing Utrecht...
Saved Utrecht data with shape (4416, 25) of year 2018
Processing Utrecht...
Saved Utrecht data with shape (4416, 25) of year 2019
Processing Utrecht...
Saved Utrecht data with shape (4416, 25) of year 2020
Processing Utrecht...
Saved Utrecht data with shape (4416, 25) of year 2021
Processing Utrecht...
Saved Utrecht data with shape (4416, 25) of year 2022
Processing Utrecht...
Saved Utrecht data with shape (4416, 25) of year 2023


##### **2016**

In [None]:
# Request window for 2016, built from the configured start/end month and day.
s_moment = datetime(2016, SM, SD, 0)
e_moment = datetime(2016, EM, ED, 23)

# Hourly request for the stations in req_stations and the variable groups
# in req_variables.
disclaimer, stations, variables, data = knmy.get_knmi_data(
    type='hourly',
    stations=req_stations,
    variables=req_variables,
    start=s_moment,
    end=e_moment,
    inseason=False,
    parse=True,
)

In [None]:
# Report the size of the 2016 download, then write it as a ';'-separated CSV.
# NOTE(review): `city` is not assigned in this cell; it must already exist in
# the kernel (e.g. left over from the download loop) — confirm before running.
print(data.shape)
data.to_csv(
    f"../data/data_raw/2016_meteo_{city}.csv",
    index=True,
    sep=';',
    decimal='.',
    encoding='utf-8',
)

##### **2017**

In [None]:
# Request window for 2017, built from the configured start/end month and day.
s_moment = datetime(2017, SM, SD, 0)
e_moment = datetime(2017, EM, ED, 23)

# Hourly request for the stations in req_stations and the variable groups
# in req_variables.
disclaimer, stations, variables, data = knmy.get_knmi_data(
    type='hourly',
    stations=req_stations,
    variables=req_variables,
    start=s_moment,
    end=e_moment,
    inseason=False,
    parse=True,
)

In [None]:
# Show the `variables` value returned by the 2017 request
# (presumably a description of the downloaded variables — confirm in the knmy docs).
print(variables)

In [None]:
# Show the `data` value returned by the 2017 request as plain text.
print(data)

In [None]:
# Report the size of the 2017 download, then write it as a ';'-separated CSV.
# NOTE(review): `city` is not assigned in this cell; it must already exist in
# the kernel (e.g. left over from the download loop) — confirm before running.
print(data.shape)
data.to_csv(
    f"../data/data_raw/2017_meteo_{city}.csv",
    index=True,
    sep=';',
    decimal='.',
    encoding='utf-8',
)

##### **2018**

In [None]:
# Request window for 2018, built from the configured start/end month and day.
s_moment = datetime(2018, SM, SD, 0)
e_moment = datetime(2018, EM, ED, 23)

# Hourly request for the stations in req_stations and the variable groups
# in req_variables.
disclaimer, stations, variables, data = knmy.get_knmi_data(
    type='hourly',
    stations=req_stations,
    variables=req_variables,
    start=s_moment,
    end=e_moment,
    inseason=False,
    parse=True,
)

In [None]:
# Report the size of the 2018 download, then write it as a ';'-separated CSV.
# NOTE(review): `city` is not assigned in this cell; it must already exist in
# the kernel (e.g. left over from the download loop) — confirm before running.
print(data.shape)
data.to_csv(
    f"../data/data_raw/2018_meteo_{city}.csv",
    index=True,
    sep=';',
    decimal='.',
    encoding='utf-8',
)

##### **2019**

In [None]:
# Request window for 2019, built from the configured start/end month and day.
s_moment = datetime(2019, SM, SD, 0)
e_moment = datetime(2019, EM, ED, 23)

# Hourly request for the stations in req_stations and the variable groups
# in req_variables.
disclaimer, stations, variables, data = knmy.get_knmi_data(
    type='hourly',
    stations=req_stations,
    variables=req_variables,
    start=s_moment,
    end=e_moment,
    inseason=False,
    parse=True,
)

In [None]:
# Report the size of the 2019 download, then write it as a ';'-separated CSV.
# NOTE(review): `city` is not assigned in this cell; it must already exist in
# the kernel (e.g. left over from the download loop) — confirm before running.
print(data.shape)
data.to_csv(
    f"../data/data_raw/2019_meteo_{city}.csv",
    index=True,
    sep=';',
    decimal='.',
    encoding='utf-8',
)

##### **2020**

In [None]:
# Request window for 2020, built from the configured start/end month and day.
s_moment = datetime(2020, SM, SD, 0)
e_moment = datetime(2020, EM, ED, 23)  # leap year

# Hourly request for the stations in req_stations and the variable groups
# in req_variables.
disclaimer, stations, variables, data = knmy.get_knmi_data(
    type='hourly',
    stations=req_stations,
    variables=req_variables,
    start=s_moment,
    end=e_moment,
    inseason=False,
    parse=True,
)

In [None]:
# Report the size of the 2020 download, then write it as a ';'-separated CSV.
# NOTE(review): `city` is not assigned in this cell; it must already exist in
# the kernel (e.g. left over from the download loop) — confirm before running.
print(data.shape)
data.to_csv(
    f"../data/data_raw/2020_meteo_{city}.csv",
    index=True,
    sep=';',
    decimal='.',
    encoding='utf-8',
)

##### **2021**

In [None]:
# Request window for 2021, built from the configured start/end month and day.
s_moment = datetime(2021, SM, SD, 0)
e_moment = datetime(2021, EM, ED, 23)

# Hourly request for the stations in req_stations and the variable groups
# in req_variables.
disclaimer, stations, variables, data = knmy.get_knmi_data(
    type='hourly',
    stations=req_stations,
    variables=req_variables,
    start=s_moment,
    end=e_moment,
    inseason=False,
    parse=True,
)

In [None]:
# Report the size of the 2021 download, then write it as a ';'-separated CSV.
# NOTE(review): `city` is not assigned in this cell; it must already exist in
# the kernel (e.g. left over from the download loop) — confirm before running.
print(data.shape)
data.to_csv(
    f"../data/data_raw/2021_meteo_{city}.csv",
    index=True,
    sep=';',
    decimal='.',
    encoding='utf-8',
)

##### **2022**

In [None]:
# Request window for 2022, built from the configured start/end month and day.
s_moment = datetime(2022, SM, SD, 0)
e_moment = datetime(2022, EM, ED, 23)

# Hourly request for the stations in req_stations and the variable groups
# in req_variables.
disclaimer, stations, variables, data = knmy.get_knmi_data(
    type='hourly',
    stations=req_stations,
    variables=req_variables,
    start=s_moment,
    end=e_moment,
    inseason=False,
    parse=True,
)

In [None]:
# Report the size of the 2022 download, then write it as a ';'-separated CSV.
# NOTE(review): `city` is not assigned in this cell; it must already exist in
# the kernel (e.g. left over from the download loop) — confirm before running.
print(data.shape)
data.to_csv(
    f"../data/data_raw/2022_meteo_{city}.csv",
    index=True,
    sep=';',
    decimal='.',
    encoding='utf-8',
)

##### **2023**

Imported manually via: https://www.daggegevens.knmi.nl/klimatologie/uurgegevens, see other file

In [None]:
# s_moment = datetime(2023, SM, SD, 0)
# e_moment = datetime(2023, EM, ED, 23)

# disclaimer, stations, variables, data = knmy.get_knmi_data(type = 'hourly',
#                                                            stations = req_stations,
#                                                            variables = req_variables,
#                                                            start = s_moment,
#                                                            end   = e_moment,
#                                                            inseason = False,
#                                                            parse = True)

In [None]:
# print(data.shape)
# data.to_csv(f"../data/data_raw/2023_meteo_{city}.csv", index = True, sep = ';', decimal = '.', encoding = 'utf-8')