In [38]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import requests
from glob import glob

from pvlib import pvsystem, modelchain, location, irradiance
from pvlib.solarposition import get_solarposition
from pvlib import irradiance, solarposition

def get_hourly_weather_data_for_pvlib(stations, start_date, end_date, timezone='UTC'):
    '''
    Function to get hourly weather variables T (temperature) and
    Q (global radiation) from KNMI 
    
    Args: 
        stations   (str): NMI-stations separated by ':' 
        start_date (str): start date, format yyyymmdd
        end_date   (str): end date (included), format yyyymmdd
        timezone   (str, optional): timezone

    Returns:
        df: DataFrame with DateTime-index, columns T (temp), Q (global radiation) 
    '''

    url = 'https://www.daggegevens.knmi.nl/klimatologie/uurgegevens'

    data = {
        'start': start_date,
        'end': end_date,
        'vars': 'Q:T:FH',
        'stns': stations,
        'fmt': 'json'
    }

    response = requests.post(url, data=data)    
    weather_df = pd.DataFrame(response.json())
    
    # correct units
    weather_df['T'] = weather_df['T'] / 10          # is in 0.1 degrees C, to degrees C
    weather_df['Q'] = weather_df['Q'] * (1 / 0.36)  # is in J/m2, to W / m2
    weather_df['FH'] = weather_df['FH'] * 10        # from 0.1 m/s to m/s         
    
    # create date_time index, convert timezone
    weather_df['hour'] = weather_df['hour'] - 1     # is from 1-24, to 0-23
    weather_df['date_time'] = pd.to_datetime(weather_df['date']) + pd.to_timedelta(weather_df['hour'].astype(int), unit='h')
    weather_df.index = weather_df.date_time
    
    weather_df = weather_df.drop(['station_code', 'date', 'hour', 'date_time'], axis=1)
    weather_df.index = weather_df.index.tz_convert(timezone)

    return weather_df

def process_weather_data(weather_df: pd.DataFrame, lat: float, lon: float) -> pd.DataFrame:
    """
    Process weather data to calculate DNI and DHI.

    Parameters:
    - weather_df: DataFrame containing weather data with datetime index, temperature (T), and GHI (Q).
    - lat: Latitude of the location.
    - lon: Longitude of the location.

    Returns:
    - DataFrame: Processed weather data with added DNI, DHI, no NaN values.
    """
    # Get solar position for the dates / times
    solpos_df = solarposition.get_solarposition(
        weather_df.index, latitude=lat,
        longitude=lon, altitude=0,
        temperature=weather_df['T']
    )
    solpos_df.index = weather_df.index

    # Method 'Erbs' to go from GHI to DNI and DHI
    irradiance_df = irradiance.erbs(weather_df['Q'], solpos_df['zenith'], weather_df.index)
    irradiance_df['ghi'] = weather_df['Q']

    # Add DNI and DHI to weather_df
    columns = ['dni', 'dhi']
    weather_df[columns] = irradiance_df[columns]

    # Fill NaN values with 0
    weather_df.fillna(0, inplace=True)
    
    return weather_df

# Parameters
timezone = 'Europe/Amsterdam'
start_date = '20221231'
end_date = '20231231'
station = '370'
lat = 51.449772459909
lon = 5.3770039280214

# Fetch and process weather data
weather_df = get_hourly_weather_data_for_pvlib(station, start_date, end_date, timezone)
weather_df = process_weather_data(weather_df, lat, lon)

# Cutoff unneeded values
weather_df = weather_df[weather_df.index.year == 2023]
print(weather_df)


                             Q     T    FH  dni  dhi
date_time                                           
2023-01-01 00:00:00+01:00  0.0  16.8  1100  0.0  0.0
2023-01-01 01:00:00+01:00  0.0  16.3  1100  0.0  0.0
2023-01-01 02:00:00+01:00  0.0  15.6  1000  0.0  0.0
2023-01-01 03:00:00+01:00  0.0  15.3  1000  0.0  0.0
2023-01-01 04:00:00+01:00  0.0  15.1  1000  0.0  0.0
...                        ...   ...   ...  ...  ...
2023-12-31 19:00:00+01:00  0.0   9.6  1000  0.0  0.0
2023-12-31 20:00:00+01:00  0.0   8.6   800  0.0  0.0
2023-12-31 21:00:00+01:00  0.0   8.4  1000  0.0  0.0
2023-12-31 22:00:00+01:00  0.0   8.0   900  0.0  0.0
2023-12-31 23:00:00+01:00  0.0   8.6   800  0.0  0.0

[8760 rows x 5 columns]


In [39]:
df = filtered_df.copy()

def extract_daily_values(df):
    daily_data = df.groupby(df.index.date).agg(list)
    keeps = {
        'T':'temperature_sequence',
        'FH':'wind_speed_sequence',
        'dni':'dni_sequence',
        'dhi':'dhi_sequence',
        'Q':'global_irradiance_sequence'
    }

    daily_data=daily_data[keeps.keys()].rename(keeps,axis=1)
    return daily_data

# Extract daily values into lists
daily_values_df = extract_daily_values(df)

# Print the resulting DataFrame
print(daily_values_df)

                                         temperature_sequence  \
2023-01-01  [16.8, 16.3, 15.6, 15.3, 15.1, 14.4, 14.1, 11....   
2023-01-02  [12.2, 12.7, 12.6, 12.7, 12.7, 12.5, 12.4, 10....   
2023-01-03  [4.8, 4.0, 3.1, 3.0, 2.9, 2.7, 3.8, 5.0, 5.3, ...   
2023-01-04  [8.5, 8.9, 9.2, 9.7, 10.0, 10.4, 11.0, 11.1, 1...   
2023-01-05  [11.3, 10.3, 10.1, 10.6, 10.8, 10.9, 10.9, 10....   
...                                                       ...   
2023-12-27  [3.9, 3.3, 3.7, 4.4, 5.0, 5.4, 5.7, 6.4, 7.0, ...   
2023-12-28  [10.5, 10.4, 10.4, 10.1, 10.7, 11.2, 10.8, 10....   
2023-12-29  [11.2, 10.1, 9.6, 10.3, 10.6, 10.2, 10.2, 9.9,...   
2023-12-30  [8.3, 8.0, 8.4, 7.9, 7.3, 6.9, 6.8, 6.8, 6.2, ...   
2023-12-31  [9.7, 9.5, 9.5, 9.4, 9.3, 9.2, 9.5, 10.3, 10.7...   

                                          wind_speed_sequence  \
2023-01-01  [1100, 1100, 1000, 1000, 1000, 900, 800, 600, ...   
2023-01-02  [500, 400, 500, 600, 600, 600, 700, 700, 500, ...   
2023-01-03  [400, 300, 2