In [28]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import requests
from glob import glob

from pvlib import pvsystem, modelchain, location, irradiance
from pvlib.solarposition import get_solarposition
from pvlib import irradiance, solarposition

def get_hourly_weather_data_for_pvlib(stations, start_date, end_date, timezone='UTC'):
    '''
    Function to get hourly weather variables T (temperature) and
    Q (global radiation) from KNMI 
    
    Args: 
        stations   (str): NMI-stations separated by ':' 
        start_date (str): start date, format yyyymmdd
        end_date   (str): end date (included), format yyyymmdd
        timezone   (str, optional): timezone

    Returns:
        df: DataFrame with DateTime-index, columns T (temp), Q (global radiation) 
    '''

    url = 'https://www.daggegevens.knmi.nl/klimatologie/uurgegevens'

    data = {
        'start': start_date,
        'end': end_date,
        'vars': 'Q:T:FH',
        'stns': stations,
        'fmt': 'json'
    }

    response = requests.post(url, data=data)    
    weather_df = pd.DataFrame(response.json())
    
    # correct units
    weather_df['T'] = weather_df['T'] / 10          # is in 0.1 degrees C, to degrees C
    weather_df['Q'] = weather_df['Q'] * (1 / 0.36)  # is in J/m2, to W / m2
    weather_df['FH'] = weather_df['FH'] * 10        # from 0.1 m/s to m/s         
    
    # create date_time index, convert timezone
    weather_df['hour'] = weather_df['hour'] - 1     # is from 1-24, to 0-23
    weather_df['date_time'] = pd.to_datetime(weather_df['date']) + pd.to_timedelta(weather_df['hour'].astype(int), unit='h')
    weather_df.index = weather_df.date_time
    
    weather_df = weather_df.drop(['station_code', 'date', 'hour', 'date_time'], axis=1)
    weather_df.index = weather_df.index.tz_convert(timezone)

    return weather_df

def process_weather_data(weather_df: pd.DataFrame, lat: float, lon: float) -> pd.DataFrame:
    """
    Process weather data to calculate DNI and DHI.

    Parameters:
    - weather_df: DataFrame containing weather data with datetime index, temperature (T), and GHI (Q).
    - lat: Latitude of the location.
    - lon: Longitude of the location.

    Returns:
    - DataFrame: Processed weather data with added DNI, DHI, no NaN values.
    """
    # Get solar position for the dates / times
    solpos_df = solarposition.get_solarposition(
        weather_df.index, latitude=lat,
        longitude=lon, altitude=0,
        temperature=weather_df['T']
    )
    solpos_df.index = weather_df.index

    # Method 'Erbs' to go from GHI to DNI and DHI
    irradiance_df = irradiance.erbs(weather_df['Q'], solpos_df['zenith'], weather_df.index)
    irradiance_df['ghi'] = weather_df['Q']

    # Add DNI and DHI to weather_df
    columns = ['dni', 'dhi']
    weather_df[columns] = irradiance_df[columns]

    # Fill NaN values with 0
    weather_df.fillna(0, inplace=True)
    
    return weather_df

# Parameters
timezone = 'Europe/Amsterdam'
start_date = '20221231'
end_date = '20231231'
station = '370'
lat = 51.449772459909
lon = 5.3770039280214

# Fetch and process weather data
weather_df = get_hourly_weather_data_for_pvlib(station, start_date, end_date, timezone)
weather_df = process_weather_data(weather_df, lat, lon)

df = weather_df.copy()


# Filter the DataFrame
filtered_df = df[df.index.year == 2023]

# Print the processed DataFrame
print(filtered_df)


                             Q     T    FH  dni  dhi
date_time                                           
2023-01-01 00:00:00+01:00  0.0  16.8  1100  0.0  0.0
2023-01-01 01:00:00+01:00  0.0  16.3  1100  0.0  0.0
2023-01-01 02:00:00+01:00  0.0  15.6  1000  0.0  0.0
2023-01-01 03:00:00+01:00  0.0  15.3  1000  0.0  0.0
2023-01-01 04:00:00+01:00  0.0  15.1  1000  0.0  0.0
...                        ...   ...   ...  ...  ...
2023-12-31 19:00:00+01:00  0.0   9.6  1000  0.0  0.0
2023-12-31 20:00:00+01:00  0.0   8.6   800  0.0  0.0
2023-12-31 21:00:00+01:00  0.0   8.4  1000  0.0  0.0
2023-12-31 22:00:00+01:00  0.0   8.0   900  0.0  0.0
2023-12-31 23:00:00+01:00  0.0   8.6   800  0.0  0.0

[8760 rows x 5 columns]


In [29]:
df = filtered_df.copy()
def extract_daily_values(df):
    # Group by date, aggregate hourly values into lists
    daily_data = df.groupby(df.index.date).agg(list)
    
    return daily_data

# Extract daily values into lists
daily_values_df = extract_daily_values(df)

# Print the resulting DataFrame
print(daily_values_df)

                                                            Q  \
2023-01-01  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   
2023-01-02  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   
2023-01-03  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.777...   
2023-01-04  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   
2023-01-05  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   
...                                                       ...   
2023-12-27  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   
2023-12-28  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   
2023-12-29  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   
2023-12-30  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   
2023-12-31  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   

                                                            T  \
2023-01-01  [16.8, 16.3, 15.6, 15.3, 15.1, 14.4, 14.1, 11....   
2023-01-02  [12.2, 12.7, 12.6, 12.7, 12.7, 12.5, 12.4, 10....   
2023-01-03  [4.8, 4.0, 3

In [32]:
df = daily_values_df.copy()
# Print the size of each list in the DataFrame
list_sizes = df.applymap(len)
print(list_sizes)

             Q   T  FH  dni  dhi
2023-01-01  24  24  24   24   24
2023-01-02  24  24  24   24   24
2023-01-03  24  24  24   24   24
2023-01-04  24  24  24   24   24
2023-01-05  24  24  24   24   24
...         ..  ..  ..  ...  ...
2023-12-27  24  24  24   24   24
2023-12-28  24  24  24   24   24
2023-12-29  24  24  24   24   24
2023-12-30  24  24  24   24   24
2023-12-31  24  24  24   24   24

[365 rows x 5 columns]


  list_sizes = df.applymap(len)
