# 02_day_night
Sunrise/sunset helpers and day/night flagging using Astral.

In [1]:
# If needed, install dependencies in this environment
# %pip install astral pandas


In [2]:
from datetime import date, datetime, timedelta, timezone
from pathlib import Path
from zoneinfo import ZoneInfo

from astral import LocationInfo
from astral.sun import sun

import pandas as pd

# Default location: Munich (can be overridden)
LAT = 48.1372
LON = 11.5756

# Time zone used for every "local" sunrise/sunset value in this notebook.
# NOTE(review): the location is Munich but the zone is UTC, so "local" times
# below are UTC clock times - confirm this is intended.
TZ_NAME = "UTC"

# Validate the zone name up front. If it cannot be loaded (unknown name or no
# tz database available) fall back to UTC, and say so: every result further
# down depends on this zone, so the substitution must be visible to the reader.
try:
    ZoneInfo(TZ_NAME)
except Exception as exc:
    print(f"warning: time zone {TZ_NAME!r} could not be loaded ({exc!r}); falling back to 'UTC'")
    TZ_NAME = "UTC"

# Target year
YEAR = 2009


In [3]:
def sunrise_sunset_local(lat: float, lon: float, d: date, tz_name: str = TZ_NAME):
    """Compute sunrise and sunset for one calendar date at one location.

    Parameters
    ----------
    lat, lon : float
        Observer latitude and longitude passed to Astral's ``LocationInfo``.
    d : date
        Calendar date for which the sun events are requested.
    tz_name : str
        IANA zone name; the returned datetimes are expressed in this zone.

    Returns
    -------
    tuple
        ``(sunrise, sunset)`` as tz-aware datetimes in ``tz_name``.

    NOTE(review): any error raised by ``ZoneInfo`` or ``astral.sun.sun`` is
    propagated unchanged - confirm how Astral behaves for dates/latitudes
    without a sunrise or sunset before using this outside mid-latitudes.
    """
    zone = ZoneInfo(tz_name)
    place = LocationInfo(latitude=lat, longitude=lon, timezone=tz_name)
    events = sun(place.observer, date=d, tzinfo=zone)
    sunrise = events["sunrise"]
    sunset = events["sunset"]
    return sunrise, sunset


def is_day_for_hour(dt: datetime, sunrise_local: datetime, sunset_local: datetime, tz_name: str = TZ_NAME) -> bool:
    """Tell whether ``dt`` lies in the half-open interval [sunrise, sunset).

    ``dt`` is first converted to the zone ``tz_name``; the sunrise instant
    itself counts as day, the sunset instant itself counts as night.
    """
    moment = dt.astimezone(ZoneInfo(tz_name))
    # Guard clause: anything before sunrise is night.
    if not (moment >= sunrise_local):
        return False
    return moment < sunset_local


In [4]:
# Example for one date
example_date = date(2010, 1, 1)
sr, ss = sunrise_sunset_local(LAT, LON, example_date)
for label, moment in (('sunrise:', sr), ('sunset :', ss)):
    print(label, moment)

# Example classification for a UTC timestamp
sample_utc = datetime(2010, 1, 1, 6, 0, tzinfo=timezone.utc)
sample_is_day = is_day_for_hour(sample_utc, sr, ss)
print('sample hour is day:', sample_is_day)


sunrise: 2010-01-01 07:04:28.427446+00:00
sunset : 2010-01-01 15:30:09.779849+00:00
sample hour is day: False


In [5]:
# Apply to a DataFrame of hourly UTC timestamps
# Example: assume df has a UTC column 'timestamp_utc'
# You can adjust LAT/LON per station if needed.

def classify_day_night(df: pd.DataFrame, ts_col: str = 'timestamp_utc', lat: float = LAT, lon: float = LON, tz_name: str = TZ_NAME) -> pd.DataFrame:
    """Return a copy of ``df`` with a boolean ``is_day`` column.

    A row is flagged as day when its timestamp, converted to ``tz_name``,
    lies in [sunrise, sunset) of its own calendar date in that zone.

    Parameters
    ----------
    df : pd.DataFrame
        Input frame; it is not modified.
    ts_col : str
        Column holding the timestamps. Values are parsed with
        ``pd.to_datetime(..., utc=True)``, so tz-aware values are converted
        and naive values or strings are interpreted as UTC.
    lat, lon : float
        Location forwarded to ``sunrise_sunset_local``.
    tz_name : str
        Zone used to pick the calendar date and to express sunrise/sunset.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the extra column ``is_day`` (dtype bool). The
        ``ts_col`` column itself is left as it was passed in.

    Raises
    ------
    ValueError
        If ``ts_col`` contains values that cannot be parsed as datetimes.
    """
    df = df.copy()

    # Normalise once, the same way classify_day_night_vdi does, so both
    # classifiers accept the same inputs and fail with the same message.
    ts_utc = pd.to_datetime(df[ts_col], utc=True, errors='coerce')
    if ts_utc.isna().any():
        raise ValueError(f"{ts_col} contains non-datetime values")

    ts_local = pd.DatetimeIndex(ts_utc).tz_convert(tz_name)
    local_dates = ts_local.date

    # Compute sunrise/sunset per date to avoid recalculating for every row
    sun_map = {d: sunrise_sunset_local(lat, lon, d, tz_name) for d in sorted(pd.unique(local_dates))}

    # Build the flags from a plain list instead of DataFrame.apply(axis=1):
    # on an empty frame apply hands back a DataFrame rather than a boolean
    # Series, which breaks the column assignment below.
    flags = [is_day_for_hour(ts, *sun_map[d], tz_name) for ts, d in zip(ts_local, local_dates)]

    # Assign positionally (plain array) so the frame's index plays no role.
    df['is_day'] = pd.Series(flags, dtype=bool).to_numpy()
    return df


In [6]:
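# VDI rule: night if SU < t <= SA + 1 h, where SU = sunset and SA = sunrise (local decimal hours),
# i.e. any time after sunset or up to one hour after sunrise; else day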

def _dec_hours(dt):
    return dt.hour + dt.minute / 60 + dt.second / 3600 + dt.microsecond / 3.6e9

def classify_day_night_vdi(df: pd.DataFrame, ts_col: str = 'timestamp_utc', lat: float = LAT, lon: float = LON, tz_name: str = TZ_NAME) -> pd.DataFrame:
    """Return a copy of ``df`` flagged with the VDI night rule.

    A row is night when its local time of day ``t`` (decimal hours in
    ``tz_name``) satisfies ``t > sunset`` or ``t <= sunrise + 1``; every
    other row is day.

    Parameters
    ----------
    df : pd.DataFrame
        Input frame; it is not modified.
    ts_col : str
        Timestamp column. It is parsed with ``pd.to_datetime(..., utc=True)``
        and the parsed values replace the column in the returned copy.
    lat, lon : float
        Location forwarded to ``sunrise_sunset_local``.
    tz_name : str
        Zone in which time of day, calendar date and sun events are taken.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with the added columns ``ts_local`` (timestamps in
        ``tz_name``), ``is_night_vdi`` and ``is_day_vdi`` (both bool).

    Raises
    ------
    ValueError
        If ``ts_col`` contains values that cannot be parsed as datetimes.

    NOTE(review): only time-of-day values are compared, so this assumes that
    sunrise and sunset fall on the same calendar day in ``tz_name`` - confirm
    before using a zone far away from the site's longitude.
    """
    df = df.copy()
    tz = ZoneInfo(tz_name)
    df[ts_col] = pd.to_datetime(df[ts_col], utc=True, errors='coerce')
    if df[ts_col].isna().any():
        raise ValueError(f"{ts_col} contains non-datetime values")

    ts_local = pd.DatetimeIndex(df[ts_col]).tz_convert(tz)
    df['ts_local'] = ts_local
    local_dates = ts_local.date

    # Sunrise/sunset as decimal hours, computed once per calendar date
    # instead of once per row (they are constant within a date).
    bounds = {}
    for d in sorted(pd.unique(local_dates)):
        sr, ss = sunrise_sunset_local(lat, lon, d, tz_name)
        bounds[d] = (_dec_hours(sr.astimezone(tz)), _dec_hours(ss.astimezone(tz)))

    # Plain loop instead of DataFrame.apply(axis=1): on an empty frame apply
    # hands back a DataFrame rather than a boolean Series, which breaks the
    # column assignment below.
    night = []
    for ts, d in zip(ts_local, local_dates):
        sr_h, ss_h = bounds[d]
        h = _dec_hours(ts)
        night.append((h > ss_h) or (h <= sr_h + 1))

    # Assign positionally (plain array) so the frame's index plays no role.
    df['is_night_vdi'] = pd.Series(night, dtype=bool).to_numpy()
    df['is_day_vdi'] = ~df['is_night_vdi']
    return df

# Example usage on a df with tz-aware UTC column 'timestamp_utc':
# classified = classify_day_night_vdi(df)
# classified[['timestamp_utc', 'ts_local', 'is_night_vdi']].head()


In [7]:
# Load merged data (timestamps are UTC) and add day/night flag
# NOTE(review): hard-coded absolute network path - consider moving it to the config cell.
BASE_DIR = Path(r'U:\\Abt02\\Ref23\\Daten\\LQ-Modellierung\\06_Modellierung\\04_WINMiskam\\02_Meteorologie')
merged_path = BASE_DIR / 'data' / 'processed' / f'merged_wind_cloud_{YEAR}.csv'
df = pd.read_csv(merged_path)
# Source timestamps are UTC; parse them as tz-aware UTC (unparseable values become NaT)
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce', utc=True)
# Express the timestamps in the configured zone so that this column and the
# per-date sunrise/sunset lookup below use the same calendar dates as
# classify_day_night. With TZ_NAME = "UTC" the values are unchanged.
df['timestamp_local'] = df['timestamp'].dt.tz_convert(TZ_NAME)

# Drop rows where localization failed (NaT), since sunrise/sunset logic requires valid timestamps
valid_mask = df['timestamp_local'].notna()
dropped = (~valid_mask).sum()
if dropped:
    print(f"dropping {dropped} rows with invalid localized timestamps")
df = df.loc[valid_mask].reset_index(drop=True)

# Add local sunrise (SA) and sunset (SU) for each date
dates = pd.DatetimeIndex(df['timestamp_local']).date
unique_dates = pd.unique(dates)
sun_map = {d: sunrise_sunset_local(LAT, LON, d, TZ_NAME) for d in unique_dates}
df['SA'] = [sun_map[d][0] for d in dates]
df['SU'] = [sun_map[d][1] for d in dates]
# Flag each row; the boolean 'is_day' result is stored as 'day_night' (True = day)
df = classify_day_night(df, ts_col='timestamp_local', lat=LAT, lon=LON, tz_name=TZ_NAME)
df = df.rename(columns={'is_day': 'day_night'})
# Write the result next to the input file
out_path = merged_path.with_name(f'merged_wind_cloud_{YEAR}_day_night.csv')
df.to_csv(out_path, index=False)
print(out_path)
df.head()


U:\Abt02\Ref23\Daten\LQ-Modellierung\06_Modellierung\04_WINMiskam\02_Meteorologie\data\processed\merged_wind_cloud_2009_day_night.csv


Unnamed: 0,STATIONS_ID,MESS_DATUM,QN_3,wind_speed_ms,wind_dir_deg,eor,timestamp,cloud_qn,cloud_cover_oktas,cloud_cover_flag,timestamp_local,SA,SU,day_night
0,3379,2009010100,10,1.5,280,eor,2009-01-01 00:00:00+00:00,3.0,5.0,P,2009-01-01 00:00:00+00:00,2009-01-01 07:04:28.661298+00:00,2009-01-01 15:30:23.765474+00:00,False
1,3379,2009010101,10,1.6,250,eor,2009-01-01 01:00:00+00:00,,,,2009-01-01 01:00:00+00:00,2009-01-01 07:04:28.661298+00:00,2009-01-01 15:30:23.765474+00:00,False
2,3379,2009010102,10,2.0,240,eor,2009-01-01 02:00:00+00:00,,,,2009-01-01 02:00:00+00:00,2009-01-01 07:04:28.661298+00:00,2009-01-01 15:30:23.765474+00:00,False
3,3379,2009010103,10,1.7,250,eor,2009-01-01 03:00:00+00:00,,,,2009-01-01 03:00:00+00:00,2009-01-01 07:04:28.661298+00:00,2009-01-01 15:30:23.765474+00:00,False
4,3379,2009010104,10,1.9,230,eor,2009-01-01 04:00:00+00:00,,,,2009-01-01 04:00:00+00:00,2009-01-01 07:04:28.661298+00:00,2009-01-01 15:30:23.765474+00:00,False
