In [31]:
import sys
sys.path.append('../')
import psycopg2
from configparser import ConfigParser
import requests
from datetime import datetime
from config.settings import settings

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import TimeoutException
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import psycopg2
from configparser import ConfigParser
import re
import logging
import time
import json

### Connect to the database

In [19]:
def connect(config):
    """ Connect to the PostgreSQL database server """
    try:
        # connecting to the PostgreSQL server
        with psycopg2.connect(**config) as conn:
            print('Connected to the PostgreSQL server.')
            return conn
    except (psycopg2.DatabaseError, Exception) as error:
        print(error)

In [20]:
def load_config(filename='../database/database.ini', section='postgresql'):
    parser = ConfigParser()
    parser.read(filename)

    # get section, default to postgresql
    config = {}
    if parser.has_section(section):
        params = parser.items(section)
        for param in params:
            config[param[0]] = param[1]
    else:
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))

    return config


In [21]:
config = load_config()
connect(config)

Connected to the PostgreSQL server.


<connection object at 0x000002A838CAEBD0; dsn: 'user=postgres password=xxx dbname=debris_flow_dt host=localhost', closed: 0>

In [22]:
def create_weather_table(config):
    """
    Create weather_data_hourly table if it doesn't exist
    """
    create_table_query = """
    CREATE TABLE IF NOT EXISTS weather_data_hourly (
        id SERIAL PRIMARY KEY,
        station_id VARCHAR(50) NOT NULL,
        timestamp TIMESTAMP NOT NULL,
        temperature_c FLOAT,
        humidity_percent FLOAT,
        pressure_hpa FLOAT,
        wind_speed_kmh FLOAT,
        gust_speed_kmh FLOAT,
        precipitation_mm FLOAT,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        data_source VARCHAR(50) DEFAULT 'AWEKAS_TABLE',
        UNIQUE(station_id, timestamp)
    );
    
    CREATE INDEX IF NOT EXISTS idx_station_timestamp 
    ON weather_data_hourly(station_id, timestamp);
    
    CREATE INDEX IF NOT EXISTS idx_timestamp 
    ON weather_data_hourly(timestamp);
    """
    
    try:
        with psycopg2.connect(**config) as conn:
            with conn.cursor() as cur:
                cur.execute(create_table_query)
                conn.commit()
                # logger.info("Table 'weather_data_hourly' created/verified")
                print("Table 'weather_data_hourly' created/verified")
                return True
    except (psycopg2.DatabaseError, Exception) as error:
        # logger.error(f"Error creating table: {error}")
        print(f"Error creating table: {error}")
        return False

### Wrap data from web
AWEKAS WEATHER DATA: Grossglockner Station

In [32]:
station_id = "34362"
base_url = "https://stationsweb.awekas.at"
urls = {
            'index-tab': f"{base_url}/en/{station_id}/index-tab",
            'table': f"{base_url}/en/{station_id}/table",
            'data': f"{base_url}/en/{station_id}/data",
            'statistic': f"{base_url}/en/{station_id}/statistic"
        }

In [33]:
# Setup Chrome options
chrome_options = Options()
chrome_options.add_argument('--headless')  # Run without GUI
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--window-size=1920,1080')
chrome_options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')

In [35]:
def accept_cookies_ionic(driver, timeout=10):
    try:
        # Wait until ion-modal is present
        WebDriverWait(driver, timeout).until(
            lambda d: d.execute_script(
                "return document.querySelector('ion-modal#cookie-banner') !== null"
            )
        )

        # Click "Accept all" inside Shadow DOM
        driver.execute_script("""
            const modal = document.querySelector('ion-modal#cookie-banner');
            if (!modal) return;

            const root = modal.shadowRoot;
            if (!root) return;

            const buttons = modal.querySelectorAll('ion-button');
            for (const btn of buttons) {
                if (btn.innerText.trim().toLowerCase().includes('accept')) {
                    btn.click();
                    return;
                }
            }
        """)
        print("Cookie banner accepted")

        # Small wait to allow modal to close
        time.sleep(0.5)

    except TimeoutException:
        print("No cookie banner found")


In [36]:
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 15)
driver.get(urls['table'])
accept_cookies_ionic(driver)

Cookie banner accepted


In [29]:
date_element = wait.until(
    EC.visibility_of_element_located(
        (By.XPATH, "//div[contains(@class,'date') and contains(@class,'visible')]//ion-text")
    )
)

date_text = date_element.text
print(date_text)   # January 19, 2026


January 19, 2026


In [None]:
date_obj = datetime.strptime(date_text, "%B %d, %Y")
print(f"Date found: {date_obj.date()}")   # 2026-01-19

Date found: 2026-01-19


In [13]:
table = wait.until(
    EC.visibility_of_element_located(
        (By.XPATH, "//div[contains(@class,'card') and contains(@class,'visible')]//table")
    )
)

In [14]:
rows = table.find_elements(By.XPATH, ".//tbody/tr")

data = []

for row in rows:
    cells = row.find_elements(By.TAG_NAME, "td")

    row_data = {
        "time": cells[0].text,
        "temperature": cells[1].text,
        "humidity": cells[2].text,
        "pressure": cells[3].text,
        "wind": cells[4].text,
        "gust": cells[5].text,
        "precipitation": cells[6].text
    }

    data.append(row_data)

for row in data:
    print(row)

{'time': '00:00', 'temperature': '25.9 °F', 'humidity': '94.0%', 'pressure': '0.00 inHg', 'wind': '0.4 mph', 'gust': '0.4 mph', 'precipitation': '0.00 in'}
{'time': '01:00', 'temperature': '25.5 °F', 'humidity': '94.0%', 'pressure': '0.00 inHg', 'wind': '0.0 mph', 'gust': '0.0 mph', 'precipitation': '0.00 in'}
{'time': '02:00', 'temperature': '25.5 °F', 'humidity': '94.0%', 'pressure': '0.00 inHg', 'wind': '0.2 mph', 'gust': '0.2 mph', 'precipitation': '0.00 in'}
{'time': '03:00', 'temperature': '25.2 °F', 'humidity': '94.0%', 'pressure': '0.00 inHg', 'wind': '0.0 mph', 'gust': '0.0 mph', 'precipitation': '0.00 in'}
{'time': '04:00', 'temperature': '25.2 °F', 'humidity': '94.0%', 'pressure': '0.00 inHg', 'wind': '0.2 mph', 'gust': '0.2 mph', 'precipitation': '0.00 in'}
{'time': '05:00', 'temperature': '25.0 °F', 'humidity': '94.0%', 'pressure': '0.00 inHg', 'wind': '0.0 mph', 'gust': '0.0 mph', 'precipitation': '0.00 in'}
{'time': '06:00', 'temperature': '25.0 °F', 'humidity': '94.0%',

In [37]:
def scrape_one_day(driver, wait):
    # Read date
    date_element = wait.until(
        EC.visibility_of_element_located(
            (By.XPATH, "//div[contains(@class,'date') and contains(@class,'visible')]//ion-text")
        )
    )
    date_text = date_element.text
    date_obj = datetime.strptime(date_text, "%B %d, %Y").date()

    # Read table
    table = wait.until(
        EC.visibility_of_element_located(
            (By.XPATH, "//div[contains(@class,'card') and contains(@class,'visible')]//table")
        )
    )

    rows = table.find_elements(By.XPATH, ".//tbody/tr")
    day_data = []

    for row in rows:
        cells = row.find_elements(By.TAG_NAME, "td")

        def clean_num(text):
            clean = re.sub(r"[^\d\.\-]", "", text)
            return float(clean) if clean else 0.0

        time_str = cells[0].text
        timestamp = datetime.combine(
            date_obj,
            datetime.strptime(time_str, "%H:%M").time()
        )

        day_data.append({
            "timestamp": timestamp,
            "precipitation_mm": clean_num(cells[6].text),
            "temperature_c": clean_num(cells[1].text),
            "humidity_percent": clean_num(cells[2].text),
            "wind_kmh": clean_num(cells[4].text)
        })

    return date_obj, day_data


In [38]:
def go_to_previous_day(driver, wait, current_date_text):
    prev_button = wait.until(
        EC.element_to_be_clickable(
            (By.XPATH, "//div[contains(@class,'date') and contains(@class,'visible')]//ion-buttons[contains(@class,'left')]//ion-button")
        )
    )

    prev_button.click()

    # Wait until date text changes
    wait.until(
        lambda d: d.find_element(
            By.XPATH,
            "//div[contains(@class,'date') and contains(@class,'visible')]//ion-text"
        ).text != current_date_text
    )


In [39]:
# driver.get(urls['table'])

all_data = []
dates_collected = []
days = 14
for i in range(days):
    date_element = wait.until(
        EC.visibility_of_element_located(
            (By.XPATH, "//div[contains(@class,'date') and contains(@class,'visible')]//ion-text")
        )
    )
    current_date_text = date_element.text

    date_obj, day_data = scrape_one_day(driver, wait)
    dates_collected.append(date_obj)
    all_data.extend(day_data)

    print(f"Collected {len(day_data)} rows for {date_obj}")

    if i < (days-1):
        go_to_previous_day(driver, wait, current_date_text)

# driver.quit()


Collected 18 rows for 2026-01-19
Collected 18 rows for 2026-01-18
Collected 24 rows for 2026-01-17
Collected 24 rows for 2026-01-16
Collected 24 rows for 2026-01-15
Collected 24 rows for 2026-01-14
Collected 24 rows for 2026-01-13
Collected 24 rows for 2026-01-12
Collected 24 rows for 2026-01-11
Collected 24 rows for 2026-01-10
Collected 24 rows for 2026-01-09
Collected 24 rows for 2026-01-08
Collected 24 rows for 2026-01-07
Collected 24 rows for 2026-01-06


### Computing antecedent rainfall

In [40]:
def antecedent_rainfall(data, decay=0.84):
    data_sorted = sorted(data, key=lambda x: x["timestamp"], reverse=True)

    rain_eff = 0.0
    last_day = data_sorted[0]["timestamp"].date()

    for row in data_sorted:
        days_ago = (last_day - row["timestamp"].date()).days
        rain_eff += row["precipitation_mm"] * (decay ** days_ago)

    return rain_eff


In [41]:
ra_eff = antecedent_rainfall(all_data)
print(f"Effective antecedent rainfall: {ra_eff:.3f} mm")

Effective antecedent rainfall: 0.065 mm
