In [None]:
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt

In [None]:
# Build the monthly average rainfall table from the per-station text files
# in the data folder and save it there as a CSV.
folder_path = "../data/"

# Sorted so the files are always processed in the same order.
file_paths = sorted(glob.glob(os.path.join(folder_path, "rainfall*.txt")))
if not file_paths:
    # Fail early with a readable message instead of letting pd.concat raise
    # "No objects to concatenate" further down.
    raise FileNotFoundError(
        f"No rainfall*.txt files found in {os.path.abspath(folder_path)!r}"
    )

all_data = []

for file_path in file_paths:
    raw = pd.read_csv(
        file_path,
        comment="#",  # ignore everything from a '#' to the end of the line
        header=None,
        names=["station", "date", "rainfall"],
        na_values=[" ", "", "NaN"],
    )

    # Non-numeric rainfall entries become NaN and those rows are dropped.
    rainfall = pd.to_numeric(raw["rainfall"], errors="coerce")
    # Dates are kept as strings in "date"; the parsed version goes in
    # "date_datetime" and is computed only once.
    date_text = raw["date"].astype(str)
    date_parsed = pd.to_datetime(date_text)

    df = (
        raw.assign(
            # Raw values are in units of 0.1 mm, so divide by 10 to get mm
            # (the previous "* 10.0" scaled in the wrong direction).
            rainfall=rainfall / 10.0,
            date=date_text,
            date_datetime=date_parsed,
            year_month=date_parsed.dt.strftime("%Y-%m"),
        )
        .dropna(subset=["rainfall"])
    )

    all_data.append(df)

# Combine all data
combined = pd.concat(all_data)

# Mean over every station/day record that falls in the same calendar month.
monthly_avg = combined.groupby("year_month", as_index=False)["rainfall"].mean()

output_path = os.path.join(folder_path, "monthly_national_avg_rainfall.csv")
monthly_avg.to_csv(output_path, index=False)

print(f"Saved monthly averages to: {output_path}")

In [None]:
# Write a second copy of the monthly averages using an empty folder prefix,
# i.e. a path relative to the current working directory.
# Note: this rebinds both `folder_path` and `output_path`.
folder_path = ""
output_path = os.path.join(
    folder_path,
    "monthly_national_avg_rainfall.csv",
)
monthly_avg.to_csv(path_or_buf=output_path, index=False)

In [None]:
# Sanity check: list the distinct YYYY-MM values present in the combined data.
(
    pd.to_datetime(combined["date"])
    .dt.strftime("%Y-%m")
    .unique()
)

In [None]:
# Show the monthly averages table as this cell's output.
monthly_avg

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Plot the saved monthly average rainfall as a time series.
df = pd.read_csv("../data/monthly_national_avg_rainfall.csv")

# Parse the "YYYY-MM" labels into datetimes so the x-axis is treated as dates.
df["year_month"] = pd.to_datetime(df["year_month"], format="%Y-%m")

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df["year_month"], df["rainfall"], marker="o", linestyle="-")

ax.set_title("Monthly Average Rainfall in the Netherlands (2019–2025)", fontsize=14)
ax.set_xlabel("Month", fontsize=12)
# The plotted series is rainfall; the previous "Wind Speed (m/s)" label was a
# copy-paste error. Unit assumed to be mm, as intended by the step that
# writes the CSV -- verify against that step's scaling.
ax.set_ylabel("Average Rainfall (mm)", fontsize=12)
ax.grid(True)
fig.tight_layout()

plt.show()