In [10]:
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt

In [11]:
folder_path = "../data/"

file_paths = sorted(glob.glob(os.path.join(folder_path, "result*.txt")))

all_data = []

for file_path in file_paths:
    df = pd.read_csv(
        file_path,
        comment="#",
        header=None,
        names=["station", "date", "temperature"],
        na_values=[" ", "", "NaN"]
    )
    #Convert to numeric
    df["temperature"] = pd.to_numeric(df["temperature"], errors="coerce")
    # Only keep valid rows
    df = df.dropna(subset=["temperature"])
    #Convert to degrees Celsius
    df["temperature"] = df["temperature"] / 10.0
    # Convert date to string and extract YYYYMM
    df["date"] = df["date"].astype(str)
    df["year_month"] = df["date"].str[:4] + "-" + df["date"].str[4:6]
    df["day"] = df["date"].str[:8]

    all_data.append(df)

# Combine all data
combined = pd.concat(all_data)

#Average over all stations
daily_avg = combined.groupby("day")["temperature"].mean().reset_index()

daily_avg["year_month"] = daily_avg["day"].str[:4] + "-" + df["date"].str[4:6]

#Average over all days
monthly_avg = daily_avg.groupby("year_month")["temperature"].mean().reset_index()

#Save
output_path = os.path.join(folder_path, "monthly_national_avg_wind_speed.csv")
monthly_avg.to_csv(output_path, index=False)

print(f"Saved monthly averages to: {output_path}")

Saved monthly averages to: ../data/monthly_national_avg_wind_speed.csv


In [15]:
folder_path = ""
output_path = os.path.join(folder_path, "monthly_national_avg_wind_speed.csv")
monthly_avg.to_csv(output_path, index=False)


In [12]:
%matplotlib inline

In [13]:
df = pd.read_csv("../data/monthly_national_avg_wind_speed.csv")


df["year_month"] = pd.to_datetime(df["year_month"], format="%Y%m")


plt.figure(figsize=(12, 6))
plt.plot(df["year_month"], df["temperature"], marker='o', linestyle='-')


plt.title("Monthly Average Wind Speed in the Netherlands (2014–2025)", fontsize=14)
plt.xlabel("Month", fontsize=12)
plt.ylabel("Temperature (°C)", fontsize=12)
plt.grid(True)
plt.tight_layout()

plt.show()

ValueError: time data "2014-01" doesn't match format "%Y%m", at position 0. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.