# Analysis of historic Irish weather

## Author: Joanna Mnich

In [174]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
def clean_station(file_path, output_path, *, start_year=2005, end_year=2025) -> None:
    """
    Clean a raw weather station CSV file and save a processed version.

    Steps:
    - Detect the header row (first line that starts with "year")
    - Load the data from that row onwards
    - Normalize column names (strip whitespace, lowercase)
    - Convert year, month, and wdsp to numeric (unparseable values become NaN)
    - Keep rows whose year lies in [start_year, end_year], both ends inclusive
    - Remove rows with missing wind speed values
    - Save only the year, month, and wdsp columns to output_path

    Args:
        file_path: Path of the raw CSV file; it is read as UTF-8.
        output_path: Destination passed to DataFrame.to_csv.
        start_year: First year to keep (inclusive). Defaults to 2005.
        end_year: Last year to keep (inclusive). Defaults to 2025.

    Raises:
        ValueError: If no line in the file starts with "year", i.e. the
            header row cannot be located.
        KeyError: If the year, month, or wdsp column is missing after the
            column names have been normalized.
    """
    required_columns = ["year", "month", "wdsp"]

    # Find the row containing column headers. Any lines before it are
    # skipped when loading; `None` marks "not found" so the failure can be
    # reported explicitly instead of surfacing as an unbound local variable.
    with open(file_path, "r", encoding="utf-8") as f:
        header_row = next(
            (i for i, line in enumerate(f) if line.strip().startswith("year")),
            None,
        )
    if header_row is None:
        raise ValueError(
            f"No header row starting with 'year' found in {file_path}"
        )

    # Load CSV data, using the detected line as the header
    df = pd.read_csv(file_path, skiprows=header_row)

    # Normalize column names
    df.columns = df.columns.str.strip().str.lower()

    # Fail early with a readable message if an expected column is absent
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s) in {file_path}: {missing}")

    # Convert relevant columns to numeric; blanks and text become NaN
    for col in required_columns:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Filter years (rows with a NaN year compare False and are dropped)
    df = df[(df["year"] >= start_year) & (df["year"] <= end_year)]

    # Remove missing wind speed values
    df = df.dropna(subset=["wdsp"])

    # Keep only required columns
    df = df[required_columns]

    # Save cleaned data
    df.to_csv(output_path, index=False)

    print(f"Cleaned file saved: {output_path} ({len(df)} rows)")
