We first need to format and clean the data.

In [None]:
import pandas as pd
import os

def clean_wind_data(filename):
    """Parse one raw monthly station file into a tidy wind-speed table.

    The first line of the file is treated as the station-name line (any
    'Station Name:' label is removed).  The column header is the first line
    that starts with 'year,month,meant'; every non-blank line after it is a
    comma-separated data row.

    Args:
        filename: Path of the raw station file to read.

    Returns:
        A ``(df, station_name)`` tuple.  ``df`` has the columns
        ``station``, ``year``, ``month`` and ``wdsp``; ``wdsp`` is numeric
        and rows whose ``wdsp`` could not be parsed are dropped.  ``year``
        and ``month`` are kept as the strings read from the file.

    Raises:
        ValueError: If the file is empty or contains no header row.
        KeyError: If the header row lacks one of the required columns.
    """
    with open(filename, 'r') as file:
        lines = file.readlines()

    if not lines:
        raise ValueError(f"{filename} is empty")

    station_name = lines[0].replace('Station Name:', '').strip()

    # Locate the header explicitly; falling back to line 0 would silently
    # treat the station-name line as the column header.
    header_line = next(
        (i for i, line in enumerate(lines)
         if line.startswith('year,month,meant')),
        None,
    )
    if header_line is None:
        raise ValueError(
            f"No 'year,month,meant...' header row found in {filename}"
        )

    # Strip each field so stray spaces around commas cannot break column
    # lookup or leak into the values.
    header = [name.strip() for name in lines[header_line].split(',')]
    data_rows = [
        [field.strip() for field in line.split(',')]
        for line in lines[header_line + 1:]
        if line.strip()
    ]

    df = pd.DataFrame(data_rows, columns=header)
    df['station'] = station_name
    # Blank or non-numeric wind speeds become NaN and are dropped below.
    df['wdsp'] = pd.to_numeric(df['wdsp'], errors='coerce')
    df = df[['station', 'year', 'month', 'wdsp']]
    df = df.dropna(subset=['wdsp'])

    return df, station_name

def process_files_separately(file_list, output_dir='data/wind_clean'):
    """Clean each raw station file and save it as its own CSV.

    Every file is passed to ``clean_wind_data``; the resulting table is
    written to ``<output_dir>/<station>_wind_data.csv``, where ``<station>``
    is the station name lower-cased with spaces replaced by underscores.
    Progress and errors are reported with ``print``.

    Args:
        file_list: Iterable of raw station file paths.
        output_dir: Directory that receives the cleaned CSV files; created
            if it does not exist.  Defaults to 'data/wind_clean'.

    Returns:
        Dict mapping each station name to its cleaned DataFrame.  Files
        that failed to process are omitted.
    """
    os.makedirs(output_dir, exist_ok=True)

    all_dfs = {}
    for file in file_list:
        try:
            print(f"Processing {file}...")
            df, station_name = clean_wind_data(file)
            clean_name = station_name.lower().replace(' ', '_')
            output_file = f"{output_dir}/{clean_name}_wind_data.csv"
            df.to_csv(output_file, index=False)
            print(f"  Saved {len(df)} rows to {output_file}")
            all_dfs[station_name] = df

        except Exception as e:
            # Deliberately best-effort: report the failure and carry on so
            # one bad file does not abort the rest of the batch.
            print(f"Error processing {file}: {str(e)}")

    return all_dfs

# Raw monthly station files to clean, one per weather station.
files = [
    'data/wind_raw/ballyhaise_cavan_monthly.csv',
    'data/wind_raw/dunsany_meath_monthly.csv',
    'data/wind_raw/knock_airport_mayo_monthly.csv',
    'data/wind_raw/oak_park_carlow_monthly.csv',
    'data/wind_raw/shannon_airport_clare_monthly.csv',
]

# Clean every file, then print summary statistics of wind speed per station.
all_station_data = process_files_separately(files)

# Iterating an empty dict is a no-op, so no separate emptiness check is needed.
for station_name, df in all_station_data.items():
    stats = df['wdsp'].describe()
    print(f"\n{station_name}:", stats, sep="\n")

Processing data/wind_raw/ballyhaise_cavan_monthly.csv...
  Saved 244 rows to data/wind_clean/ballyhaise_wind_data.csv
Processing data/wind_raw/dunsany_meath_monthly.csv...
  Saved 226 rows to data/wind_clean/dunsany_wind_data.csv
Processing data/wind_raw/knock_airport_mayo_monthly.csv...
  Saved 341 rows to data/wind_clean/knock_airport_wind_data.csv
Processing data/wind_raw/oak_park_carlow_monthly.csv...
  Saved 249 rows to data/wind_clean/oak_park_wind_data.csv
Processing data/wind_raw/shannon_airport_clare_monthly.csv...
  Saved 955 rows to data/wind_clean/shannon_airport_wind_data.csv

BALLYHAISE:
count    244.000000
mean       6.404098
std        1.271136
min        3.300000
25%        5.500000
50%        6.200000
75%        7.200000
max       10.400000
Name: wdsp, dtype: float64

DUNSANY:
count    226.000000
mean       7.935841
std        1.501317
min        4.400000
25%        6.925000
50%        7.600000
75%        8.875000
max       13.000000
Name: wdsp, dtype: float64

KNOCK 