# Analysis of historic Irish weather

## Author: Joanna Mnich

In [3]:
import pandas as pd
import matplotlib.pyplot as plt

In [29]:
# Path to the raw CSV file
file_path = "data/raw/Roches_point.csv"

# Load the CSV, skipping the first 19 rows (headers and metadata)
df = pd.read_csv(file_path, skiprows=19)

# Strip whitespace from column names
df.columns = df.columns.str.strip()

# Rename the wind speed column to 'wdsp' for consistency
df.rename(columns={"Mean Wind Speed (km/h)": "wdsp"}, inplace=True)

# Keep only the data for years 2005 to 2025
df = df[(df["year"] >= 2005) & (df["year"] <= 2025)]

# Remove rows with missing wind speed data
df = df.dropna(subset=["wdsp"])

# Optional: keep only relevant columns
df = df[["year", "month", "wdsp"]]

# Check the first few rows
print(df.head())

# Save the cleaned data to the processed folder
df.to_csv("data/processed/roches_point_cleaned.csv", index=False)


   year  month  wdsp
5  2005      1  17.1
6  2005      2  12.3
7  2005      3  12.0
8  2005      4  11.6
9  2005      6   9.7


In [24]:
# Path to the raw CSV file
file_path = "data/raw/SherkinIsland.csv"

# Load the CSV, skipping the first 19 rows (headers and metadata)
df = pd.read_csv(file_path, skiprows=19)

# Rename the wind speed column to 'wdsp' for consistency
df.rename(columns={"Mean Wind Speed (km/h)": "wdsp"}, inplace=True)

# Keep only the data for years 2005 to 2025
df = df[(df["year"] >= 2005) & (df["year"] <= 2025)]

# Remove rows with missing wind speed data
df = df.dropna(subset=["wdsp"])

# Optional: keep only relevant columns
df = df[["year", "month", "wdsp"]]

# Check the first few rows
print(df.head())

# Save the cleaned data to the processed folder
df.to_csv("data/processed/sherkin_island_cleaned.csv", index=False)


    year  month  wdsp
7   2005      1  18.7
8   2005      2  11.7
9   2005      3  11.3
10  2005      4  12.1
11  2005      5      


In [27]:
# Path to the raw CSV file
file_path = "data/raw/Valentia_Observatory.csv"

# Load the CSV, skipping the first 19 rows (headers and metadata)
df = pd.read_csv(file_path, skiprows=19)

# Rename the wind speed column to 'wdsp' for consistency
df.rename(columns={"Mean Wind Speed (km/h)": "wdsp"}, inplace=True)

# Keep only the data for years 2005 to 2025
df = df[(df["year"] >= 2005) & (df["year"] <= 2025)]

# Remove rows with missing wind speed data
df = df.dropna(subset=["wdsp"])

# Optional: keep only relevant columns
df = df[["year", "month", "wdsp"]]

# Check the first few rows
print(df.head())

# Save the cleaned data to the processed folder
df.to_csv("data/processed/valentia_observatory_cleaned.csv", index=False)



     year  month  wdsp
783  2005      1  14.5
784  2005      2   8.5
785  2005      3   8.7
786  2005      4   9.5
787  2005      5   9.2


In [28]:
# Path to the raw CSV file
file_path = "data/raw/Dublin_Airport.csv"

# Load the CSV, skipping the first 19 rows (headers and metadata)
df = pd.read_csv(file_path, skiprows=19)

# Rename the wind speed column to 'wdsp' for consistency
df.rename(columns={"Mean Wind Speed (km/h)": "wdsp"}, inplace=True)

# Keep only the data for years 2005 to 2025
df = df[(df["year"] >= 2005) & (df["year"] <= 2025)]

# Remove rows with missing wind speed data
df = df.dropna(subset=["wdsp"])

# Optional: keep only relevant columns
df = df[["year", "month", "wdsp"]]

# Check the first few rows
print(df.head())

# Save the cleaned data to the processed folder
df.to_csv("data/processed/dublin_airport_cleaned.csv", index=False)


     year  month  wdsp
758  2005      1  15.8
759  2005      2  11.5
760  2005      3  10.5
761  2005      4  11.1
762  2005      5  10.8
