# Analysis of historic Irish weather

## Author: Joanna Mnich

In [174]:
import pandas as pd
import matplotlib.pyplot as plt

In [175]:
# Path to the raw CSV file
file_path = ("data/raw/Roches_point.csv")

# Load the CSV, skipping the first 19 rows (headers and metadata)
with open(file_path, "r", encoding="utf-8") as f:
    for i, line in enumerate(f):
        if line.strip().startswith("year"):
            header_row = i
            break


In [187]:
# Load the data, specifying the correct columns
df = pd.read_csv(file_path, skiprows=19)
                
# Strip whitespace from column names
for col in ["year", "month", "wdsp"]:
    df[col] = pd.to_numeric(df[col], errors="coerce")

print(df.head())
print(df.dtypes)

   year  month  meant  maxtp  mintp  mnmax  mnmin  rain   gmin  wdsp maxgt  \
0  1941     11    6.9   14.0   -3.1    9.9    3.9  67.2   -5.7  12.0         
1  1941     12    6.5   12.7   -3.6    9.1    3.9  41.7   -7.6  12.5         
2  1942      1    4.3   11.9   -3.1    6.9    1.7  91.9   -9.5  13.1         
3  1942      2    2.9   11.6   -4.3    5.8   -0.0  25.8  -10.7   9.0         
4  1942      3    6.3   16.2   -6.1    9.4    3.2  76.4   -8.3  10.7         

    sun  
0  56.1  
1  46.1  
2  72.8  
3  51.4  
4  73.9  
year       int64
month      int64
meant    float64
maxtp    float64
mintp    float64
mnmax    float64
mnmin    float64
rain     float64
gmin      object
wdsp     float64
maxgt     object
sun      float64
dtype: object


In [177]:
# Strip whitespace from column names
df.columns = df.columns.str.strip().str.lower()
df. columns


Index(['year', 'month', 'meant', 'maxtp', 'mintp', 'mnmax', 'mnmin', 'rain',
       'gmin', 'wdsp', 'maxgt', 'sun'],
      dtype='object')

In [184]:
# Keep only the data for years 2005 to 2025 
df = df[(df["year"] >= 2005) & (df["year"] <= 2025)]

# Remove rows with missing wind speed data
df = df.dropna(subset=["wdsp"])

# Keep only relevant columns
df = df[["year", "month", "wdsp"]]

# Display the first few rows of the cleaned data
print(df.head())

     year  month  wdsp
758  2005      1  15.8
759  2005      2  11.5
760  2005      3  10.5
761  2005      4  11.1
762  2005      5  10.8


In [186]:
# Save the cleaned data to the processed folder
df.to_csv("data/processed/roches_point_cleaned.csv", index=False)

In [None]:
print(df.shape)

(251, 3)


In [None]:
# Path to the raw CSV file
file_path = "data/raw/SherkinIsland.csv"

# Load the CSV, skipping the first 19 rows (headers and metadata)
df = pd.read_csv(file_path, skiprows=19)

# Rename the wind speed column to 'wdsp' for consistency
df.rename(columns={"Mean Wind Speed (km/h)": "wdsp"}, inplace=True)

# Keep only the data for years 2005 to 2025
df = df[(df["year"] >= 2005) & (df["year"] <= 2025)]

# Remove rows with missing wind speed data
df = df.dropna(subset=["wdsp"])

# Keep only relevant columns
df = df[["year", "month", "wdsp"]]

# Check the first few rows
print(df.head())

# Save the cleaned data to the processed folder
df.to_csv("data/processed/sherkin_island_cleaned.csv", index=False)


    year  month  wdsp
7   2005      1  18.7
8   2005      2  11.7
9   2005      3  11.3
10  2005      4  12.1
11  2005      5      


In [182]:
# Path to the raw CSV file
file_path = "data/raw/Valentia_Observatory.csv"

# Load the CSV, skipping the first 19 rows (headers and metadata)
df = pd.read_csv(file_path, skiprows=19)

# Rename the wind speed column to 'wdsp' for consistency
df.rename(columns={"Mean Wind Speed (km/h)": "wdsp"}, inplace=True)

# Keep only the data for years 2005 to 2025
df = df[(df["year"] >= 2005) & (df["year"] <= 2025)]

# Remove rows with missing wind speed data
df = df.dropna(subset=["wdsp"])

# Optional: keep only relevant columns
df = df[["year", "month", "wdsp"]]

# Check the first few rows
print(df.head())

# Save the cleaned data to the processed folder
df.to_csv("data/processed/valentia_observatory_cleaned.csv", index=False)



     year  month  wdsp
783  2005      1  14.5
784  2005      2   8.5
785  2005      3   8.7
786  2005      4   9.5
787  2005      5   9.2


In [183]:
# Path to the raw CSV file
file_path = "data/raw/Dublin_Airport.csv"

# Load the CSV, skipping the first 19 rows (headers and metadata)
df = pd.read_csv(file_path, skiprows=19)

# Rename the wind speed column to 'wdsp' for consistency
df.rename(columns={"Mean Wind Speed (km/h)": "wdsp"}, inplace=True)

# Keep only the data for years 2005 to 2025
df = df[(df["year"] >= 2005) & (df["year"] <= 2025)]

# Remove rows with missing wind speed data
df = df.dropna(subset=["wdsp"])

# Optional: keep only relevant columns
df = df[["year", "month", "wdsp"]]

# Check the first few rows
print(df.head())

# Save the cleaned data to the processed folder
df.to_csv("data/processed/dublin_airport_cleaned.csv", index=False)


     year  month  wdsp
758  2005      1  15.8
759  2005      2  11.5
760  2005      3  10.5
761  2005      4  11.1
762  2005      5  10.8
