# Analysis of historic Irish weather

## Author: Joanna Mnich

In [149]:
import pandas as pd
import matplotlib.pyplot as plt

In [150]:
# Path to the raw CSV file
file_path = "data/raw/Roches_point.csv"

# Load the CSV, skipping the first 19 rows (headers and metadata)
with open(file_path, "r", encoding="utf-8") as f:
    for i, line in enumerate(f):
        if line.strip().startswith("year"):
            header_row = i
            break


In [151]:
# Load the data, specifying the correct columns
df = pd.read_csv(
    file_path,
    skiprows=19,
    sep=",",
    engine="python",
    header=None,
    names=["year", "month", "wdsp"],
    na_values=["---", "NaN"],
    on_bad_lines="skip"
)

# Strip whitespace from column names
for col in ["year", "month", "wdsp"]:
    df[col] = pd.to_numeric(df[col], errors="coerce")

print(df.head(10))
print(df.dtypes)

                                                     year  month  wdsp
year month meant maxtp mintp mnmax mnmin rain  gmin   NaN    NaN   NaN
2004 8     15.8  21.4  8.7   18.5  13.1  138.2 4.6   10.7   36.0   NaN
     9     14.7  23.2  7.2   17.7  11.7  93.7  3.5   12.7   48.0   NaN
     10    10.5  16.3  3.5   13.6  7.5   143.7 -1.1  13.7   50.0   NaN
     11    9.7   14.6  2.5   11.9  7.5   24.9  -3.7  10.9   41.0   NaN
     12    8.5   14.2  0.6   10.5  6.4   49.7  -1.8  12.9   68.0   NaN
2005 1     7.9   13.4  0.9   10.4  5.5   75.1  -5.0  17.1   71.0   NaN
     2     6.0   13.6  -0.7  8.5   3.5   27.6  -8.5  12.3   52.0   NaN
     3     8.0   13.6  0.1   10.2  5.9   76.6  -6.3  12.0   38.0   NaN
     4     8.7   13.9  1.7   11.8  5.6   109.9 -2.1  11.6   47.0   NaN
year     float64
month    float64
wdsp     float64
dtype: object


In [152]:
# Strip whitespace from column names
df.columns = df.columns.str.strip().str.lower()
df. columns


Index(['year', 'month', 'wdsp'], dtype='object')

In [153]:
# Keep only the data for years 2005 to 2025 
df = df[(df["year"] >= 2005) & (df["year"] <= 2025)]

# Remove rows with missing wind speed data
df = df.dropna(subset=["wdsp"])


In [154]:
# Save the cleaned data to the processed folder
df.to_csv("data/processed/roches_point_cleaned.csv", index=False)

In [155]:
print(df.shape)
print(df.head(10))
print(df.isna().sum())


(0, 3)
Empty DataFrame
Columns: [year, month, wdsp]
Index: []
year     0
month    0
wdsp     0
dtype: int64


In [156]:
# Path to the raw CSV file
file_path = "data/raw/SherkinIsland.csv"

# Load the CSV, skipping the first 19 rows (headers and metadata)
df = pd.read_csv(file_path, skiprows=19)

# Rename the wind speed column to 'wdsp' for consistency
df.rename(columns={"Mean Wind Speed (km/h)": "wdsp"}, inplace=True)

# Keep only the data for years 2005 to 2025
df = df[(df["year"] >= 2005) & (df["year"] <= 2025)]

# Remove rows with missing wind speed data
df = df.dropna(subset=["wdsp"])

# Optional: keep only relevant columns
df = df[["year", "month", "wdsp"]]

# Check the first few rows
print(df.head())

# Save the cleaned data to the processed folder
df.to_csv("data/processed/sherkin_island_cleaned.csv", index=False)


    year  month  wdsp
7   2005      1  18.7
8   2005      2  11.7
9   2005      3  11.3
10  2005      4  12.1
11  2005      5      


In [157]:
# Path to the raw CSV file
file_path = "data/raw/Valentia_Observatory.csv"

# Load the CSV, skipping the first 19 rows (headers and metadata)
df = pd.read_csv(file_path, skiprows=19)

# Rename the wind speed column to 'wdsp' for consistency
df.rename(columns={"Mean Wind Speed (km/h)": "wdsp"}, inplace=True)

# Keep only the data for years 2005 to 2025
df = df[(df["year"] >= 2005) & (df["year"] <= 2025)]

# Remove rows with missing wind speed data
df = df.dropna(subset=["wdsp"])

# Optional: keep only relevant columns
df = df[["year", "month", "wdsp"]]

# Check the first few rows
print(df.head())

# Save the cleaned data to the processed folder
df.to_csv("data/processed/valentia_observatory_cleaned.csv", index=False)



     year  month  wdsp
783  2005      1  14.5
784  2005      2   8.5
785  2005      3   8.7
786  2005      4   9.5
787  2005      5   9.2


In [158]:
# Path to the raw CSV file
file_path = "data/raw/Dublin_Airport.csv"

# Load the CSV, skipping the first 19 rows (headers and metadata)
df = pd.read_csv(file_path, skiprows=19)

# Rename the wind speed column to 'wdsp' for consistency
df.rename(columns={"Mean Wind Speed (km/h)": "wdsp"}, inplace=True)

# Keep only the data for years 2005 to 2025
df = df[(df["year"] >= 2005) & (df["year"] <= 2025)]

# Remove rows with missing wind speed data
df = df.dropna(subset=["wdsp"])

# Optional: keep only relevant columns
df = df[["year", "month", "wdsp"]]

# Check the first few rows
print(df.head())

# Save the cleaned data to the processed folder
df.to_csv("data/processed/dublin_airport_cleaned.csv", index=False)


     year  month  wdsp
758  2005      1  15.8
759  2005      2  11.5
760  2005      3  10.5
761  2005      4  11.1
762  2005      5  10.8
