In [1]:
import math
import pickle
import warnings
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm
from pathlib import Path

# NOTE(review): with no category/module filter this silences every warning for
# the rest of the session, not just known-noisy ones; consider narrowing it.
warnings.filterwarnings("ignore")

In [2]:
# Apply seaborn's default theme (called with no arguments, so library defaults are used).
sns.set_theme()

In [3]:
# loading our data.

def load_pickle(path):
    """
    Read a pickled object from disk.

    Parameters
    ----------
    path : str or path-like
        Location of the pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever object was stored in the file.

    Notes
    -----
    Unpickling can execute arbitrary code, so only use this on files from a
    trusted source.
    """
    with open(path, mode="rb") as stream:
        payload = pickle.load(stream)
    return payload

# Load the bundled dataset (path is relative to the working directory) and
# list the top-level tables it contains.
data_files = load_pickle("irish_rain_bundle.pkl")
data_files.keys()

dict_keys(['rain_data_per_month', 'rain_data_per_year', 'all_station_info'])

## We'll work with `rain_data_per_year`

In [4]:
# Nested mapping iterated below as station -> year -> 12 monthly values.
rain_data_per_year = data_files['rain_data_per_year']

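### As per the meteorological calendar, **Spring** is March–May, **Summer** is June–August, **Autumn** is September–November, and **Winter** is December–February.

**Note:** in this notebook each year's Winter total is computed as December + January + February of the *same* calendar year (see `rainfall_per_season`), so it does not represent one contiguous winter.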

In [5]:
def rainfall_per_season(monthly, prev_december=None):
    """
    Sum monthly values (Jan–Dec) into meteorological seasons.

    seasons:
      - Spring: Mar–May
      - Summer: Jun–Aug
      - Autumn: Sep–Nov
      - Winter: Dec–Feb

    By default Winter is December + January + February taken from the *same*
    12-value sequence, i.e. three months that are not contiguous in time.
    Supply ``prev_december`` to obtain a contiguous winter instead (previous
    year's December + this sequence's January and February).

    Parameters
    ----------
    monthly : list/tuple of length 12
        Monthly values from January to December.
    prev_december : number, optional
        December value of the preceding year. When given, it replaces
        ``monthly[11]`` in the Winter sum. When ``None`` (default) the
        December of ``monthly`` itself is used.

    Returns
    -------
    dict
        {"Spring": ..., "Summer": ..., "Autumn": ..., "Winter": ...}

    Raises
    ------
    ValueError
        If ``monthly`` does not contain exactly 12 values.
    """
    if len(monthly) != 12:
        raise ValueError("Expected 12 monthly values (Jan–Dec).")

    # Winter always starts with a December value; which December is configurable.
    december = monthly[11] if prev_december is None else prev_december

    def season_sum(values):
        # Summation order is kept fixed so floating-point results are reproducible.
        return sum(values)

    return {
        "Spring": season_sum([monthly[2], monthly[3], monthly[4]]),     # Mar, Apr, May
        "Summer": season_sum([monthly[5], monthly[6], monthly[7]]),     # Jun, Jul, Aug
        "Autumn": season_sum([monthly[8], monthly[9], monthly[10]]),    # Sep, Oct, Nov
        "Winter": season_sum([december, monthly[0], monthly[1]]),       # Dec, Jan, Feb
    }

# Key that is skipped when building the per-station table.
# NOTE(review): presumably a national aggregate rather than a single station — confirm.
EXCLUDED_SERIES = 'IIP_National series'

# station -> year -> {"Spring": ..., "Summer": ..., "Autumn": ..., "Winter": ...}
each_year_rainfall_per_station_per_season = {}

for station, rain_per_year in rain_data_per_year.items():
    if station == EXCLUDED_SERIES:
        continue
    # Collapse each year's monthly series into four seasonal totals.
    each_year_rainfall_per_station_per_season[station] = {
        year: rainfall_per_season(rain_series)
        for year, rain_series in rain_per_year.items()
    }


each_year_rainfall_per_station_per_season.keys()

dict_keys(['Cork Airport', 'Killarney', 'Waterford', 'Cappoquinn', 'Dublin Airport', 'Phoenix Park', 'Strokestown', 'Athboy', 'Belfast', 'Enniscorthy', 'Shannon Airport', 'Malin Head', 'Derry', 'Birr', 'Mullingar', 'Valentia', 'Rathdrum', 'Roches Point', 'Markree Castle', 'University College Galway', 'Foulkesmills', 'Armagh', 'Portlaw', 'Ardara', 'Drumsna'])

In [6]:
each_year_rainfall_per_station_per_season['Cork Airport'].keys()

dict_keys(['1850', '1851', '1852', '1853', '1854', '1855', '1856', '1857', '1858', '1859', '1860', '1861', '1862', '1863', '1864', '1865', '1866', '1867', '1868', '1869', '1870', '1871', '1872', '1873', '1874', '1875', '1876', '1877', '1878', '1879', '1880', '1881', '1882', '1883', '1884', '1885', '1886', '1887', '1888', '1889', '1890', '1891', '1892', '1893', '1894', '1895', '1896', '1897', '1898', '1899', '1900', '1901', '1902', '1903', '1904', '1905', '1906', '1907', '1908', '1909', '1910', '1911', '1912', '1913', '1914', '1915', '1916', '1917', '1918', '1919', '1920', '1921', '1922', '1923', '1924', '1925', '1926', '1927', '1928', '1929', '1930', '1931', '1932', '1933', '1934', '1935', '1936', '1937', '1938', '1939', '1940', '1941', '1942', '1943', '1944', '1945', '1946', '1947', '1948', '1949', '1950', '1951', '1952', '1953', '1954', '1955', '1956', '1957', '1958', '1959', '1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968', '1969', '1970', '1971', '1972', '1973

In [7]:
each_year_rainfall_per_station_per_season['Cork Airport']['1850']

{'Spring': 298.5,
 'Summer': 236.9,
 'Autumn': 247.7,
 'Winter': 417.30000000000007}

## Finding the driest and wettest year and season for each station.

In [8]:
# Season labels, in the order they are iterated and used as table columns below.
seasons = ["Spring", "Summer", "Autumn", "Winter"]

def find_season_extremes(each_year_rainfall_per_station_per_season):
    """
    Locate, for every station, the driest and wettest year of each season and
    of the year as a whole.

    Parameters
    ----------
    each_year_rainfall_per_station_per_season : dict
        Nested mapping ``{station: {year: {season: total}}}``. Year keys are
        copied into the result exactly as they appear in the input.

    Returns
    -------
    dict
        One entry per station::

            {
              "dry":  {season: (year, value)},
              "wet":  {season: (year, value)},
              "annual_dry": (year, total),
              "annual_wet": (year, total),
            }

    Notes
    -----
    - Season names are read from the module-level ``seasons`` list.
    - Season values that are ``None`` or a float NaN are ignored.
    - The annual total is the sum of the seasonal values and is only
      considered for years where all four values are usable.
    - Comparisons are strict, so on a tie the first year encountered is kept.
    - When nothing usable exists, the entry stays ``(None, inf)`` for the dry
      side and ``(None, -inf)`` for the wet side.
    """
    pos_inf = float('inf')
    neg_inf = float('-inf')

    def is_missing(amount):
        # Treat None and float NaN as "no observation".
        return amount is None or (isinstance(amount, float) and math.isnan(amount))

    result = {}

    for station_name, by_year in each_year_rainfall_per_station_per_season.items():
        # Running extremes, seeded with sentinels any real value will beat.
        driest = dict.fromkeys(seasons, (None, pos_inf))
        wettest = dict.fromkeys(seasons, (None, neg_inf))
        driest_year = (None, pos_inf)
        wettest_year = (None, neg_inf)

        for year, season_totals in by_year.items():
            usable = []
            for name in seasons:
                amount = season_totals.get(name)
                if is_missing(amount):
                    continue
                usable.append(amount)
                if amount < driest[name][1]:
                    driest[name] = (year, amount)
                if amount > wettest[name][1]:
                    wettest[name] = (year, amount)

            # Incomplete years would understate the annual total, so skip them.
            if len(usable) != 4:
                continue

            year_total = sum(usable)
            if year_total < driest_year[1]:
                driest_year = (year, year_total)
            if year_total > wettest_year[1]:
                wettest_year = (year, year_total)

        result[station_name] = {
            "dry": driest,
            "wet": wettest,
            "annual_dry": driest_year,
            "annual_wet": wettest_year,
        }

    return result


# Compute the per-station extremes and list the stations covered.
records = find_season_extremes(each_year_rainfall_per_station_per_season)
records.keys()

dict_keys(['Cork Airport', 'Killarney', 'Waterford', 'Cappoquinn', 'Dublin Airport', 'Phoenix Park', 'Strokestown', 'Athboy', 'Belfast', 'Enniscorthy', 'Shannon Airport', 'Malin Head', 'Derry', 'Birr', 'Mullingar', 'Valentia', 'Rathdrum', 'Roches Point', 'Markree Castle', 'University College Galway', 'Foulkesmills', 'Armagh', 'Portlaw', 'Ardara', 'Drumsna'])

In [9]:
records['Cork Airport']

{'dry': {'Spring': ('1990', 90.19999999999999),
  'Summer': ('1869', 79.8),
  'Autumn': ('1942', 99.10000000000001),
  'Winter': ('1949', 176.5)},
 'wet': {'Spring': ('1947', 502.7),
  'Summer': ('1878', 499.4),
  'Autumn': ('1881', 572.5),
  'Winter': ('1899', 743.6999999999999)},
 'annual_dry': ('1854', 740.1),
 'annual_wet': ('1881', 1636.8999999999999)}

In [10]:
def records_to_wide_singlecol(records: dict, weather_type: str, value_fmt: str = ".1f"):
    """
    Lay out one kind of extreme ("dry" or "wet") as a station-by-season table.

    Parameters
    ----------
    records : dict
        Per-station extremes shaped like the output of ``find_season_extremes``:
        ``{station: {"dry": {season: (year, value)}, "wet": {...},
        "annual_dry": (year, total), "annual_wet": (year, total)}}``.
    weather_type : str
        Either ``"dry"`` or ``"wet"``; selects both the seasonal dict and the
        matching ``annual_*`` entry.
    value_fmt : str, default ".1f"
        Format spec applied to each value with ``format``.

    Returns
    -------
    pandas.DataFrame
        Object-dtype frame indexed by sorted station name, with one column per
        entry of the module-level ``seasons`` list plus ``"Annual"``. Each cell
        is a ``(int(year), formatted_value)`` tuple. Cells with no recorded
        extreme (missing entry or a ``None`` year) are left as NaN.

    Raises
    ------
    ValueError
        If ``weather_type`` is neither ``"dry"`` nor ``"wet"``.
    """
    if weather_type not in ("dry", "wet"):
        # Previously any other value silently fell back to the wet annual column.
        raise ValueError(f"weather_type must be 'dry' or 'wet', got {weather_type!r}")

    def to_cell(entry):
        # A None year marks "no usable observation"; int(None) would raise.
        year, value = entry
        if year is None or value is None:
            return None
        return (int(year), format(value, value_fmt))

    stations = sorted(records.keys())
    cols = list(seasons) + ["Annual"]
    wide = pd.DataFrame(index=stations, columns=cols, dtype=object)

    annual_key = f"annual_{weather_type}"
    missing = (None, None)

    for st in stations:
        rec = records.get(st, {})
        sdict = rec.get(weather_type, {})

        cells = {s: to_cell(sdict.get(s, missing)) for s in seasons}
        cells["Annual"] = to_cell(rec.get(annual_key, missing))

        for col, cell in cells.items():
            if cell is not None:  # leave the pre-filled NaN for missing data
                wide.loc[st, col] = cell

    return wide

# One table per extreme type; each cell is a (year, formatted value) tuple.
dry_extremes = records_to_wide_singlecol(records, "dry")
wet_extremes = records_to_wide_singlecol(records, "wet")

In [11]:
# Preview the first few stations of the dry-extremes table.
dry_extremes.head()

Unnamed: 0,Spring,Summer,Autumn,Winter,Annual
Ardara,"(1929, 135.3)","(1984, 158.0)","(1933, 244.6)","(1963, 155.2)","(1933, 1121.1)"
Armagh,"(1870, 43.2)","(1995, 79.9)","(1933, 78.6)","(1987, 100.4)","(1933, 549.5)"
Athboy,"(1893, 77.0)","(1995, 94.0)","(1893, 108.4)","(1963, 84.6)","(1893, 587.7)"
Belfast,"(1875, 86.5)","(1976, 109.3)","(1933, 122.8)","(1855, 126.4)","(1855, 697.1)"
Birr,"(1915, 76.5)","(1976, 90.1)","(1912, 108.0)","(1963, 63.9)","(2003, 589.7)"


In [12]:
# Preview the first few stations of the wet-extremes table.
wet_extremes.head()

Unnamed: 0,Spring,Summer,Autumn,Winter,Annual
Ardara,"(1986, 569.3)","(1861, 637.8)","(1954, 816.6)","(1852, 780.2)","(1872, 2350.0)"
Armagh,"(1947, 295.7)","(2007, 376.7)","(1954, 355.3)","(1852, 339.2)","(1852, 1122.0)"
Athboy,"(1947, 382.1)","(1958, 437.4)","(1944, 410.2)","(1978, 405.2)","(1924, 1248.5)"
Belfast,"(1947, 384.0)","(2007, 582.2)","(1954, 492.5)","(1978, 499.5)","(2002, 1405.7)"
Birr,"(1947, 330.8)","(2007, 466.9)","(2000, 369.2)","(1946, 380.2)","(1946, 1132.5)"


In [13]:
# Take the year out of each (year, value) cell and count how many stations
# had their driest annual total in that year.
dry_extremes['Annual'].apply(lambda t: t[0]).value_counts()

Annual
1887    6
1933    4
1893    4
1921    2
1971    2
1855    1
2003    1
1854    1
1929    1
1900    1
1864    1
1905    1
Name: count, dtype: int64

In [14]:
# Take the year out of each (year, value) cell and count how many stations
# had their wettest annual total in that year.
wet_extremes['Annual'].apply(lambda t: t[0]).value_counts()

Annual
2002    6
1872    4
1924    2
1928    2
1852    1
1946    1
1881    1
1960    1
1861    1
1954    1
1958    1
1903    1
1966    1
2008    1
2009    1
Name: count, dtype: int64

## Interpretation
- Of the 25 stations, 6 recorded their driest year (lowest annual total) in 1887, and 4 each in 1893 and 1933.
- Of the 25 stations, 6 recorded their wettest year (highest annual total) in 2002, and 4 in 1872.