# Met Office Historic Stations Data

In [1]:
#%matplotlib inline 
# this line is required for the plots to appear in the Jupyter cells, rather than launching the matplotlib GUI
%matplotlib notebook
#this allows interactive view but you need to be in classic rather than CoCalc Jupyter notebook for this to work

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import os
import csv
import warnings


# Let printing work the same in Python 2 and 3
from __future__ import division,print_function

# notice two underscores _ either side of future



In [2]:
# robust loader that handles '# Provisional' comments
def load_clean_data(filename):
    """Read a whitespace-separated station data file into a float array.

    The first 7 lines of the file are skipped as header. On each remaining
    line, everything from the first '#' onwards is discarded, the rest is
    split on whitespace, and the first 7 tokens are converted to floats.

    Markers handled per token:
      * '---' is stored as NaN.
      * trailing '*' characters are stripped before conversion, so a flagged
        value such as '12.3*' is kept as 12.3 instead of discarding the whole
        row. (In the Met Office station files '*' is understood to flag an
        estimated value -- confirm against the file header.)

    Lines with fewer than 7 tokens, or whose first 7 tokens are not numeric,
    are skipped. Extra trailing tokens (e.g. a 'Provisional' note that is not
    preceded by '#') are ignored rather than causing the row to be dropped.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_rows, 7). The shape is (0, 7) when no line
        parses, so column slicing such as ``data[:, 0]`` is always valid.
    """
    n_header_lines = 7
    n_columns = 7
    rows = []
    with open(filename, 'r') as f:
        for i, line in enumerate(f):
            if i < n_header_lines:
                continue
            # Drop everything after the first '#', then tokenise.
            parts = line.split('#')[0].split()
            if len(parts) < n_columns:
                continue
            try:
                row = []
                for token in parts[:n_columns]:
                    token = token.rstrip('*')
                    row.append(np.nan if token == '---' else float(token))
            except ValueError:
                # Non-numeric text in a data column: not a data row.
                continue
            rows.append(row)
    # reshape keeps the result 2-D even when `rows` is empty.
    return np.array(rows, dtype=float).reshape(-1, n_columns)

# Single-station analysis: March rainfall at Aberporth with a straight-line fit.
data = load_clean_data('aberporthdata.txt')

# Columns used here: 0 = year, 1 = month, 5 = rainfall.
year, month, rain = data[:, 0], data[:, 1], data[:, 5]

# Keep only March rows, then drop the ones whose rainfall is missing (NaN).
march_mask = month == 3
valid = ~np.isnan(rain[march_mask])
march_years = year[march_mask][valid]
march_rain = rain[march_mask][valid]

# Each year must contribute at most one March value.
assert len(set(march_years)) == len(march_years), "Duplicate years found!"

# Degree-1 least-squares fit; the covariance diagonal gives the parameter variances.
coefs, cov = np.polyfit(march_years, march_rain, deg=1, cov=True)
m, c = coefs
m_err, c_err = np.sqrt(np.diag(cov))

print("=== Linear Fit Results for March Rainfall ===")
print(f"Slope (m): {m:.4f} ± {m_err:.4f} [mm/year]")
print(f"Intercept (c): {c:.2f} ± {c_err:.2f} [mm]")

# Fitted rainfall evaluated at the observed years.
fit_line = m * march_years + c

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(march_years, march_rain, 'b.', label='March Rainfall')
ax.plot(march_years, fit_line, 'r-', label='Best Fit')
ax.set_xlabel('Year')
ax.set_ylabel('March Rainfall [mm]')
ax.set_title('March Rainfall at Aberporth Over the Years')
ax.legend()

fig.tight_layout()
plt.show()




=== Linear Fit Results for March Rainfall ===
Slope (m): -0.0177 ± 0.1411 [mm/year]
Intercept (c): 95.69 ± 279.89 [mm]


<IPython.core.display.Javascript object>

In [3]:
def load_clean_data(filename):
    """Read a whitespace-separated station data file into a float array.

    The first 7 lines of the file are skipped as header. On each remaining
    line, everything from the first '#' onwards is discarded, the rest is
    split on whitespace, and the first 7 tokens are converted to floats.

    Markers handled per token:
      * '---' is stored as NaN.
      * trailing '*' characters are stripped before conversion, so a flagged
        value such as '12.3*' is kept as 12.3 instead of discarding the whole
        row. (In the Met Office station files '*' is understood to flag an
        estimated value -- confirm against the file header.)

    Lines with fewer than 7 tokens, or whose first 7 tokens are not numeric,
    are skipped. Extra trailing tokens (e.g. a 'Provisional' note that is not
    preceded by '#') are ignored rather than causing the row to be dropped.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_rows, 7). The shape is (0, 7) when no line
        parses, so column slicing such as ``data[:, 0]`` is always valid.
    """
    n_header_lines = 7
    n_columns = 7
    rows = []
    with open(filename, 'r') as f:
        for i, line in enumerate(f):
            if i < n_header_lines:
                continue
            # Drop everything after the first '#', then tokenise.
            parts = line.split('#')[0].split()
            if len(parts) < n_columns:
                continue
            try:
                row = []
                for token in parts[:n_columns]:
                    token = token.rstrip('*')
                    row.append(np.nan if token == '---' else float(token))
            except ValueError:
                # Non-numeric text in a data column: not a data row.
                continue
            rows.append(row)
    # reshape keeps the result 2-D even when `rows` is empty.
    return np.array(rows, dtype=float).reshape(-1, n_columns)

# Single-station analysis: March sunshine at Aberporth with a straight-line fit.
data = load_clean_data('aberporthdata.txt')

# Columns used here: 0 = year, 1 = month, 6 = sunshine.
year, month, sun = data[:, 0], data[:, 1], data[:, 6]

# Keep only March rows, then drop the ones whose sunshine is missing (NaN).
march_mask = month == 3
valid = ~np.isnan(sun[march_mask])
march_years = year[march_mask][valid]
march_sun = sun[march_mask][valid]

# Degree-1 least-squares fit; the covariance diagonal gives the parameter variances.
coefs, cov = np.polyfit(march_years, march_sun, deg=1, cov=True)
m, c = coefs
m_err, c_err = np.sqrt(np.diag(cov))

print("=== Linear Fit Results for March Sunshine ===")
print(f"Slope     (m): {m:8.4f} ± {m_err:.4f}   [hours/year]")
print(f"Intercept (c): {c:8.2f} ± {c_err:.2f}   [hours]")

# Fitted sunshine evaluated at the observed years.
fit_line = m * march_years + c

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(march_years, march_sun, 'y.', label='March Sunshine')
ax.plot(march_years, fit_line, 'r-', label='Best Fit')
ax.set_xlabel('Year')
ax.set_ylabel('March Sunshine [hours]')
ax.set_title('March Sunshine at Aberporth Over the Years')
ax.legend()
fig.tight_layout()
plt.show()



Slope     (m):  -0.0986 ± 0.1408   [hours/year]
Intercept (c):   319.54 ± 279.21   [hours]


<IPython.core.display.Javascript object>

In [12]:
def load_clean_data(filename):
    """Read a whitespace-separated station data file into a float array.

    The first 7 lines of the file are skipped as header. On each remaining
    line, everything from the first '#' onwards is discarded, the rest is
    split on whitespace, and the first 7 tokens are converted to floats.

    Markers handled per token:
      * '---' is stored as NaN.
      * trailing '*' characters are stripped before conversion, so a flagged
        value such as '12.3*' is kept as 12.3 instead of discarding the whole
        row. (In the Met Office station files '*' is understood to flag an
        estimated value -- confirm against the file header.)

    Lines with fewer than 7 tokens, or whose first 7 tokens are not numeric,
    are skipped. Extra trailing tokens (e.g. a 'Provisional' note that is not
    preceded by '#') are ignored rather than causing the row to be dropped.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_rows, 7). The shape is (0, 7) when no line
        parses, so column slicing such as ``data[:, 0]`` is always valid.
    """
    n_header_lines = 7
    n_columns = 7
    rows = []
    with open(filename, 'r') as f:
        for i, line in enumerate(f):
            if i < n_header_lines:
                continue
            # Drop everything after the first '#', then tokenise.
            parts = line.split('#')[0].split()
            if len(parts) < n_columns:
                continue
            try:
                row = []
                for token in parts[:n_columns]:
                    token = token.rstrip('*')
                    row.append(np.nan if token == '---' else float(token))
            except ValueError:
                # Non-numeric text in a data column: not a data row.
                continue
            rows.append(row)
    # reshape keeps the result 2-D even when `rows` is empty.
    return np.array(rows, dtype=float).reshape(-1, n_columns)

# Average spring (Mar–May) rainfall across every station file in the working
# directory, with a straight-line fit of the yearly station average.
#
# Only COMPLETE seasons are used: a station contributes a total for a year
# only when all three of March, April and May have a rainfall value. Summing
# whatever months happen to be present would record a one- or two-month sum
# as a full-season total and bias the station average low.

# Sorted so the per-station printout has a stable order (os.listdir order is
# arbitrary).
file_list = sorted(f for f in os.listdir('.') if f.endswith('.txt'))

rain_by_year = {}          # int year -> list of complete Mar–May totals, one per station
spring_2025_totals = []    # complete Mar–May 2025 totals, one per station

for filename in file_list:
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            data = load_clean_data(filename)

        # Check ndim first: an array that is not 2-D has no shape[1].
        if data.ndim != 2 or data.shape[1] != 7:
            print(f"Skipping {filename}: unexpected shape {data.shape}")
            continue

        year = data[:, 0]
        month = data[:, 1]
        rain = data[:, 5]

        # Restrict to March, April and May.
        mask = (month >= 3) & (month <= 5)
        year = year[mask]
        rain = rain[mask]

        # Drop months whose rainfall is missing (NaN).
        valid = ~np.isnan(rain)
        year = year[valid]
        rain = rain[valid]

        for y in np.unique(year):
            in_year = year == y

            # Require all three spring months to be present.
            if np.count_nonzero(in_year) != 3:
                if int(y) == 2025:
                    print(f"{filename}: incomplete Mar–May 2025 rainfall record, excluded")
                continue

            spring_rain = np.sum(rain[in_year])
            # int keys, matching the histogram cells further down.
            rain_by_year.setdefault(int(y), []).append(spring_rain)

            if int(y) == 2025:
                print(f"{filename}: Total Mar–May 2025 rainfall = {spring_rain:.1f} mm")
                spring_2025_totals.append(spring_rain)

    except Exception as e:
        print(f"Failed to process {filename}: {e}")


if spring_2025_totals:
    avg_2025 = np.mean(spring_2025_totals)
    print(f"Average Mar–May 2025 rainfall across all stations: {avg_2025:.1f} mm")
else:
    print("No 2025 spring rainfall data found in any station")


# Station-average spring total for every year that has at least one station.
common_years = sorted(rain_by_year.keys())
avg_rain = np.array([np.mean(rain_by_year[y]) for y in common_years])
common_years = np.array(common_years)


# Degree-1 least-squares fit; the covariance diagonal gives the parameter variances.
(coefs, cov) = np.polyfit(common_years, avg_rain, 1, cov=True)
m, c = coefs
m_err, c_err = np.sqrt(np.diag(cov))
fit_line = m * common_years + c


print("\n=== Linear Fit: Average Spring Rainfall ===")
print(f"Slope     (m): {m:.4f} ± {m_err:.4f}   [mm/year]")
print(f"Intercept (c): {c:.2f} ± {c_err:.2f}   [mm]")


plt.figure(figsize=(8, 5))
plt.plot(common_years, avg_rain, 'b.', label='Avg Spring Rainfall')
plt.plot(common_years, fit_line, 'r-', label='Best Fit')
plt.xlabel('Year')
plt.ylabel('Average Rainfall (Mar–May) [mm]')
plt.title('Average Spring (Mar–May) Rainfall Across All Stations')
plt.legend()
plt.tight_layout()
plt.show()


shawburydata.txt: Total Mar–May 2025 rainfall = 71.2 mm
yeoviltondata.txt: Total Mar–May 2025 rainfall = 39.6 mm
leucharsdata.txt: Total Mar–May 2025 rainfall = 65.6 mm
cambornedata.txt: Total Mar–May 2025 rainfall = 150.6 mm
hurndata.txt: Total Mar–May 2025 rainfall = 70.0 mm
tireedata.txt: Total Mar–May 2025 rainfall = 140.4 mm
valleydata.txt: Total Mar–May 2025 rainfall = 121.0 mm
lerwickdata.txt: Total Mar–May 2025 rainfall = 245.8 mm
sheffielddata.txt: Total Mar–May 2025 rainfall = 70.8 mm
waddingtondata.txt: Total Mar–May 2025 rainfall = 39.2 mm
rossonwyedata.txt: Total Mar–May 2025 rainfall = 33.8 mm
aberporthdata.txt: Total Mar–May 2025 rainfall = 117.4 mm
heathrowdata.txt: Total Mar–May 2025 rainfall = 38.6 mm
stornowaydata.txt: Total Mar–May 2025 rainfall = 187.2 mm
manstondata.txt: Total Mar–May 2025 rainfall = 33.8 mm
eskdalemuirdata.txt: Total Mar–May 2025 rainfall = 229.4 mm
Average Mar–May 2025 rainfall across all stations: 103.4 mm

=== Linear Fit: Average Spring Rainfa

<IPython.core.display.Javascript object>

In [17]:
def load_clean_data(filename):
    """Read a whitespace-separated station data file into a float array.

    The first 7 lines of the file are skipped as header. On each remaining
    line, everything from the first '#' onwards is discarded, the rest is
    split on whitespace, and the first 7 tokens are converted to floats.

    Markers handled per token:
      * '---' is stored as NaN.
      * trailing '*' characters are stripped before conversion, so a flagged
        value such as '12.3*' is kept as 12.3 instead of discarding the whole
        row. (In the Met Office station files '*' is understood to flag an
        estimated value -- confirm against the file header.)

    Lines with fewer than 7 tokens, or whose first 7 tokens are not numeric,
    are skipped. Extra trailing tokens (e.g. a 'Provisional' note that is not
    preceded by '#') are ignored rather than causing the row to be dropped.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_rows, 7). The shape is (0, 7) when no line
        parses, so column slicing such as ``data[:, 0]`` is always valid.
    """
    n_header_lines = 7
    n_columns = 7
    rows = []
    with open(filename, 'r') as f:
        for i, line in enumerate(f):
            if i < n_header_lines:
                continue
            # Drop everything after the first '#', then tokenise.
            parts = line.split('#')[0].split()
            if len(parts) < n_columns:
                continue
            try:
                row = []
                for token in parts[:n_columns]:
                    token = token.rstrip('*')
                    row.append(np.nan if token == '---' else float(token))
            except ValueError:
                # Non-numeric text in a data column: not a data row.
                continue
            rows.append(row)
    # reshape keeps the result 2-D even when `rows` is empty.
    return np.array(rows, dtype=float).reshape(-1, n_columns)


# Average spring (Mar–May) sunshine across every station file in the working
# directory, with a straight-line fit of the yearly station average.
#
# Only COMPLETE seasons are used: a station contributes a total for a year
# only when all three of March, April and May have a sunshine value. Summing
# whatever months happen to be present would record a one- or two-month sum
# as a full-season total and bias the station average low.

# Sorted so the per-station printout has a stable order (os.listdir order is
# arbitrary).
file_list = sorted(f for f in os.listdir('.') if f.endswith('.txt'))

sun_by_year = {}                  # int year -> list of complete Mar–May totals, one per station
spring_2025_sunshine_totals = []  # complete Mar–May 2025 totals, one per station


for filename in file_list:
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            data = load_clean_data(filename)

        # Check ndim first: an array that is not 2-D has no shape[1].
        if data.ndim != 2 or data.shape[1] != 7:
            continue

        year = data[:, 0]
        month = data[:, 1]
        sun = data[:, 6]

        # Restrict to March, April and May.
        mask = (month >= 3) & (month <= 5)
        year = year[mask]
        sun = sun[mask]

        # Drop months whose sunshine is missing (NaN).
        valid = ~np.isnan(sun)
        year = year[valid]
        sun = sun[valid]

        for y in np.unique(year):
            in_year = year == y

            # Require all three spring months to be present.
            if np.count_nonzero(in_year) != 3:
                if int(y) == 2025:
                    print(f"{filename}: incomplete Mar–May 2025 sunshine record, excluded")
                continue

            spring_sun = np.sum(sun[in_year])
            # int keys, matching the histogram cells further down.
            sun_by_year.setdefault(int(y), []).append(spring_sun)

            if int(y) == 2025:
                spring_2025_sunshine_totals.append(spring_sun)
                print(f"{filename}: Total Mar–May 2025 sunshine = {spring_sun:.1f} hours")

    except Exception as e:
        print(f"Failed to process {filename}: {e}")


# Station-average spring total for every year that has at least one station.
common_years = sorted(sun_by_year.keys())
avg_sun = np.array([np.mean(sun_by_year[y]) for y in common_years])
common_years = np.array(common_years)


if spring_2025_sunshine_totals:
    avg_2025 = np.mean(spring_2025_sunshine_totals)
    print(f"Average Mar–May 2025 sunshine across all stations: {avg_2025:.1f} hours")
else:
    print("No 2025 spring sunshine data found in any station")


# Degree-1 least-squares fit; the covariance diagonal gives the parameter variances.
(coefs, cov) = np.polyfit(common_years, avg_sun, 1, cov=True)
m, c = coefs
m_err, c_err = np.sqrt(np.diag(cov))
fit_line = m * common_years + c


print("\n=== Linear Fit: Average Spring Sunshine ===")
print(f"Slope     (m): {m:.4f} ± {m_err:.4f}   [hours/year]")
print(f"Intercept (c): {c:.2f} ± {c_err:.2f}   [hours]")


plt.figure(figsize=(8, 5))
plt.plot(common_years, avg_sun, 'y.', label='Avg Spring Sunshine')
plt.plot(common_years, fit_line, 'r-', label='Best Fit')


plt.xlabel('Year')
plt.ylabel('Average Sunshine [hours]')
plt.title('Average Spring (Mar–May) Sunshine Across All Stations')
plt.legend()
plt.tight_layout()
plt.show()



shawburydata.txt: Total Mar–May 2025 sunshine = 613.6 hours
yeoviltondata.txt: Total Mar–May 2025 sunshine = 618.9 hours
leucharsdata.txt: Total Mar–May 2025 sunshine = 639.6 hours
cambornedata.txt: Total Mar–May 2025 sunshine = 365.9 hours
hurndata.txt: Total Mar–May 2025 sunshine = 686.9 hours
tireedata.txt: Total Mar–May 2025 sunshine = 654.8 hours
valleydata.txt: Total Mar–May 2025 sunshine = 728.2 hours
lerwickdata.txt: Total Mar–May 2025 sunshine = 447.3 hours
sheffielddata.txt: Total Mar–May 2025 sunshine = 604.2 hours
waddingtondata.txt: Total Mar–May 2025 sunshine = 485.6 hours
rossonwyedata.txt: Total Mar–May 2025 sunshine = 611.2 hours
aberporthdata.txt: Total Mar–May 2025 sunshine = 668.5 hours
heathrowdata.txt: Total Mar–May 2025 sunshine = 665.7 hours
stornowaydata.txt: Total Mar–May 2025 sunshine = 535.0 hours
manstondata.txt: Total Mar–May 2025 sunshine = 725.9 hours
eskdalemuirdata.txt: Total Mar–May 2025 sunshine = 577.1 hours
Average Mar–May 2025 sunshine across all 

<IPython.core.display.Javascript object>

In [7]:
def load_clean_data(filename):
    """Read a whitespace-separated station data file into a float array.

    The first 7 lines of the file are skipped as header. On each remaining
    line, everything from the first '#' onwards is discarded, the rest is
    split on whitespace, and the first 7 tokens are converted to floats.

    Markers handled per token:
      * '---' is stored as NaN.
      * trailing '*' characters are stripped before conversion, so a flagged
        value such as '12.3*' is kept as 12.3 instead of discarding the whole
        row. (In the Met Office station files '*' is understood to flag an
        estimated value -- confirm against the file header.)

    Lines with fewer than 7 tokens, or whose first 7 tokens are not numeric,
    are skipped. Extra trailing tokens (e.g. a 'Provisional' note that is not
    preceded by '#') are ignored rather than causing the row to be dropped.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_rows, 7). The shape is (0, 7) when no line
        parses, so column slicing such as ``data[:, 0]`` is always valid.
    """
    n_header_lines = 7
    n_columns = 7
    rows = []
    with open(filename, 'r') as f:
        for i, line in enumerate(f):
            if i < n_header_lines:
                continue
            # Drop everything after the first '#', then tokenise.
            parts = line.split('#')[0].split()
            if len(parts) < n_columns:
                continue
            try:
                row = []
                for token in parts[:n_columns]:
                    token = token.rstrip('*')
                    row.append(np.nan if token == '---' else float(token))
            except ValueError:
                # Non-numeric text in a data column: not a data row.
                continue
            rows.append(row)
    # reshape keeps the result 2-D even when `rows` is empty.
    return np.array(rows, dtype=float).reshape(-1, n_columns)


# Histogram of the station-average spring (Mar–May) rainfall per year, with
# the 2025 average marked by a vertical line.
#
# Only COMPLETE seasons are used: a station contributes a total for a year
# only when all three of March, April and May have a rainfall value, so a
# partial sum is never counted as a full-season total.

file_list = [f for f in os.listdir('.') if f.endswith('.txt')]

rain_by_year = {}      # int year -> list of complete Mar–May totals, one per station
rain_2025_list = []    # complete Mar–May 2025 totals, one per station

for filename in file_list:
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            data = load_clean_data(filename)

        if data.ndim != 2 or data.shape[1] != 7:
            continue

        year = data[:, 0]
        month = data[:, 1]
        rain = data[:, 5]

        # Restrict to March, April and May.
        spring_mask = (month >= 3) & (month <= 5)
        year_spring = year[spring_mask]
        rain_spring = rain[spring_mask]

        # Drop months whose rainfall is missing (NaN).
        valid = ~np.isnan(rain_spring)
        year_spring = year_spring[valid]
        rain_spring = rain_spring[valid]

        for y in np.unique(year_spring):
            y_mask = np.isclose(year_spring, y)

            # Require all three spring months to be present.
            if np.count_nonzero(y_mask) != 3:
                continue

            total = np.sum(rain_spring[y_mask])
            rain_by_year.setdefault(int(y), []).append(total)

            if int(y) == 2025:
                rain_2025_list.append(total)

    except Exception as e:
        # Report the failure instead of silently dropping the station.
        print(f"Failed to process {filename}: {e}")


# Station-average spring total for every year that has at least one station.
common_years = sorted(rain_by_year.keys())
avg_rain = np.array([np.mean(rain_by_year[y]) for y in common_years])
common_years = np.array(common_years)


# Only draw the 2025 marker when at least one station has a complete 2025 season.
highlight = len(rain_2025_list) > 0
if highlight:
    avg_rain_2025 = np.mean(rain_2025_list)
    print(f"\nAverage spring rainfall in 2025: {avg_rain_2025:.1f} mm")


plt.figure(figsize=(8, 5))
plt.hist(avg_rain, bins=15, color='skyblue', edgecolor='black')
plt.xlabel('Average Spring Rainfall [mm]')
plt.ylabel('Number of Years')
plt.title('Histogram of Average Spring (Mar–May) Rainfall Across All Stations')

if highlight:
    plt.axvline(avg_rain_2025, color='b', linestyle='-', linewidth=2)
    # Label placed just right of the line, at 90% of the current y-axis top.
    plt.text(avg_rain_2025 + 1,
             plt.ylim()[1] * 0.9,
             '2025',
             color='b',
             rotation=90,
             va='top',
             ha='left',
             fontsize=10)

plt.tight_layout()
plt.show()


Average spring rainfall in 2025: 103.4 mm


<IPython.core.display.Javascript object>

In [8]:
def load_clean_data(filename):
    """Read a whitespace-separated station data file into a float array.

    The first 7 lines of the file are skipped as header. On each remaining
    line, everything from the first '#' onwards is discarded, the rest is
    split on whitespace, and the first 7 tokens are converted to floats.

    Markers handled per token:
      * '---' is stored as NaN.
      * trailing '*' characters are stripped before conversion, so a flagged
        value such as '12.3*' is kept as 12.3 instead of discarding the whole
        row. (In the Met Office station files '*' is understood to flag an
        estimated value -- confirm against the file header.)

    Lines with fewer than 7 tokens, or whose first 7 tokens are not numeric,
    are skipped. Extra trailing tokens (e.g. a 'Provisional' note that is not
    preceded by '#') are ignored rather than causing the row to be dropped.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_rows, 7). The shape is (0, 7) when no line
        parses, so column slicing such as ``data[:, 0]`` is always valid.
    """
    n_header_lines = 7
    n_columns = 7
    rows = []
    with open(filename, 'r') as f:
        for i, line in enumerate(f):
            if i < n_header_lines:
                continue
            # Drop everything after the first '#', then tokenise.
            parts = line.split('#')[0].split()
            if len(parts) < n_columns:
                continue
            try:
                row = []
                for token in parts[:n_columns]:
                    token = token.rstrip('*')
                    row.append(np.nan if token == '---' else float(token))
            except ValueError:
                # Non-numeric text in a data column: not a data row.
                continue
            rows.append(row)
    # reshape keeps the result 2-D even when `rows` is empty.
    return np.array(rows, dtype=float).reshape(-1, n_columns)


# Histogram of the station-average spring (Mar–May) sunshine per year, with
# the 2025 average marked by a vertical line.
#
# Only COMPLETE seasons are used: a station contributes a total for a year
# only when all three of March, April and May have a sunshine value, so a
# partial sum is never counted as a full-season total.

file_list = [f for f in os.listdir('.') if f.endswith('.txt')]

sun_by_year = {}      # int year -> list of complete Mar–May totals, one per station
sun_2025_list = []    # complete Mar–May 2025 totals, one per station

for filename in file_list:
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            data = load_clean_data(filename)

        if data.ndim != 2 or data.shape[1] != 7:
            continue

        year = data[:, 0]
        month = data[:, 1]
        sun = data[:, 6]

        # Restrict to March, April and May.
        spring_mask = (month >= 3) & (month <= 5)
        year_spring = year[spring_mask]
        sun_spring = sun[spring_mask]

        # Drop months whose sunshine is missing (NaN).
        valid = ~np.isnan(sun_spring)
        year_spring = year_spring[valid]
        sun_spring = sun_spring[valid]

        for y in np.unique(year_spring):
            y_mask = np.isclose(year_spring, y)

            # Require all three spring months to be present.
            if np.count_nonzero(y_mask) != 3:
                continue

            total = np.sum(sun_spring[y_mask])
            sun_by_year.setdefault(int(y), []).append(total)

            if int(y) == 2025:
                sun_2025_list.append(total)

    except Exception as e:
        # Report the failure instead of silently dropping the station.
        print(f"Failed to process {filename}: {e}")


# Station-average spring total for every year that has at least one station.
common_years = sorted(sun_by_year.keys())
avg_sun = np.array([np.mean(sun_by_year[y]) for y in common_years])
common_years = np.array(common_years)

# Only draw the 2025 marker when at least one station has a complete 2025 season.
highlight = len(sun_2025_list) > 0
if highlight:
    avg_sun_2025 = np.mean(sun_2025_list)
    print(f"\nAverage spring sunshine in 2025: {avg_sun_2025:.1f} hours")


plt.figure(figsize=(8, 5))
plt.hist(avg_sun, bins=15, color='gold', edgecolor='black')
plt.xlabel('Average Spring Sunshine [hours]')
plt.ylabel('Number of Years')
plt.title('Histogram of Average Spring (Mar–May) Sunshine Across All Stations')

if highlight:
    plt.axvline(avg_sun_2025, color='orange', linestyle='-', linewidth=2)
    # Label placed just right of the line, at 90% of the current y-axis top.
    plt.text(avg_sun_2025 + 1,
             plt.ylim()[1] * 0.9,
             '2025',
             color='orange',
             rotation=90,
             va='top',
             ha='left',
             fontsize=10)

plt.tight_layout()
plt.show()


Average spring sunshine in 2025: 601.8 hours


<IPython.core.display.Javascript object>

In [9]:
def load_clean_data(filename):
    """Read a whitespace-separated station data file into a float array.

    The first 7 lines of the file are skipped as header. On each remaining
    line, everything from the first '#' onwards is discarded, the rest is
    split on whitespace, and the first 7 tokens are converted to floats.

    Markers handled per token:
      * '---' is stored as NaN.
      * trailing '*' characters are stripped before conversion, so a flagged
        value such as '12.3*' is kept as 12.3 instead of discarding the whole
        row. (In the Met Office station files '*' is understood to flag an
        estimated value -- confirm against the file header.)

    Lines with fewer than 7 tokens, or whose first 7 tokens are not numeric,
    are skipped. Extra trailing tokens (e.g. a 'Provisional' note that is not
    preceded by '#') are ignored rather than causing the row to be dropped.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_rows, 7). The shape is (0, 7) when no line
        parses, so column slicing such as ``data[:, 0]`` is always valid.
    """
    n_header_lines = 7
    n_columns = 7
    rows = []
    with open(filename, 'r') as f:
        for i, line in enumerate(f):
            if i < n_header_lines:
                continue
            # Drop everything after the first '#', then tokenise.
            parts = line.split('#')[0].split()
            if len(parts) < n_columns:
                continue
            try:
                row = []
                for token in parts[:n_columns]:
                    token = token.rstrip('*')
                    row.append(np.nan if token == '---' else float(token))
            except ValueError:
                # Non-numeric text in a data column: not a data row.
                continue
            rows.append(row)
    # reshape keeps the result 2-D even when `rows` is empty.
    return np.array(rows, dtype=float).reshape(-1, n_columns)


# Station-average winter rainfall (October of year y through February of
# year y + 1), keyed by the starting year y, with a straight-line fit.
#
# Only COMPLETE winters are used: a station contributes a total for a winter
# only when all five months have a rainfall value. Missing months are removed
# before summing, so without this check a winter with gaps would be recorded
# as a partial sum (or 0.0 when every month is missing) and never as NaN.

file_list = [f for f in os.listdir('.') if f.endswith('.txt')]

winter_rain_by_year = {}   # starting year -> list of complete Oct–Feb totals, one per station
rain_2023_2024 = []        # complete Oct 2023 – Feb 2024 totals, one per station

for filename in file_list:
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            data = load_clean_data(filename)

        if data.ndim != 2 or data.shape[1] != 7:
            continue

        year = data[:, 0].astype(int)
        month = data[:, 1].astype(int)
        rain = data[:, 5]

        # Drop months whose rainfall is missing (NaN).
        valid = ~np.isnan(rain)
        year = year[valid]
        month = month[valid]
        rain = rain[valid]

        # Nothing usable in this file; min()/max() below would raise on empty input.
        if year.size == 0:
            continue

        # The last year is excluded because its winter needs Jan–Feb of the following year.
        for y in range(int(min(year)), int(max(year))):
            oct_dec_mask = (year == y) & (month >= 10) & (month <= 12)
            jan_feb_mask = (year == y + 1) & (month >= 1) & (month <= 2)

            # Require Oct, Nov, Dec of y and Jan, Feb of y + 1 to all be present.
            if np.count_nonzero(oct_dec_mask) != 3 or np.count_nonzero(jan_feb_mask) != 2:
                continue

            total_rain = np.sum(rain[oct_dec_mask]) + np.sum(rain[jan_feb_mask])
            winter_rain_by_year.setdefault(y, []).append(total_rain)

            if y == 2023:
                rain_2023_2024.append(total_rain)

    except Exception as e:
        # Report the failure instead of silently dropping the station.
        print(f"Failed to process {filename}: {e}")


# Station-average winter total for every starting year that has at least one station.
common_years = sorted(winter_rain_by_year.keys())
avg_winter_rain = np.array([np.mean(winter_rain_by_year[y]) for y in common_years])
common_years = np.array(common_years)


# NOTE: the value printed below is the mean over stations of their Oct–Feb totals.
highlight = False
if len(rain_2023_2024) > 0:
    avg_2023_2024 = np.mean(rain_2023_2024)
    highlight = True
    print(f"\nTotal rainfall Oct 2023 – Feb 2024: {avg_2023_2024:.1f} mm")


# Degree-1 least-squares fit; the covariance diagonal gives the parameter variances.
(coefs, cov) = np.polyfit(common_years, avg_winter_rain, 1, cov=True)
m, c = coefs
m_err, c_err = np.sqrt(np.diag(cov))
fit_line = m * common_years + c


print(f"Slope     (m): {m:.4f} ± {m_err:.4f} [mm/year]")
print(f"Intercept (c): {c:.2f} ± {c_err:.2f} [mm]")


plt.figure(figsize=(8, 5))
plt.plot(common_years, avg_winter_rain, 'b.', label='Winter Rainfall')
plt.plot(common_years, fit_line, 'r-', label='Best Fit')


plt.xlabel('Year')
plt.ylabel('Total Rainfall [mm]')
plt.title('Winter Rainfall (Oct–Feb) Across All Stations')
plt.legend()
plt.tight_layout()
plt.show()




Total rainfall Oct 2023 – Feb 2024: 553.5 mm
Slope     (m): 0.5253 ± 0.1116 [mm/year]
Intercept (c): -613.21 ± 216.39 [mm]


<IPython.core.display.Javascript object>