In [None]:
from huggingface_hub import snapshot_download

def get_dataset():
    """Download the ``hand_curated/**`` files of the snow dataset repo.

    Calls ``snapshot_download`` for the Hugging Face dataset
    ``SimonF92-Snow/Cairngorm_Journal_Snow_APIGPT40_unrefined`` and writes the
    matching files under ``./final_curated_dataset`` (with
    ``local_dir_use_symlinks=False``). Returns nothing.
    """
    download_options = {
        "repo_id": "SimonF92-Snow/Cairngorm_Journal_Snow_APIGPT40_unrefined",
        "allow_patterns": "hand_curated/**",
        "repo_type": "dataset",
        "local_dir": "./final_curated_dataset",
        "local_dir_use_symlinks": False,
    }
    snapshot_download(**download_options)

#get_dataset()

In [None]:
#pull in all files from final_curated_dataset/hand_curated and concat

import pandas as pd
import glob

# Collect every hand-curated CSV. glob returns paths in arbitrary filesystem
# order, so sort them: the later drop_duplicates(keep='first') steps depend on
# row order, and an unsorted listing can give different results per machine.
all_files = sorted(glob.glob("./final_curated_dataset/hand_curated/*.csv"))
if not all_files:
    # pd.concat([]) would otherwise fail with "No objects to concatenate".
    raise FileNotFoundError(
        "No CSV files found in ./final_curated_dataset/hand_curated/ - "
        "run get_dataset() first."
    )

df_list = []
for file in all_files:
    print(f"Processing file: {file}")
    # Append directly so that `df` only ever refers to the combined frame.
    df_list.append(pd.read_csv(file))

# One frame with a fresh 0..n-1 index across all files.
df = pd.concat(df_list, ignore_index=True)

In [None]:
# Column names of the concatenated frame.
df.columns

In [None]:
# (rows, columns) of the concatenated frame.
df.shape

In [None]:
# Frequency of each raw 'location' string (NaN is excluded by value_counts).
df['location'].value_counts()


In [None]:
# Write the raw 'location' column, one row per record, to locations.csv.
# The frame index is written too because index=False is not passed.
# NOTE(review): presumably the input for hand-building mapped_locations.csv - confirm.
df['location'].to_csv('locations.csv')

In [None]:
# Load the location lookup table. It must already exist next to the notebook;
# nothing in this notebook creates mapped_locations.csv.
df_mapped_locations = pd.read_csv('mapped_locations.csv')
# How many lookup rows fall under each general location.
df_mapped_locations['general_location'].value_counts()

In [None]:
# Attach the lookup columns by matching df['location'] against the lookup's
# 'specific_location' (left join keeps every record), keep only the first row
# for each distinct 'text', then save a snapshot of the result.
df_merged = (
    df.merge(
        df_mapped_locations,
        how='left',
        left_on='location',
        right_on='specific_location',
    )
    .drop_duplicates(subset='text', keep='first')
)
df_merged.to_csv('merged_df.csv', index=False)


In [None]:
# Remove the two lookup columns added by the merge above; repaired versions of
# both are merged back in by a later cell.
df_merged = df_merged.drop(['specific_location', 'general_location'], axis=1)
df_merged

In [None]:
# Load the repaired location annotations. The file must already exist next to
# the notebook; nothing in this notebook creates it.
df_repaired = pd.read_csv('location_annotated_merged_finished.csv')
df_repaired

In [None]:
# Bring the repaired general/specific location in by matching on 'text'
# (left join: records without a repaired entry keep NaN in both columns).
_repaired_locations = df_repaired[['text', 'general_location', 'specific_location']]
df_merged = df_merged.merge(_repaired_locations, how='left', on='text')
df_merged['general_location'].value_counts()

In [None]:
# Drop rows that are identical in every column, keeping the first occurrence.
df_merged = df_merged.drop_duplicates(keep='first')
df_merged

In [None]:
import pandas as pd
import re

def _to_int(x):
    if x is None:
        return None
    x = str(x).strip()
    if x in {"", "-", "nan", "NaN", "None"}:
        return None
    try:
        return int(x)
    except ValueError:
        return None

def parse_date(date_str):
    """Split a raw date value into a ``(year, season, month, day)`` tuple.

    Recognised shapes (after stringifying and stripping):

    * ``"<Season> YYYY"`` with Spring/Summer/Autumn/Winter in any letter case
      -> ``(year, Season, None, None)``, season capitalised.
    * ``"D/M/Y"``  -> ``(year, None, month, day)``
    * ``"M/Y"``    -> ``(year, None, month, None)``
    * ``"Y"``      -> ``(year, None, None, None)``

    Each numeric part goes through ``_to_int``, so placeholders such as ``"-"``
    become ``None``. Missing input, or more than three ``/``-separated parts,
    gives four ``None`` values.
    """
    nothing = (None, None, None, None)
    if pd.isna(date_str):
        return nothing

    text = str(date_str).strip()

    # season format: "Spring 1893" (adjust list if needed)
    season_match = re.match(r'^(Spring|Summer|Autumn|Winter)\s+(\d{4})$', text, flags=re.I)
    if season_match is not None:
        season_name, year_text = season_match.groups()
        return _to_int(year_text), season_name.capitalize(), None, None

    pieces = text.split('/')
    piece_count = len(pieces)

    if piece_count == 3:
        day_text, month_text, year_text = pieces
        return _to_int(year_text), None, _to_int(month_text), _to_int(day_text)

    if piece_count == 2:  # e.g. "05/1893"
        month_text, year_text = pieces
        return _to_int(year_text), None, _to_int(month_text), None

    if piece_count == 1:  # year only
        return _to_int(pieces[0]), None, None, None

    return nothing

# Parse every date once and build the four columns in a single DataFrame
# construction. This avoids `.apply(pd.Series)`, which creates one Series
# object per row and is slow on larger frames.
_parsed_dates = pd.DataFrame(
    df_merged['date'].map(parse_date).tolist(),
    index=df_merged.index,          # keep row alignment with df_merged
    columns=['year', 'season', 'month', 'day'],
)
# Make the numeric parts real numbers (None -> NaN) so sorting and comparisons
# downstream behave; 'season' stays as text/None.
for _col in ('year', 'month', 'day'):
    _parsed_dates[_col] = pd.to_numeric(_parsed_dates[_col])

df_merged[['year', 'season', 'month', 'day']] = _parsed_dates

In [None]:
# Drop the raw 'location' column from the working frame.
df_merged = df_merged.drop(columns=['location'])
df_merged

In [None]:
# Order rows by year, then month, then day (rows with missing values in a sort
# key are placed last by sort_values' default na_position).
df_merged = df_merged.sort_values(by=['year', 'month', 'day'])
df_merged

In [None]:
# Inspect the general_location labels before they are normalised.
df_merged['general_location'].value_counts()

In [None]:
# Canonicalise the region labels, matched case-insensitively. All aliases are
# resolved in one pass (no repeated column scans, no duplicated statements);
# labels not listed here, and missing values, are left untouched.
_GENERAL_LOCATION_ALIASES = {
    'cairngorm': 'Cairngorms Other',
    'other cairngorms': 'Cairngorms Other',
    'braeriach': 'Cairngorms Western Massif',
    'western massif': 'Cairngorms Western Massif',
    "beinn a' bhuird": "Beinn a' Bhuird & Ben Avon",
}
_canonical = df_merged['general_location'].str.lower().map(_GENERAL_LOCATION_ALIASES)
# Only overwrite rows that actually matched an alias.
df_merged['general_location'] = df_merged['general_location'].mask(
    _canonical.notna(), _canonical
)

df_merged['general_location'].value_counts()



In [None]:
# Hand corrections for two mistyped dates. Each entry is
# (wrong year, corrected year, wrong date string, corrected date string):
# 1013 -> 1913 and 83 -> 1983.
_year_corrections = (
    (1013, 1913, '21/10/1013', '21/10/1913'),
    (83, 1983, '02/08/83', '02/08/1983'),
)
for _bad_year, _good_year, _bad_date, _good_date in _year_corrections:
    df_merged.loc[df_merged['year'] == _bad_year, 'year'] = _good_year
    df_merged.loc[df_merged['date'] == _bad_date, 'date'] = _good_date
df_merged

In [None]:
# Re-sort chronologically, since the year corrections above change where those rows belong.
df_merged = df_merged.sort_values(by=['year', 'month', 'day'])
df_merged

In [None]:
import numpy as np

# One representative coordinate pair per general_location label
# (apparently (latitude, longitude) - the values fall within Scotland).
# "Other Highlands" has no single point, so it maps to NaN.
location_dict = dict([
    ("Other Highlands", np.nan),
    ("Lochnagar", (56.954291, -3.244078)),
    ("Cairngorms Other", (57.099690, -3.665600)),
    ("Cairngorms Western Massif", (57.066582, -3.732339)),
    ("Ben Macdui", (57.070696, -3.667715)),
    ("Ben Nevis", (56.796735, -5.002927)),
    ("Beinn a' Bhuird & Ben Avon", (57.081248, -3.505238)),
    ("Ben Lawers", (56.544874, -4.220930)),
    ("Central Highlands", (56.949668, -4.608764)),
    ("Ben Lui", (56.396993, -4.810804)),
    ("Torridon & Area", (57.530301, -5.454877)),
    ("North West Highlands", (57.190786, -5.158042)),
    ("Glenshee Area", (56.876858, -3.373544)),
    ("Southern Highlands", (56.356399, -4.577326)),
    ("Eastern Highlands", (57.194888, -3.281276)),
])

In [None]:
# Look up each row's general_location in location_dict and store the result in
# a new 'Coordinates' column. Labels missing from the dict (and the
# "Other Highlands" entry) come out as NaN; the rest are coordinate tuples.
df_merged['Coordinates'] = df_merged['general_location'].map(location_dict)
df_merged

In [None]:
# If two rows match on everything except text, and the shorter text is a prefix of the longer one, keep only the row with the longer text.

import pandas as pd

def drop_prefix_text_dupes(
    df: pd.DataFrame,
    text_col: str = "text",
    key_cols=None,          # columns to match on (excluding text)
    ignore_cols=None,       # columns to ignore in matching
    verbose: bool = True
) -> pd.DataFrame:
    """Drop rows whose text is a prefix of a longer text in the same key group.

    Rows are grouped by ``key_cols`` (NaN keys form their own groups). Within a
    group, texts are visited longest first; a row is dropped when its text
    (compared as ``str``) is a prefix of - or equal to - a text already kept.
    When texts tie in length the earlier input row is visited first.

    Parameters
    ----------
    df : input frame; it is not modified.
    text_col : name of the text column.
    key_cols : columns that must match for rows to be compared. Defaults to all
        columns except ``text_col`` and ``ignore_cols``. If this ends up empty,
        the whole frame is treated as one group.
    ignore_cols : columns left out of the default ``key_cols``.
    verbose : print one line per group that lost rows, plus a summary.

    Returns
    -------
    A new frame with the surviving rows in their original input order, all
    original columns, and a fresh ``0..n-1`` index.
    """
    if ignore_cols is None:
        ignore_cols = []
    if key_cols is None:
        key_cols = [c for c in df.columns if c not in ([text_col] + list(ignore_cols))]
    elif isinstance(key_cols, str):
        key_cols = [key_cols]
    else:
        key_cols = list(key_cols)

    # Positional index: the bookkeeping below is then immune to duplicate or
    # shuffled input labels, and the output is re-indexed anyway.
    work = df.reset_index(drop=True)
    texts = work[text_col].astype(str).tolist()

    # Iterate the groups instead of using groupby.apply: iteration always yields
    # full sub-frames, whereas apply's handling of the grouping columns has
    # changed between pandas versions.
    if key_cols:
        groups = (g for _, g in work.groupby(key_cols, dropna=False))
    else:
        groups = iter([work] if len(work) else [])

    keep_positions = []
    for group in groups:
        positions = group.index.tolist()
        # Longest text first; sorted() is stable, so ties keep input order.
        by_length = sorted(positions, key=lambda p: -len(texts[p]))

        kept_texts = []
        dropped = 0
        for pos in by_length:
            candidate = texts[pos]
            if any(longer.startswith(candidate) for longer in kept_texts):
                dropped += 1
            else:
                kept_texts.append(candidate)
                keep_positions.append(pos)

        if verbose and dropped:
            key = {c: group.iloc[0][c] for c in key_cols}
            print(f"[Prefix-dedup] key={key} dropped={dropped}")

    out = work.iloc[sorted(keep_positions)].reset_index(drop=True)

    if verbose:
        print(f"[Summary] in={len(df)} out={len(out)} removed={len(df)-len(out)}")

    return out

# usage:
# df_deduped = drop_prefix_text_dupes(df, text_col="text", ignore_cols=["score"], verbose=True)

# Collapse truncated copies of the same passage. 'score' is ignored when
# matching, so rows that differ only in score and text length are compared.
df_merged = drop_prefix_text_dupes(df_merged, text_col="text", ignore_cols=["score"], verbose=True)

In [None]:
# Restore chronological order after the de-duplication step.
df_merged = df_merged.sort_values(by=['year', 'month', 'day'])


In [None]:
#!pip install sweetviz

In [None]:
#generate sweetviz html report
import numpy as np

# Compatibility shim: if this NumPy no longer exposes
# np.VisibleDeprecationWarning at top level, alias it from np.exceptions (or
# fall back to DeprecationWarning) before sweetviz is imported.
# NOTE(review): presumably sweetviz references that attribute - confirm.
if not hasattr(np, "VisibleDeprecationWarning"):
    try:
        np.VisibleDeprecationWarning = np.exceptions.VisibleDeprecationWarning
    except Exception:
        np.VisibleDeprecationWarning = DeprecationWarning

import sweetviz as sv

# Profile a copy so df_merged itself is untouched.
df_sweetviz = df_merged.copy()

# 'Coordinates' holds coordinate tuples (or NaN). pd.to_numeric cannot turn a
# tuple into a number, so split the pair into two float columns the report can
# actually summarise. Order follows location_dict (apparently lat, lon).
_coords = df_sweetviz.pop('Coordinates')
df_sweetviz['latitude'] = _coords.map(lambda c: c[0] if isinstance(c, tuple) else np.nan)
df_sweetviz['longitude'] = _coords.map(lambda c: c[1] if isinstance(c, tuple) else np.nan)

report = sv.analyze(df_sweetviz)                 # or sv.analyze([df, "Dataset"])
report.show_html("SNOSCOT__report.html", open_browser=True)

In [None]:
# Display the cleaned frame before it is exported.
df_merged

In [None]:
# Write the cleaned dataset to SNOSCOT_v1_4.csv without the index column.
df_merged.to_csv('SNOSCOT_v1_4.csv', index=False)

In [None]:
#plot number of entries per year
import matplotlib.pyplot as plt
# Rows per year (missing years are excluded by value_counts), ordered by year.
year_counts = df_merged['year'].value_counts().sort_index()

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(year_counts.index, year_counts.values)
ax.set(
    xlabel='Year',
    ylabel='Number of Entries',
    title='Number of Snow Event Entries per Year',
    xlim=(1850, 2025),  # only show 1850 onwards
)
plt.show()

In [None]:
# Rows per calendar month, ordered by month number.
month_counts = df_merged['month'].value_counts().sort_index()

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(month_counts.index, month_counts.values)
ax.set(
    xlabel='Month',
    ylabel='Number of Entries',
    title='Number of Snow Event Entries per Month',
    xlim=(0, 13),  # leaves a margin either side of months 1-12
)
plt.show()


In [None]:
# Total 'score' per year as a two-column frame (year, score), then a bar chart.
year_score = df_merged.groupby('year')['score'].sum().reset_index()

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(year_score['year'], year_score['score'])
ax.set(
    xlabel='Year',
    ylabel='Total Score',
    title='Total Snow Score per Year',
    xlim=(1850, 2025),
)
plt.show()


In [None]:
#do it for the seasons, grab season from both the season col and the month col (month 12,1,2 = winter etc)
def month_to_season(month):
    """Return the season name for a month number, or ``None``.

    Dec-Feb -> 'Winter', Mar-May -> 'Spring', Jun-Aug -> 'Summer',
    Sep-Nov -> 'Autumn'. Anything else (including NaN/None) gives ``None``.
    """
    season_months = (
        ('Winter', (12, 1, 2)),
        ('Spring', (3, 4, 5)),
        ('Summer', (6, 7, 8)),
        ('Autumn', (9, 10, 11)),
    )
    for season_name, months in season_months:
        if month in months:
            return season_name
    return None

# Resolve a season for every row: use the explicit 'season' label when present,
# otherwise the season implied by 'month'. Rows with neither stay missing and
# are skipped by the groupby below.
_row_season = df_merged['season'].where(
    df_merged['season'].notna(),
    df_merged['month'].map(month_to_season),
)

# Total score per season. groupby().sum() skips NaN scores, so one missing score
# cannot turn a whole season total into NaN (which `+=` in a row loop would do).
# This matches how the per-year totals are computed. Seasons with no rows get 0.
_season_order = ['Winter', 'Spring', 'Summer', 'Autumn']
season_scores = (
    df_merged['score']
    .groupby(_row_season)
    .sum()
    .reindex(_season_order, fill_value=0)
    .to_dict()
)

#plot season scores
plt.figure(figsize=(8, 6))
plt.bar(list(season_scores.keys()), list(season_scores.values()))
plt.xlabel('Season')
plt.ylabel('Total Score')
plt.title('Total Snow Score per Season')
plt.show()

In [None]:
# Grouped bar chart: total score per season for each decade.
_season_order = ['Winter', 'Spring', 'Summer', 'Autumn']

# Only rows with a year can be placed in a decade.
_dated = df_merged[df_merged['year'].notna()]
_row_decade = ((_dated['year'] // 10) * 10).astype(int)   # 1893 -> 1890
# Explicit season label first, otherwise the season implied by the month.
_row_season = _dated['season'].where(
    _dated['season'].notna(),
    _dated['month'].map(month_to_season),
)

# decade x season table of summed scores. Rows without a season are dropped by
# groupby; NaN scores are skipped by sum(); absent combinations become 0.
_decade_table = (
    _dated['score']
    .groupby([_row_decade, _row_season])
    .sum()
    .unstack(fill_value=0)
    .reindex(columns=_season_order, fill_value=0)
    .sort_index()
)

# Same {decade: {season: total}} shape as before, for any later cell using it.
decade_season_scores = _decade_table.to_dict(orient='index')

#plot
decades = sorted(decade_season_scores.keys())
winter_scores = [decade_season_scores[d]['Winter'] for d in decades]
spring_scores = [decade_season_scores[d]['Spring'] for d in decades]
summer_scores = [decade_season_scores[d]['Summer'] for d in decades]
autumn_scores = [decade_season_scores[d]['Autumn'] for d in decades]

x = range(len(decades))
plt.figure(figsize=(12, 6))
_series = (
    (0.0, 'Winter', winter_scores),
    (0.2, 'Spring', spring_scores),
    (0.4, 'Summer', summer_scores),
    (0.6, 'Autumn', autumn_scores),
)
for _offset, _label, _scores in _series:
    plt.bar([i + _offset for i in x], _scores, width=0.2, label=_label, align='center')
plt.xlabel('Decade')
plt.ylabel('Total Score')
plt.title('Total Snow Score per Season by Decade')
plt.xticks([i + 0.3 for i in x], decades)
plt.legend()
# Start the view at the first decade >= 1850. The position is looked up rather
# than hard-coded, because it depends on which earlier decades are present.
_first_visible = next((i for i, d in enumerate(decades) if d >= 1850), 0)
plt.xlim(_first_visible - 0.2, len(decades))
plt.show()


In [None]:
# Summer snow mentions in the Cairngorms core: positive score, one of the three
# core regions, and dated to summer either by label or by month (Jun-Aug).
_cairngorm_core = ['Ben Macdui', 'Cairngorms Other', 'Cairngorms Western Massif']
_has_snow = df_merged['score'] > 0
_in_core = df_merged['general_location'].isin(_cairngorm_core)
_is_summer = (df_merged['season'] == 'Summer') | df_merged['month'].isin([6, 7, 8])

summer_snow_mentions = (
    df_merged[_has_snow & _in_core & _is_summer]
    .sort_values(by=['year', 'month', 'day'])
)
summer_snow_mentions


In [None]:
# Number of summer snow mentions per year, as a bar chart.
summer_year_counts = summer_snow_mentions['year'].value_counts().sort_index()

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(summer_year_counts.index, summer_year_counts.values)
ax.set(
    xlabel='Year',
    ylabel='Number of Summer Snow Mentions',
    title='Number of Summer Snow Mentions on Ben Macdui and Cairngorms',
    xlim=(1850, 2025),
)
plt.show()


In [None]:
# Spring snow mentions in the Cairngorms core: positive score, one of the three
# core regions, and dated to spring either by label or by month (Mar-May).
_cairngorm_core = ['Ben Macdui', 'Cairngorms Other', 'Cairngorms Western Massif']
_spring_mask = (
    (df_merged['score'] > 0)
    & df_merged['general_location'].isin(_cairngorm_core)
    & ((df_merged['season'] == 'Spring') | df_merged['month'].isin([3, 4, 5]))
)
spring_snow_mentions = df_merged[_spring_mask].sort_values(by=['year', 'month', 'day'])

# Mentions per year, as a bar chart.
spring_year_counts = spring_snow_mentions['year'].value_counts().sort_index()

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(spring_year_counts.index, spring_year_counts.values)
ax.set(
    xlabel='Year',
    ylabel='Number of Spring Snow Mentions',
    title='Number of Spring Snow Mentions on Ben Macdui and Cairngorms',
    xlim=(1850, 2025),
)
plt.show()




In [None]:
# Autumn snow mentions in the Cairngorms core: positive score, one of the three
# core regions, and dated to autumn either by label or by month (Sep-Nov).
_cairngorm_core = ['Ben Macdui', 'Cairngorms Other', 'Cairngorms Western Massif']
_autumn_mask = (
    (df_merged['score'] > 0)
    & df_merged['general_location'].isin(_cairngorm_core)
    & ((df_merged['season'] == 'Autumn') | df_merged['month'].isin([9, 10, 11]))
)
autumn_snow_mentions = df_merged[_autumn_mask].sort_values(by=['year', 'month', 'day'])

# Mentions per year, as a bar chart.
autumn_year_counts = autumn_snow_mentions['year'].value_counts().sort_index()

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(autumn_year_counts.index, autumn_year_counts.values)
ax.set(
    xlabel='Year',
    ylabel='Number of Autumn Snow Mentions',
    title='Number of Autumn Snow Mentions on Ben Macdui and Cairngorms',
    xlim=(1850, 2025),
)
plt.show()

In [None]:
# Winter snow mentions in the Cairngorms core: positive score, one of the three
# core regions, and dated to winter either by label or by month (Dec-Feb).
_cairngorm_core = ['Ben Macdui', 'Cairngorms Other', 'Cairngorms Western Massif']
_winter_mask = (
    (df_merged['score'] > 0)
    & df_merged['general_location'].isin(_cairngorm_core)
    & ((df_merged['season'] == 'Winter') | df_merged['month'].isin([12, 1, 2]))
)
winter_snow_mentions = df_merged[_winter_mask].sort_values(by=['year', 'month', 'day'])

# Mentions per year, as a bar chart.
winter_year_counts = winter_snow_mentions['year'].value_counts().sort_index()

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(winter_year_counts.index, winter_year_counts.values)
ax.set(
    xlabel='Year',
    ylabel='Number of Winter Snow Mentions',
    title='Number of Winter Snow Mentions on Ben Macdui and Cairngorms',
    xlim=(1850, 2025),
)
plt.show()


In [None]:
# Total spring mentions per year.
spring_year_counts = spring_snow_mentions['year'].value_counts().sort_index()

# value_counts only lists years that have at least one mention. Put the counts
# on a continuous calendar-year axis (0 for years without mentions) so that a
# 5-row rolling window really spans 5 consecutive years.
_spring_by_calendar_year = spring_year_counts.copy()
if not _spring_by_calendar_year.empty:
    _spring_by_calendar_year.index = _spring_by_calendar_year.index.astype(int)
    _all_years = range(
        int(_spring_by_calendar_year.index.min()),
        int(_spring_by_calendar_year.index.max()) + 1,
    )
    _spring_by_calendar_year = _spring_by_calendar_year.reindex(_all_years, fill_value=0)

# Trailing 5-year mean, applied twice (the second pass smooths the first).
# NOTE(review): the double pass is kept from the original cell - confirm it is
# intentional rather than a duplicated line.
spring_year_counts_smoothed = _spring_by_calendar_year.rolling(window=5, center=False).mean()
spring_year_counts_smoothed = spring_year_counts_smoothed.rolling(window=5, center=False).mean()

plt.figure(figsize=(12, 6))
plt.plot(spring_year_counts_smoothed.index, spring_year_counts_smoothed.values)
plt.xlabel('Year')
plt.ylabel('Smoothed Number of Spring Snow Mentions')
plt.title('Smoothed Number of Spring Snow Mentions on Ben Macdui and Cairngorms')
plt.xlim(1850, 2025)
plt.show()



In [None]:
import numpy as np
import matplotlib.pyplot as plt

# pip install cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature


def add_inset_at_lonlat(
    fig,
    map_ax,
    lon,
    lat,
    plot_inset_fn,
    *,
    inset_size=(0.12, 0.12),   # (width, height) in figure fraction
    pad=(0.0, 0.0),            # (dx, dy) in figure fraction
    src_crs=ccrs.PlateCarree(),
    inset_facecolor="white",
    inset_alpha=0.9,
    frame=True,
    zorder=20,
):
    """
    Place an inset axes centred on a map location and draw into it.

    (lon, lat) is transformed from ``src_crs`` into the map projection, then to
    display pixels, then to figure-fraction coordinates. The inset is centred
    there, shifted by ``pad`` and clamped so it stays inside the figure.
    ``plot_inset_fn(inset_ax)`` is then called to draw the mini-graph.

    Parameters
    ----------
    fig, map_ax : the figure and the map axes (must expose ``.projection``).
    lon, lat : anchor location in ``src_crs`` coordinates.
    plot_inset_fn : callable taking the new inset axes.
    inset_size, pad : size and offset, both in figure fraction.
    inset_facecolor, inset_alpha : background patch styling.
    frame : when False, hide the inset's spines.
    zorder : z-order of the inset axes.

    Returns
    -------
    The newly created inset axes.
    """
    # A fixed-aspect axes is only resized to its final box when it is drawn.
    # Apply that adjustment now; otherwise transData still describes the
    # pre-draw box and the inset is placed away from (lon, lat).
    map_ax.apply_aspect()

    # Transform lon/lat to display coords for the map axes
    x_disp, y_disp = map_ax.transData.transform(
        map_ax.projection.transform_point(lon, lat, src_crs)
    )

    # Convert display coords to figure fraction coords
    x_fig, y_fig = fig.transFigure.inverted().transform((x_disp, y_disp))

    w, h = inset_size
    dx, dy = pad

    # Anchor the inset centre on the coordinate, then clamp to the figure.
    left = np.clip(x_fig - w / 2 + dx, 0.0, 1.0 - w)
    bottom = np.clip(y_fig - h / 2 + dy, 0.0, 1.0 - h)

    inset_ax = fig.add_axes([left, bottom, w, h], zorder=zorder)
    inset_ax.set_facecolor(inset_facecolor)
    inset_ax.patch.set_alpha(inset_alpha)

    if not frame:
        for spine in inset_ax.spines.values():
            spine.set_visible(False)

    # Keep inset minimal by default: no ticks, no tick labels.
    inset_ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

    # User-supplied function draws the mini-graph into inset_ax
    plot_inset_fn(inset_ax)

    return inset_ax


def plot_scotland_with_insets(
    overlays,
    *,
    extent=(-8.8, -0.5, 54.5, 60.95),  # (lon_min, lon_max, lat_min, lat_max)
    figsize=(8, 10),
):
    """
    Draw a Lambert-conformal base map and one inset mini-graph per overlay.

    overlays: list of dicts describing each mini-graph.
      Required keys: "lon", "lat", "plot_fn"
      Optional keys: "inset_size", "pad", "facecolor", "alpha", "frame"

    Returns (fig, map_ax).
    """
    projection = ccrs.LambertConformal(central_longitude=-4.0, central_latitude=57.0)

    fig = plt.figure(figsize=figsize)
    map_ax = fig.add_subplot(1, 1, 1, projection=projection)
    map_ax.set_extent(extent, crs=ccrs.PlateCarree())

    # Base layers, added in this order, all at 10m resolution.
    base_layers = (
        (cfeature.LAND, {"zorder": 0}),
        (cfeature.OCEAN, {"zorder": 0}),
        (cfeature.COASTLINE, {"linewidth": 0.8, "zorder": 2}),
        (cfeature.BORDERS, {"linewidth": 0.6, "zorder": 2}),
        (cfeature.LAKES, {"alpha": 0.5, "zorder": 1}),
        (cfeature.RIVERS, {"linewidth": 0.4, "zorder": 1}),
    )
    for layer, layer_style in base_layers:
        map_ax.add_feature(layer.with_scale("10m"), **layer_style)

    # First pass: a small marker at every anchor point.
    for spec in overlays:
        map_ax.plot(
            spec["lon"], spec["lat"],
            marker="o", markersize=3, transform=ccrs.PlateCarree(), zorder=5,
        )

    # Second pass: the inset axes themselves.
    for spec in overlays:
        add_inset_at_lonlat(
            fig,
            map_ax,
            spec["lon"],
            spec["lat"],
            spec["plot_fn"],
            inset_size=spec.get("inset_size", (0.12, 0.12)),
            pad=spec.get("pad", (0.0, 0.0)),
            inset_facecolor=spec.get("facecolor", "white"),
            inset_alpha=spec.get("alpha", 0.9),
            frame=spec.get("frame", True),
        )

    return fig, map_ax


# --- Example usage ------------------------------------------------------------
# Demo: four random sparklines anchored at fixed lon/lat points on the map.
# In a notebook __name__ is "__main__", so this runs whenever the cell runs.
if __name__ == "__main__":
    # Seeded generator so the demo curves are the same on every run.
    rng = np.random.default_rng(0)

    def make_sparkline(data):
        # Return a function that draws into an axes
        def _plot(ax):
            ax.plot(data, linewidth=1.2)
            ax.set_xlim(0, len(data) - 1)
        return _plot

    # One dict per inset; optional keys override the size, offset or frame.
    overlays = [
        {"lon": -4.25, "lat": 55.86, "plot_fn": make_sparkline(rng.normal(size=30)), "inset_size": (0.14, 0.10)},
        {"lon": -3.19, "lat": 55.95, "plot_fn": make_sparkline(rng.normal(size=30)), "pad": (0.02, 0.02)},
        {"lon": -2.97, "lat": 56.46, "plot_fn": make_sparkline(rng.normal(size=30))},
        {"lon": -5.11, "lat": 56.82, "plot_fn": make_sparkline(rng.normal(size=30)), "frame": False},
    ]

    fig, ax = plot_scotland_with_insets(overlays)
    plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import ConnectionPatch

# pip install cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature


def connect_inset_to_lonlat(
    fig,
    inset_ax,
    map_ax,
    lon,
    lat,
    *,
    src_crs=ccrs.PlateCarree(),
    inset_xy=(0.5, 0.0),     # point on inset (axes coords). (0.5,0)=bottom-center
    arrowstyle="-",          # e.g. "-" or "->"
    linewidth=0.8,
    color="black",
    zorder=50,
):
    """Draw a connector from a point on the inset axes to a lon/lat on the map.

    ``inset_xy`` is in the inset's axes coordinates. (lon, lat) is transformed
    from ``src_crs`` into the map projection and used in the map's data
    coordinates. The patch is added to ``fig`` and returned.
    """
    # Target point expressed in the map projection (map_ax data space).
    map_x, map_y = map_ax.projection.transform_point(lon, lat, src_crs)

    line_style = dict(
        arrowstyle=arrowstyle,
        linewidth=linewidth,
        color=color,
        zorder=zorder,
    )
    connector = ConnectionPatch(
        xyA=inset_xy, coordsA=inset_ax.transAxes,
        xyB=(map_x, map_y), coordsB=map_ax.transData,
        **line_style,
    )
    fig.add_artist(connector)
    return connector


def add_inset_at_lonlat(
    fig,
    map_ax,
    lon,
    lat,
    plot_inset_fn,
    *,
    inset_size=(0.12, 0.12),   # (w,h) in figure fraction
    pad=(0.0, 0.0),            # (dx,dy) in figure fraction
    src_crs=ccrs.PlateCarree(),
    connect_to=None,           # None or (lon,lat)
    connect_style=None,        # dict for connect_inset_to_lonlat
):
    """Place an inset axes centred on (lon, lat), optionally with a connector.

    The anchor is converted from ``src_crs`` to map data coordinates, then to
    display and figure-fraction coordinates. The inset is centred there,
    shifted by ``pad`` and clamped to the figure. ``plot_inset_fn(inset_ax)``
    draws its content.

    When ``connect_to`` is a (lon, lat) pair, a line is drawn from the inset to
    that location using ``connect_inset_to_lonlat``; ``connect_style``
    overrides the default style (plain thin black line from the inset's
    bottom-centre).

    Returns the inset axes.
    """
    # A fixed-aspect axes is only resized to its final box when it is drawn.
    # Apply that adjustment now; otherwise transData still describes the
    # pre-draw box and the inset is placed away from (lon, lat).
    map_ax.apply_aspect()

    # map data -> display -> figure fraction
    x_disp, y_disp = map_ax.transData.transform(
        map_ax.projection.transform_point(lon, lat, src_crs)
    )
    x_fig, y_fig = fig.transFigure.inverted().transform((x_disp, y_disp))

    w, h = inset_size
    dx, dy = pad
    left = np.clip(x_fig - w / 2 + dx, 0.0, 1.0 - w)
    bottom = np.clip(y_fig - h / 2 + dy, 0.0, 1.0 - h)

    inset_ax = fig.add_axes([left, bottom, w, h], zorder=40)
    inset_ax.set_facecolor("white")
    inset_ax.patch.set_alpha(0.9)
    # No ticks or tick labels: the inset is a bare mini-graph.
    inset_ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

    plot_inset_fn(inset_ax)

    # connector line from inset -> target location
    if connect_to is not None:
        style = dict(arrowstyle="-", linewidth=0.8, color="black", inset_xy=(0.5, 0.0))
        if connect_style:
            style.update(connect_style)
        connect_inset_to_lonlat(fig, inset_ax, map_ax, connect_to[0], connect_to[1], **style)

    return inset_ax


def plot_scotland_with_insets(
    overlays,
    *,
    extent=(-7.2, -0.5, 55.5, 57.95),  # lon_min, lon_max, lat_min, lat_max
    figsize=(8, 10),
    use_relief=True,
):
    """Draw a Lambert-conformal base map with an inset and connector per overlay.

    overlays: list of dicts.
      Required keys: "lon", "lat", "plot_fn"
      Optional keys: "inset_size", "pad", "connect_to" (defaults to the anchor
      itself), "connect_style"

    ``use_relief`` is accepted but currently has no effect: no relief layer is
    drawn.

    Returns (fig, ax).
    """
    projection = ccrs.LambertConformal(central_longitude=-4.0, central_latitude=57.0)
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(1, 1, 1, projection=projection)
    ax.set_extent(extent, crs=ccrs.PlateCarree())

    # Base map and line work, added in this order, all at 10m resolution.
    layers = (
        (cfeature.OCEAN, {"zorder": 0}),
        (cfeature.LAND, {"zorder": 1}),
        (cfeature.COASTLINE, {"linewidth": 0.8, "zorder": 5}),
        (cfeature.BORDERS, {"linewidth": 0.6, "zorder": 5}),
        (cfeature.RIVERS, {"linewidth": 0.4, "zorder": 4}),
    )
    for layer, layer_style in layers:
        ax.add_feature(layer.with_scale("10m"), **layer_style)

    # Inset plots + connectors
    for spec in overlays:
        anchor_lon = spec["lon"]
        anchor_lat = spec["lat"]

        # marker at the anchor point
        ax.plot(
            anchor_lon, anchor_lat,
            marker="o", markersize=3, transform=ccrs.PlateCarree(), zorder=10,
        )

        add_inset_at_lonlat(
            fig, ax, anchor_lon, anchor_lat,
            spec["plot_fn"],
            inset_size=spec.get("inset_size", (0.12, 0.12)),
            pad=spec.get("pad", (0.0, 0.0)),
            # default: connect the inset to its own anchor point
            connect_to=spec.get("connect_to", (anchor_lon, anchor_lat)),
            connect_style=spec.get("connect_style", None),
        )

    return fig, ax


# ---- Example wiring ---------------------------------------------------------
# Demo: two random line plots with connector lines back to the map.
# In a notebook __name__ is "__main__", so this runs whenever the cell runs.
if __name__ == "__main__":
    # Seeded generator so the demo curves are the same on every run.
    rng = np.random.default_rng(0)

    def make_plotter(data):
        # Return a function that draws `data` into the axes it is given.
        def _plot(ax):
            ax.plot(data, linewidth=1.2)
        return _plot

    overlays = [
        {
            "lon": -4.25, "lat": 55.86,  # Glasgow-ish
            "plot_fn": make_plotter(rng.normal(size=30)),
            "inset_size": (0.16, 0.10),
            # draw line to a different target location if you want:
            "connect_to": (-4.25, 56.0),
            "connect_style": {"arrowstyle": "->", "inset_xy": (0.5, 0.0)},
        },
        {
            "lon": -3.19, "lat": 55.95,  # Edinburgh-ish
            "plot_fn": make_plotter(rng.normal(size=30)),
            "pad": (0.02, 0.02),
            "connect_style": {"arrowstyle": "-", "inset_xy": (0.0, 0.5)},  # from left-middle
        },
    ]

    # use_relief is passed through, but the plotting function's relief code is
    # commented out, so it has no visible effect.
    fig, ax = plot_scotland_with_insets(overlays, use_relief=True)
    plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import ConnectionPatch

import geopandas as gpd
import geodatasets
from pyproj import Transformer
import contextily as cx


def lonlat_to_fig_xy(fig, ax, x, y):
    """Convert a point in ``ax`` data coordinates to figure-fraction coordinates.

    Despite the name, ``x`` and ``y`` must already be in the axes' data
    coordinates (the caller projects lon/lat first). Returns the (x, y) pair
    in figure fraction.
    """
    # An axes with a fixed aspect (e.g. after an image/basemap was added) is
    # only resized to its final box at draw time. Apply that adjustment first
    # so transData reflects where the axes will really be.
    ax.apply_aspect()
    x_disp, y_disp = ax.transData.transform((x, y))
    return fig.transFigure.inverted().transform((x_disp, y_disp))


def add_inset_at_lonlat_webmerc(
    fig, ax, lon, lat, plot_fn, *,
    transformer, inset_size=(0.12, 0.12), pad=(0, 0),
    connect_to=None, connect_style=None
):
    """Place an inset centred on (lon, lat) and connect it to a map location.

    ``transformer.transform(lon, lat)`` converts the anchor into the axes' data
    coordinates. The inset is centred on that point in figure fraction,
    shifted by ``pad`` and clamped to the figure. ``plot_fn(inset_ax)`` draws
    its content.

    A connector is always drawn from the inset to ``connect_to`` (a lon/lat
    pair; defaults to the anchor itself). ``connect_style`` may override
    "arrowstyle", "linewidth", "color" and "inset_xy".

    Returns the inset axes.
    """
    anchor_x, anchor_y = transformer.transform(lon, lat)
    fig_x, fig_y = lonlat_to_fig_xy(fig, ax, anchor_x, anchor_y)

    width, height = inset_size
    shift_x, shift_y = pad
    left = np.clip(fig_x - width/2 + shift_x, 0, 1 - width)
    bottom = np.clip(fig_y - height/2 + shift_y, 0, 1 - height)

    inset_ax = fig.add_axes([left, bottom, width, height], zorder=20)
    inset_ax.set_facecolor("white")
    inset_ax.patch.set_alpha(0.9)
    inset_ax.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)
    plot_fn(inset_ax)

    # Connector target: the explicit location if given, else the anchor.
    target = (lon, lat) if connect_to is None else connect_to
    target_x, target_y = transformer.transform(target[0], target[1])

    line_style = {
        "arrowstyle": "-",
        "linewidth": 0.8,
        "color": "black",
        "inset_xy": (0.5, 0.0),
    }
    if connect_style:
        line_style.update(connect_style)

    connector = ConnectionPatch(
        xyA=line_style["inset_xy"], coordsA=inset_ax.transAxes,
        xyB=(target_x, target_y), coordsB=ax.transData,
        arrowstyle=line_style["arrowstyle"],
        linewidth=line_style["linewidth"],
        color=line_style["color"],
        zorder=30,
    )
    fig.add_artist(connector)

    return inset_ax


# ---- boundaries (Natural Earth via geodatasets) ----
# Land polygons reprojected to Web Mercator (EPSG:3857).
# NOTE: `world` is only used by the commented-out outline plot further down.
world_path = geodatasets.get_path("naturalearth.land")  # polygon land; good for backdrop outlines
world = gpd.read_file(world_path).to_crs(epsg=3857)

fig, ax = plt.subplots(figsize=(12, 9))

# Highlands-ish extent
lon_min, lon_max, lat_min, lat_max = -7.8, -2.2, 56.5, 57.1
# always_xy=True: transform() takes (lon, lat) and returns (x, y).
transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)
# Project the two extent corners and use them as the axes limits.
x0, y0 = transformer.transform(lon_min, lat_min)
x1, y1 = transformer.transform(lon_max, lat_max)
ax.set_xlim(min(x0, x1), max(x0, x1))
ax.set_ylim(min(y0, y1), max(y0, y1))

# Terrain-like tiles
cx.add_basemap(ax, source=cx.providers.OpenTopoMap)

# Optional land outline
#world.boundary.plot(ax=ax, linewidth=0.6, color="black", alpha=0.6)

# Example overlays
# Seeded generator so the demo curves are the same on every run.
rng = np.random.default_rng(0)
def make_plotter(data):
    # Return a function that draws `data` into the axes it is given.
    def _plot(a):
        a.plot(data, linewidth=1.2)
    return _plot

# NOTE(review): the first overlay's lat (57.48) is north of lat_max (57.1), so
# its anchor lies outside the visible extent - confirm which value is intended.
overlays = [
    {"lon": -4.25, "lat": 57.48, "plot_fn": make_plotter(rng.normal(size=30))},
    {"lon": -5.10, "lat": 56.82, "plot_fn": make_plotter(rng.normal(size=30)),
     "connect_style": {"arrowstyle": "->", "inset_xy": (0.5, 0.0)}},
]

# One inset (plus connector) per overlay; missing optional keys use the defaults given here.
for it in overlays:
    add_inset_at_lonlat_webmerc(
        fig, ax, it["lon"], it["lat"], it["plot_fn"],
        transformer=transformer,
        inset_size=it.get("inset_size", (0.14, 0.10)),
        pad=it.get("pad", (0.0, 0.0)),
        connect_to=it.get("connect_to", None),
        connect_style=it.get("connect_style", None),
    )

# Hide the axes frame and ticks; only the basemap and insets remain visible.
ax.set_axis_off()
plt.show()

