In [82]:
import torch
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
from collections import defaultdict
from sklearn.metrics.pairwise import cosine_distances

## Day wise cosine velocity plot OBS and MODEL

In [100]:

# ==== Site selection ====
site_id = 0  # Change for different sites
obs_sites_ncvar_name = {
    0: ("juelich", "juelich"),
    1: ("lin", "lin"),
    2: ("warsaw", "warsaw"),
    3: ("vienna", "vienna"),
    4: ("bourges", "bourges"),
    5: ("zargoza", "zargoza"),
    6: ("sirta", "sirta"),
    7: ("cabauw", "cabauw"),
    8: ("nuremberg", "nuremberg"),
    9: ("aurillac", "aurillac"),
    10: ("dresden", "dresden"),
}

icon_sites_ncvar_name = {
    0: ("juelich", "juelich"),
    1: ("lin", "lin"),
    2: ("warsaw", "warsaw"),
    3: ("vienna", "vienna"),
    4: ("bourges", "bourges"),
    5: ("zaragoza", "zaragoza"),
    6: ("sirta", "sirta"),
    7: ("cabauw", "cabauw"),
    8: ("nuremberg", "nuremberg"),
    9: ("aurillac", "aurillac"),
    10: ("dresden", "dresden"),
}

site_nc, _ = obs_sites_ncvar_name[site_id]
site_nc_icon, _ = icon_sites_ncvar_name[site_id]

# === Load OBS data ===
ds_obs = xr.open_dataset(f"/p/project/exaww/chatterjee1/dataset/warmworld_datasets/msgobs_108_{site_nc}crops.nc")
obs_times_raw = ds_obs['time'].values
obs_datetimes = pd.to_datetime([t[:12] for t in obs_times_raw], format="%Y%m%d%H%M")

# === Load ICON data ===
ds_model = xr.open_dataset(f"/p/scratch/exaww/chatterjee1/nn_obs/continuous/msgobs_108_{site_nc_icon}crops_icon.nc")
model_datetimes = pd.to_datetime(ds_model['time'].values)

# === Define common hourly grid ===
hourly_times = pd.date_range(start=max(obs_datetimes.min(), model_datetimes.min()).floor('H'),
                             end=min(obs_datetimes.max(), model_datetimes.max()).ceil('H'),
                             freq='H')

# === Helper function ===
def match_closest(datetimes, reference_times, tolerance=900):
    matched_indices = []
    matched_times = []
    for ref in reference_times:
        diffs = np.abs((datetimes - ref).total_seconds())
        min_idx = np.argmin(diffs)
        if diffs[min_idx] <= tolerance:
            matched_indices.append(min_idx)
            matched_times.append(ref)  # not the original time, but aligned to the grid
    return matched_indices, matched_times

# === Match indices ===
obs_matched_idx, aligned_obs_times = match_closest(obs_datetimes, hourly_times)
model_matched_idx = [i for i, t in enumerate(model_datetimes) if t in hourly_times]

# === Load features and clusters ===
obs_features_all = torch.load(f"/p/project/exaww/chatterjee1/mcspss_continuous/analysis/location_obs_features/trainfeat_obs_{site_nc}.pth", map_location="cpu")
obs_clusters_all = torch.load(f"/p/project/exaww/chatterjee1/mcspss_continuous/analysis/location_obs_features/obs_{site_nc}_cluster_10_labels.pth", map_location="cpu")
obs_clusters_all = np.array(obs_clusters_all)
obs_clusters_all[obs_clusters_all == 0] = 7

model_features_all = torch.load(f"/p/project/exaww/chatterjee1/mcspss_continuous/analysis/location_icon_features/trainfeat_icon_{site_nc_icon}.pth", map_location="cpu")
model_clusters_all = torch.load(f"/p/project/exaww/chatterjee1/mcspss_continuous/analysis/location_icon_features/icon_{site_nc_icon}_cluster_10_labels.pth", map_location="cpu")
model_clusters_all = np.array(model_clusters_all)
model_clusters_all[model_clusters_all == 0] = 7

# === Filter to matched indices ===
obs_features_hourly = obs_features_all[obs_matched_idx]
obs_clusters_hourly = obs_clusters_all[obs_matched_idx]
obs_hourly_datetimes = pd.to_datetime(aligned_obs_times)  # This will be 00:00, 01:00, etc.

model_features_hourly = model_features_all[model_matched_idx]
model_clusters_hourly = model_clusters_all[model_matched_idx]
model_hourly_datetimes = model_datetimes[model_matched_idx]

# === Select one top day ===
label_of_interest = 1
obs_df = pd.DataFrame({
    "timestamp": obs_hourly_datetimes,
    "date": obs_hourly_datetimes.date,
    "cluster": obs_clusters_hourly
})

top_dates = obs_df[obs_df["cluster"] == label_of_interest].groupby("date").size().sort_values(ascending=False).head(5).index
selected_date = top_dates[4]

# === Extract daily data ===
day_indices_obs = obs_df[obs_df["date"] == selected_date].index
obs_day_features = obs_features_hourly[day_indices_obs]
obs_day_times = obs_hourly_datetimes[day_indices_obs]
obs_day_clusters = obs_clusters_hourly[day_indices_obs]

model_df = pd.DataFrame({
    "timestamp": model_hourly_datetimes,
    "date": model_hourly_datetimes.date,
    "cluster": model_clusters_hourly
})

day_indices_model = model_df[model_df["date"] == selected_date].index
model_day_features = model_features_hourly[day_indices_model]
model_day_times = model_hourly_datetimes[day_indices_model]
model_day_clusters = model_clusters_hourly[day_indices_model]

# === Compute cosine velocity ===
def compute_cosine_velocity(features):
    return [
        cosine_distances(features[i].unsqueeze(0), features[i - 1].unsqueeze(0))[0, 0]
        for i in range(1, len(features))
    ]

obs_velocity = compute_cosine_velocity(obs_day_features)
model_velocity = compute_cosine_velocity(model_day_features)

# === Plot with enhanced markers and linestyle ===
plt.figure(figsize=(12, 5))

# OBS line
plt.plot(obs_day_times[1:], obs_velocity, color='blue', linestyle='-', linewidth=1.5, label='OBS Velocity')
# MODEL line
plt.plot(model_day_times[1:], model_velocity, color='orange', linestyle='--', linewidth=1.5, label='Model Velocity')

# OBS markers
obs_cluster8_labeled = False
for t, v, c in zip(obs_day_times[1:], obs_velocity, obs_day_clusters[1:]):
    if c == label_of_interest:
        plt.scatter(t, v, color='blue', marker='s', s=100, edgecolor='k', label=f'OBS Cluster {label_of_interest}' if not obs_cluster8_labeled else "")
        obs_cluster8_labeled = True
    else:
        plt.scatter(t, v, color='blue', marker='o', s=30, label=f'OBS Cluster {c}')

# MODEL markers
model_cluster8_labeled = False
for t, v, c in zip(model_day_times[1:], model_velocity, model_day_clusters[1:]):
    if c == label_of_interest:
        plt.scatter(t, v, color='orange', marker='o', s=100, facecolors='orange', edgecolors='k', label=f'Model Cluster {label_of_interest}' if not model_cluster8_labeled else "")
        model_cluster8_labeled = True
    else:
        plt.scatter(t, v, color='orange', marker='x', s=30, label=f'Model Cluster {c}')

plt.xlabel("Time")
plt.ylabel("Cosine Distance to Previous Hour")
plt.title(f"Latent Cosine Velocity (OBS vs MODEL)\n{selected_date}")
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.savefig(f"/p/project1/exaww/chatterjee1/plots/continuous/lcv_cl_{label_of_interest}_{selected_date}.png", dpi=100, bbox_inches="tight")
plt.show()

## Difference in cosine distance for top N days

In [57]:
# ==== Site selection ====
site_id = 0  # Change for different sites
obs_sites_ncvar_name = {
    0: ("juelich", "juelich"),
    1: ("lin", "lin"),
    2: ("warsaw", "warsaw"),
    3: ("vienna", "vienna"),
    4: ("bourges", "bourges"),
    5: ("zargoza", "zargoza"),
    6: ("sirta", "sirta"),
    7: ("cabauw", "cabauw"),
    8: ("nuremberg", "nuremberg"),
    9: ("aurillac", "aurillac"),
    10: ("dresden", "dresden"),
}

icon_sites_ncvar_name = {
    0: ("juelich", "juelich"),
    1: ("lin", "lin"),
    2: ("warsaw", "warsaw"),
    3: ("vienna", "vienna"),
    4: ("bourges", "bourges"),
    5: ("zaragoza", "zaragoza"),
    6: ("sirta", "sirta"),
    7: ("cabauw", "cabauw"),
    8: ("nuremberg", "nuremberg"),
    9: ("aurillac", "aurillac"),
    10: ("dresden", "dresden"),
}

site_nc, _ = obs_sites_ncvar_name[site_id]
site_nc_icon, _ = icon_sites_ncvar_name[site_id]

# === Load OBS data ===
ds_obs = xr.open_dataset(f"/p/project/exaww/chatterjee1/dataset/warmworld_datasets/msgobs_108_{site_nc}crops.nc")
obs_times_raw = ds_obs['time'].values
obs_datetimes = pd.to_datetime([t[:12] for t in obs_times_raw], format="%Y%m%d%H%M")

# === Load ICON data ===
ds_model = xr.open_dataset(f"/p/scratch/exaww/chatterjee1/nn_obs/continuous/msgobs_108_{site_nc_icon}crops_icon.nc")
model_datetimes = pd.to_datetime(ds_model['time'].values)

# === Define common hourly grid ===
hourly_times = pd.date_range(start=max(obs_datetimes.min(), model_datetimes.min()).floor('H'),
                             end=min(obs_datetimes.max(), model_datetimes.max()).ceil('H'),
                             freq='H')

# === Helper function ===
def match_closest(datetimes, reference_times, tolerance=900):
    matched_indices = []
    matched_times = []
    for ref in reference_times:
        diffs = np.abs((datetimes - ref).total_seconds())
        min_idx = np.argmin(diffs)
        if diffs[min_idx] <= tolerance:
            matched_indices.append(min_idx)
            matched_times.append(ref)
    return matched_indices, matched_times

# === Match indices ===
obs_matched_idx, aligned_obs_times = match_closest(obs_datetimes, hourly_times)
model_matched_idx = [i for i, t in enumerate(model_datetimes) if t in hourly_times]

# === Load features and clusters ===
obs_features_all = torch.load(f"/p/project/exaww/chatterjee1/mcspss_continuous/analysis/location_obs_features/trainfeat_obs_{site_nc}.pth", map_location="cpu")
obs_clusters_all = torch.load(f"/p/project/exaww/chatterjee1/mcspss_continuous/analysis/location_obs_features/obs_{site_nc}_cluster_10_labels.pth", map_location="cpu")
obs_clusters_all = np.array(obs_clusters_all)
obs_clusters_all[obs_clusters_all == 0] = 7

model_features_all = torch.load(f"/p/project/exaww/chatterjee1/mcspss_continuous/analysis/location_icon_features/trainfeat_icon_{site_nc_icon}.pth", map_location="cpu")
model_clusters_all = torch.load(f"/p/project/exaww/chatterjee1/mcspss_continuous/analysis/location_icon_features/icon_{site_nc_icon}_cluster_10_labels.pth", map_location="cpu")
model_clusters_all = np.array(model_clusters_all)
model_clusters_all[model_clusters_all == 0] = 7

# === Filter to matched indices ===
obs_features_hourly = obs_features_all[obs_matched_idx]
obs_clusters_hourly = obs_clusters_all[obs_matched_idx]
obs_hourly_datetimes = pd.to_datetime(aligned_obs_times)

model_features_hourly = model_features_all[model_matched_idx]
model_clusters_hourly = model_clusters_all[model_matched_idx]
model_hourly_datetimes = model_datetimes[model_matched_idx]

# === Aggregate to DataFrame ===
obs_df = pd.DataFrame({
    "timestamp": obs_hourly_datetimes,
    "date": obs_hourly_datetimes.date,
    "cluster": obs_clusters_hourly
})

# === Select top N days ===
label_of_interest = 8
N = 25
top_dates = obs_df[obs_df["cluster"] == label_of_interest].groupby("date").size().sort_values(ascending=False).head(N).index

# === Init collection ===
hourly_diffs = {f"{h:02d}:00": [] for h in range(24)}
ensemble_count = 0

for date in top_dates:
    obs_day_idx = obs_df[obs_df["date"] == date].index
    model_day_idx = pd.Index([i for i, t in enumerate(model_hourly_datetimes) if t.date() == date])

    obs_feats = obs_features_hourly[obs_day_idx]
    model_feats = model_features_hourly[model_day_idx]
    obs_times = obs_hourly_datetimes[obs_day_idx]
    model_times = model_hourly_datetimes[model_day_idx]

    obs_clusters = obs_clusters_hourly[obs_day_idx]
    model_clusters = model_clusters_hourly[model_day_idx]

    print(f"Date: {date} | OBS cluster {label_of_interest} count: {(obs_clusters == label_of_interest).sum()} | MODEL cluster {label_of_interest} count: {(model_clusters == label_of_interest).sum()}")

    common_times = sorted(set(obs_times[1:]).intersection(set(model_times[1:])))

    obs_vel = [cosine_distances(obs_feats[i].unsqueeze(0), obs_feats[i-1].unsqueeze(0))[0, 0]
                for i in range(1, len(obs_feats)) if obs_times[i] in common_times]
    model_vel = [cosine_distances(model_feats[i].unsqueeze(0), model_feats[i-1].unsqueeze(0))[0, 0]
                for i in range(1, len(model_feats)) if model_times[i] in common_times]
    times_used = [t.strftime('%H:00') for t in common_times]

    for t, o, m in zip(times_used, obs_vel, model_vel):
        hourly_diffs[t].append(o - m)

    ensemble_count += 1

# === Prepare Plot ===
plt.figure(figsize=(12, 5))
hour_labels = list(hourly_diffs.keys())
all_diffs = [hourly_diffs[hr] for hr in hour_labels]

# Plot individual ensemble lines
for i in range(ensemble_count):
    day_diffs = [hourly_diffs[hr][i] if i < len(hourly_diffs[hr]) else np.nan for hr in hour_labels]
    plt.plot(hour_labels, day_diffs, color='dimgray', alpha=0.7)

# Plot mean and std
mean_diffs = [np.nanmean(hourly_diffs[hr]) for hr in hour_labels]
std_diffs = [np.nanstd(hourly_diffs[hr]) for hr in hour_labels]

plt.plot(hour_labels, mean_diffs, color='black', label='Mean Difference', linewidth=2)
plt.fill_between(hour_labels,
                 np.array(mean_diffs) - np.array(std_diffs),
                 np.array(mean_diffs) + np.array(std_diffs),
                 color='gray', alpha=0.3, label='±1 Std Dev')

plt.xlabel("Hour of Day")
plt.ylabel("OBS - MODEL Cosine Velocity")
plt.title(f"Latent Cosine Velocity Difference over {ensemble_count} Days (Cluster {label_of_interest} dominant)")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.savefig(f"/p/project1/exaww/chatterjee1/plots/continuous/lcv_ensemble_difference_cl{label_of_interest}_top{N}.png", dpi=100)
plt.show()

Date: 2023-06-25 | OBS cluster 8 count: 12 | MODEL cluster 8 count: 25
Date: 2023-07-07 | OBS cluster 8 count: 11 | MODEL cluster 8 count: 25
Date: 2023-06-10 | OBS cluster 8 count: 11 | MODEL cluster 8 count: 25
Date: 2023-07-08 | OBS cluster 8 count: 11 | MODEL cluster 8 count: 21
Date: 2023-06-13 | OBS cluster 8 count: 11 | MODEL cluster 8 count: 25
Date: 2023-06-12 | OBS cluster 8 count: 11 | MODEL cluster 8 count: 25
Date: 2023-06-11 | OBS cluster 8 count: 11 | MODEL cluster 8 count: 25
Date: 2023-09-07 | OBS cluster 8 count: 11 | MODEL cluster 8 count: 25
Date: 2023-09-09 | OBS cluster 8 count: 10 | MODEL cluster 8 count: 25
Date: 2023-09-08 | OBS cluster 8 count: 10 | MODEL cluster 8 count: 25
Date: 2023-09-06 | OBS cluster 8 count: 10 | MODEL cluster 8 count: 25
Date: 2023-09-05 | OBS cluster 8 count: 10 | MODEL cluster 8 count: 25
Date: 2023-06-04 | OBS cluster 8 count: 9 | MODEL cluster 8 count: 18
Date: 2023-06-05 | OBS cluster 8 count: 9 | MODEL cluster 8 count: 20
Date: 20

  mean_diffs = [np.nanmean(hourly_diffs[hr]) for hr in hour_labels]
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


## All sites

In [81]:

# === Site info ===
obs_sites_ncvar_name = {
    0: ("juelich", "juelich"),
    1: ("lin", "lin"),
    2: ("warsaw", "warsaw"),
    3: ("vienna", "vienna"),
    4: ("bourges", "bourges"),
    5: ("zargoza", "zargoza"),
    6: ("sirta", "sirta"),
    7: ("cabauw", "cabauw"),
    8: ("nuremberg", "nuremberg"),
    9: ("aurillac", "aurillac"),
    10: ("dresden", "dresden"),
}

icon_sites_ncvar_name = {
    0: ("juelich", "juelich"),
    1: ("lin", "lin"),
    2: ("warsaw", "warsaw"),
    3: ("vienna", "vienna"),
    4: ("bourges", "bourges"),
    5: ("zaragoza", "zargoza"),
    6: ("sirta", "sirta"),
    7: ("cabauw", "cabauw"),
    8: ("nuremberg", "nuremberg"),
    9: ("aurillac", "aurillac"),
    10: ("dresden", "dresden"),
}

label_of_interest = 8
top_k = 10

all_hourly_diffs = defaultdict(list)

for site_id in range(11):
    site_nc, _ = obs_sites_ncvar_name[site_id]
    site_nc_icon, site_feat_icon = icon_sites_ncvar_name[site_id]

    ds_obs = xr.open_dataset(f"/p/project/exaww/chatterjee1/dataset/warmworld_datasets/msgobs_108_{site_nc}crops.nc")
    obs_times_raw = ds_obs['time'].values
    obs_datetimes = pd.to_datetime([t[:12] for t in obs_times_raw], format="%Y%m%d%H%M")

    ds_model = xr.open_dataset(f"/p/scratch/exaww/chatterjee1/nn_obs/continuous/msgobs_108_{site_nc_icon}crops_icon.nc")
    model_datetimes = pd.to_datetime(ds_model['time'].values)

    hourly_times = pd.date_range(start=max(obs_datetimes.min(), model_datetimes.min()).floor('H'),
                                 end=min(obs_datetimes.max(), model_datetimes.max()).ceil('H'),
                                 freq='H')

    def match_closest(datetimes, reference_times, tolerance=900):
        matched_indices = []
        matched_times = []
        for ref in reference_times:
            diffs = np.abs((datetimes - ref).total_seconds())
            min_idx = np.argmin(diffs)
            if diffs[min_idx] <= tolerance:
                matched_indices.append(min_idx)
                matched_times.append(ref)
        return matched_indices, matched_times

    obs_matched_idx, aligned_obs_times = match_closest(obs_datetimes, hourly_times)
    model_matched_idx = [i for i, t in enumerate(model_datetimes) if t in hourly_times]

    obs_features_all = torch.load(f"/p/project/exaww/chatterjee1/mcspss_continuous/analysis/location_obs_features/trainfeat_obs_{site_nc}.pth", map_location="cpu")
    obs_clusters_all = torch.load(f"/p/project/exaww/chatterjee1/mcspss_continuous/analysis/location_obs_features/obs_{site_nc}_cluster_10_labels.pth", map_location="cpu")
    obs_clusters_all = np.array(obs_clusters_all)
    obs_clusters_all[obs_clusters_all == 0] = 7

    model_features_all = torch.load(f"/p/project/exaww/chatterjee1/mcspss_continuous/analysis/location_icon_features/trainfeat_icon_{site_feat_icon}.pth", map_location="cpu")
    model_clusters_all = torch.load(f"/p/project/exaww/chatterjee1/mcspss_continuous/analysis/location_icon_features/icon_{site_feat_icon}_cluster_10_labels.pth", map_location="cpu")
    model_clusters_all = np.array(model_clusters_all)
    model_clusters_all[model_clusters_all == 0] = 7

    obs_features_hourly = obs_features_all[obs_matched_idx]
    obs_clusters_hourly = obs_clusters_all[obs_matched_idx]
    obs_hourly_datetimes = pd.to_datetime(aligned_obs_times)

    model_features_hourly = model_features_all[model_matched_idx]
    model_clusters_hourly = model_clusters_all[model_matched_idx]
    model_hourly_datetimes = model_datetimes[model_matched_idx]

    obs_df = pd.DataFrame({
        "timestamp": obs_hourly_datetimes,
        "date": obs_hourly_datetimes.date,
        "cluster": obs_clusters_hourly
    })

    top_dates = obs_df[obs_df["cluster"] == label_of_interest].groupby("date").size().sort_values(ascending=False).head(top_k).index

    for selected_date in top_dates:
        day_indices_obs = obs_df[obs_df["date"] == selected_date].index
        obs_day_features = obs_features_hourly[day_indices_obs]
        obs_day_times = obs_hourly_datetimes[day_indices_obs]
        obs_day_clusters = obs_clusters_hourly[day_indices_obs]

        model_df = pd.DataFrame({
            "timestamp": model_hourly_datetimes,
            "date": model_hourly_datetimes.date,
            "cluster": model_clusters_hourly
        })

        day_indices_model = model_df[model_df["date"] == selected_date].index
        model_day_features = model_features_hourly[day_indices_model]
        model_day_times = model_hourly_datetimes[day_indices_model]
        model_day_clusters = model_clusters_hourly[day_indices_model]

        def compute_cosine_velocity(features):
            return [
                cosine_distances(features[i].unsqueeze(0), features[i - 1].unsqueeze(0))[0, 0]
                for i in range(1, len(features))
            ]

        obs_velocity = compute_cosine_velocity(obs_day_features)
        model_velocity = compute_cosine_velocity(model_day_features)

        obs_times = obs_day_times[1:]
        model_times = model_day_times[1:]

        common_times = sorted(set(obs_times).intersection(set(model_times)))

        for t in common_times:
            obs_idx = np.where(obs_times == t)[0][0]
            model_idx = np.where(model_times == t)[0][0]
            diff = obs_velocity[obs_idx] - model_velocity[model_idx]
            hour_label = t.strftime("%H:%M")
            all_hourly_diffs[hour_label].append(diff)

        print(f"Site: {site_nc}, Date: {selected_date}, OBS cluster {label_of_interest} count: {(obs_day_clusters == label_of_interest).sum()}, MODEL cluster {label_of_interest} count: {(model_day_clusters == label_of_interest).sum()}")

# === Plotting ===
hour_labels = sorted(all_hourly_diffs.keys(), key=lambda x: int(x.split(":")[0]))
plt.figure(figsize=(12, 6))

for hour in hour_labels:
    y_vals = all_hourly_diffs[hour]
    x_vals = [hour] * len(y_vals)
    #plt.plot([hour]*len(y_vals), y_vals, 'o-', color='dimgray', alpha=0.9)
    #plt.scatter([hour] * len(y_vals), y_vals, color='dimgray', alpha=0.9, marker='o')
    y_vals_filtered = [y for y in y_vals if -0.35 <= y <= 0.35]
    plt.scatter([hour] * len(y_vals_filtered), y_vals_filtered, color='dimgray', alpha=0.9, marker='o')

mean_vals = [np.mean(all_hourly_diffs[h]) for h in hour_labels]
std_vals = [np.std(all_hourly_diffs[h]) for h in hour_labels]

plt.plot(hour_labels, mean_vals, color='black', linewidth=2, label='Mean Difference')
plt.fill_between(hour_labels, np.array(mean_vals) - np.array(std_vals), np.array(mean_vals) + np.array(std_vals),
                 color='gray', alpha=0.3, label='Std Dev')

plt.xticks(rotation=45)
plt.grid(True)
plt.xlabel("Hour of Day")
plt.ylabel("Cosine Velocity Difference (OBS - MODEL)")
plt.title(f"Obs - Model, Hourly Cosine Velocity Difference Ensemble\n(Cluster {label_of_interest})All Sites, Top {top_k} Days")
plt.legend()
plt.tight_layout()
plt.savefig(f"/p/project1/exaww/chatterjee1/plots/continuous/lcv_cl_{label_of_interest}_ensemble_all_sites_top{top_k}.png", dpi=100)
plt.show()

Site: juelich, Date: 2023-06-25, OBS cluster 8 count: 12, MODEL cluster 8 count: 25
Site: juelich, Date: 2023-07-07, OBS cluster 8 count: 11, MODEL cluster 8 count: 25
Site: juelich, Date: 2023-06-10, OBS cluster 8 count: 11, MODEL cluster 8 count: 25
Site: juelich, Date: 2023-07-08, OBS cluster 8 count: 11, MODEL cluster 8 count: 21
Site: juelich, Date: 2023-06-13, OBS cluster 8 count: 11, MODEL cluster 8 count: 25
Site: juelich, Date: 2023-06-12, OBS cluster 8 count: 11, MODEL cluster 8 count: 25
Site: juelich, Date: 2023-06-11, OBS cluster 8 count: 11, MODEL cluster 8 count: 25
Site: juelich, Date: 2023-09-07, OBS cluster 8 count: 11, MODEL cluster 8 count: 25
Site: juelich, Date: 2023-09-09, OBS cluster 8 count: 10, MODEL cluster 8 count: 25
Site: juelich, Date: 2023-09-08, OBS cluster 8 count: 10, MODEL cluster 8 count: 25
Site: lin, Date: 2023-07-08, OBS cluster 8 count: 11, MODEL cluster 8 count: 25
Site: lin, Date: 2023-07-15, OBS cluster 8 count: 11, MODEL cluster 8 count: 14


In [68]:
len([hour]*len(y_vals)), len(y_vals)

(275, 275)

In [60]:
site_nc_icon, site_feat_icon

('zaragoza', 'zargoza')