# K-Means Clustering of ERA5 PV Weather Patterns


In [ ]:
import xarray as xr
import pandas as pd
import plotly.express as px
from scripts.analysis import clustering
# Load the processed ERA5 file and flatten it into a tabular DataFrame.
# The dataset is opened in a context manager so the underlying NetCDF file
# handle is released as soon as the values have been copied into pandas;
# `ds` stays bound afterwards but refers to a closed dataset.
with xr.open_dataset('data/processed/era5_processed_Bonn_2024_months_1-6.nc') as ds:
    df = ds.to_dataframe().reset_index()

# Columns fed to the clustering routine.
features = [
    'surface_solar_radiation_downwards_w_m2',
    '2m_temperature_c',
    '10m_wind_speed',
    'total_cloud_cover',
]

# Returns one label per clustered row plus the cluster centroids.
labels, centroids = clustering.cluster_weather_patterns(df, features, n_clusters=4)

# Attach the labels back onto df. Rows with a NaN in any feature get no label
# (NaN in 'cluster') because the label index is built from the NaN-free rows.
# NOTE(review): this assumes cluster_weather_patterns drops NaN rows the same
# way and returns labels in that row order -- confirm against its source.
# pandas raises ValueError here if the lengths disagree.
clustered_index = df[features].dropna().index
df['cluster'] = pd.Series(labels, index=clustered_index)


## Cluster Profiles

In [ ]:
import plotly.graph_objects as go
import numpy as np
# Tabulate the centroids: one row per cluster, one column per feature.
profile_df = pd.DataFrame(centroids, columns=features)

# One bar trace per centroid row, named after the row's index.
centroid_bars = [
    go.Bar(x=features, y=centroid, name=f'Cluster {cluster_id}')
    for cluster_id, centroid in profile_df.iterrows()
]

# Grouped bars place the clusters side by side for each feature.
fig = go.Figure(data=centroid_bars)
fig.update_layout(barmode='group', title='Cluster Centroids (Profiles)')
fig.show()


## Visualize Clusters (Interactive Scatter Plot)

In [ ]:
# Scatter of solar radiation against temperature, coloured by cluster.
# Rows without a cluster assignment (NaN in 'cluster') are left out: they
# belong to no cluster and would otherwise force the column to float.
plot_df = df.dropna(subset=['cluster']).copy()

# Plotly Express maps numeric colour columns to a continuous colour bar.
# Cluster ids are categories, so cast them to strings to get discrete colours
# and a per-cluster legend entry.
# NOTE(review): assumes the labels are integer-like ids -- confirm.
plot_df['cluster'] = plot_df['cluster'].astype(int).astype(str)

# Keep the legend in numeric cluster order rather than order of appearance.
cluster_order = sorted(plot_df['cluster'].unique(), key=int)

fig = px.scatter(
    plot_df,
    x='surface_solar_radiation_downwards_w_m2',
    y='2m_temperature_c',
    color='cluster',
    category_orders={'cluster': cluster_order},
    title='Clustered Weather Patterns',
    labels={'cluster': 'Cluster'},
    hover_data=features,
)
fig.show()
