# K-Means Clustering of ERA5 PV Weather Patterns


In [None]:
import xarray as xr
import pandas as pd
import numpy as np
import plotly.express as px

import sys
import os


# The notebook is executed from a sub-directory of the repository, so the
# parent of the current working directory is treated as the project root.
# Putting it at the front of sys.path makes the `scripts` package importable.
project_root = os.path.abspath(
    os.path.join(os.getcwd(), '..')
)
if project_root not in sys.path:
    sys.path[:0] = [project_root]

# Project-local import; it only resolves after the sys.path tweak above.
from scripts.analysis import anomaly_detection



# Location of the merged NetCDF file, relative to the project root.
merged_file = os.path.join(
    project_root,
    'data',
    'processed',
    'era5_merged_Bonn.nc',
)

# Open the dataset and echo a summary. Any failure is reported on stdout and
# then re-raised so the notebook stops here instead of continuing without data.
try:
    ds = xr.open_dataset(merged_file)
    print(f"Successfully loaded merged dataset from {merged_file}")
    print("\nDataset summary:")
    print(ds)
except Exception as load_error:
    print(f"Error loading merged dataset: {load_error}")
    raise




# The setup cell only imports `anomaly_detection`; the clustering helper used
# below lives in its own project module and must be imported explicitly,
# otherwise this cell fails with a NameError.
from scripts.analysis import clustering

# Only the xarray Dataset `ds` has been loaded so far, but the clustering call
# and the plots below operate on a pandas DataFrame, so flatten it here.
# NOTE(review): assumes the merged file already stores the feature variables
# under exactly the names listed below — confirm against the preprocessing step.
df = ds.to_dataframe()

# Variables used to describe each weather pattern.
features = ['surface_solar_radiation_downwards_w_m2', '2m_temperature_c', '10m_wind_speed', 'total_cloud_cover']

# Fail early with a readable message instead of an opaque error further down.
missing_features = [name for name in features if name not in df.columns]
if missing_features:
    raise KeyError(f"Features missing from merged dataset: {missing_features}")

labels, centroids = clustering.cluster_weather_patterns(df, features, n_clusters=4)

# Labels are aligned to the rows that have no NaN in any feature; every other
# row ends up with NaN in the 'cluster' column.
# NOTE(review): presumably cluster_weather_patterns drops the same NaN rows
# internally — verify, otherwise the label/index lengths will not match.
df['cluster'] = pd.Series(labels, index=df[features].dropna().index)


## Cluster Profiles

In [None]:
import numpy as np
import plotly.graph_objects as go

# Tabulate the centroids: one row per cluster, one column per feature.
# NOTE(review): centroid values may be in scaled units if the clustering
# helper standardises its inputs — confirm before interpreting magnitudes.
profile_df = pd.DataFrame(centroids, columns=features)

# Build one bar trace per cluster so the bars can be grouped by feature.
centroid_bars = [
    go.Bar(x=features, y=centroid, name=f'Cluster {cluster_id}')
    for cluster_id, centroid in profile_df.iterrows()
]

fig = go.Figure(data=centroid_bars)
fig.update_layout(barmode='group', title='Cluster Centroids (Profiles)')
fig.show()


## Visualize Clusters (Interactive Scatter Plot)

In [None]:
# Plot only rows that actually received a cluster label; rows that were
# excluded from clustering carry NaN in 'cluster' and have no group to show.
plot_df = df.dropna(subset=['cluster']).copy()

# Plotly Express maps numeric columns to a continuous colour scale. Cluster
# ids are categories, so convert them to strings to get one discrete colour
# and one legend entry per cluster. Work on a copy so `df` keeps its dtype.
plot_df['cluster'] = plot_df['cluster'].astype(int).astype(str)
cluster_order = sorted(plot_df['cluster'].unique(), key=int)

fig = px.scatter(
    plot_df,
    x='surface_solar_radiation_downwards_w_m2',
    y='2m_temperature_c',
    color='cluster',
    title='Clustered Weather Patterns',
    labels={'cluster': 'Cluster'},
    hover_data=features,
    # Keep the legend in numeric cluster order rather than order of appearance.
    category_orders={'cluster': cluster_order},
)
fig.show()
