# Cluster gridded data


### This example script clusters ERA5 wind fields at 850 hPa and saves the clusters to a NetCDF file

## Load packages

In [None]:
import sys
sys.path.append('utils/') # Path to scripts
import read_era5 as era5
import kmeans_clustering as kmeans

## Input settings

In [None]:
# --- Time sampling ---
# First and last month of the clustering period, formatted yyyy-mm.
# The end month is included.
date_start = "2000-01"
date_end = "2005-12"
# Time of day (UTC) at which the data is sampled; one sample per day.
utc = 12

# --- Spatial domain ---
# Pressure level used (hPa).
plevel = 850
# Latitude limits of the analysis box.
# NOTE(review): the previous comment said "South and North", but the values
# are given as -5 then -50 (northern edge first) -- confirm the order that
# read_data expects.
lat_lims = [-5, -50]
# West and East longitude limits of the analysis box.
lon_lims = [100, 165]
# Coarsening factor applied in both the lat and lon direction.
Ncoarsen = 6

# --- Clustering ---
# Number of clusters for the kmeans clustering.
Nclusters = 30
# Processes used for clustering.
n_jobs = 1

# --- Paths ---
# Directory in which the cluster results are saved.
path_out = "/home/565/fl2086/weather-maps-australia/cluster_data/"
# Era5 data directory.
path_data = "/g/data/rt52/era5/pressure-levels/reanalysis/"

## Load data

In [None]:
# Read the 'u' field for the configured period, sampling hour, analysis box,
# coarsening factor and pressure level. The call also returns the time, lat
# and lon values, which are kept for the later cells.
print('Read u fields')
u, time, lat, lon = era5.read_data(
    'u',
    date_start,
    date_end,
    utc,
    lat_lims,
    lon_lims,
    path_data,
    Ncoarsen=Ncoarsen,
    plevel=plevel,
    progress=True,
)

# Read the 'v' field with exactly the same settings; the three extra return
# values are discarded because they were already stored by the 'u' read.
print('Read v fields')
v, _, _, _ = era5.read_data(
    'v',
    date_start,
    date_end,
    utc,
    lat_lims,
    lon_lims,
    path_data,
    Ncoarsen=Ncoarsen,
    plevel=plevel,
    progress=True,
)

## Clustering

In [None]:
# Stack the u and v data into the single input handed to the clustering step
uv_stack = kmeans.stack(u, v)

# Cluster into Nclusters clusters. The process count is taken from the
# n_jobs input setting instead of a hard-coded 1, so that changing the
# setting actually takes effect.
clusterU, clusterV, labels, inertia, silhouette = kmeans.cluster(
    uv_stack, lat, lon, Nclusters, n_jobs=n_jobs
)

# Save the cluster output to a netcdf file inside path_out
kmeans.save(
    f"{path_out}example_02.nc",
    Nclusters,
    time,
    lat,
    lon,
    clusterU,
    clusterV,
    labels,
    inertia,
    silhouette,
)