# In [None]:
# ---------------------------------------------
# IMPORT LIBRARIES
# ---------------------------------------------
import numpy as np
import pandas as pd
import os
import pickle


# ---------------------------------------------
# LOAD FEATURE NAMES AND DESCRIPTIONS
# Feature description is a dictionary object that 
# contains feature_name: feature definition pairs 
# based on SWAT IO documentation
# ---------------------------------------------
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open('/path/SWAT_feat_description.pkl', 'rb') as f:
    feat_descript = pickle.load(f)
with open('/path/SWAT_feat_names.pkl', 'rb') as f:
    feat_names = pickle.load(f)

# data_path: directory holding the input arrays (one file per entry).
# saverdir:  directory that receives the averaged output arrays.
data_path = '/path'
saverdir = '/path'

# Every directory entry is treated as an input file except the known
# non-data entries. Filtering (instead of list.remove) avoids a ValueError
# when '.DS_Store' or 'README' is not present in the directory.
names_list = [
    entry for entry in os.listdir(data_path)
    if entry not in ('.DS_Store', 'README')
]


# ---------------------------------------------
# SAVE SUBSET OF FEATURES FOR CLUSTERING
# **************** USER INPUT *****************
# Define features used for clustering 
# --------------------------------------------- 
# Column names kept for clustering, in the order they appear along the last
# axis of the saved arrays. Each entry is used to index the DataFrame columns,
# so it must match one of the column names assigned to the DataFrame.
clustering_feature_names = [
    'MON',
    'PRECIP',
    'AREA',
    'DAILYCN',
    'SNOMELT',
    'SNOFALL',
    'SURQ_GEN',
    'LATQ',
    'WYLD',
    'PET',
]

# Size of the last axis follows the user-selected feature list, so editing
# clustering_feature_names does not require touching the reshape below.
n_features = len(clustering_feature_names)

for name in names_list:
    features = np.load(os.path.join(data_path, name))
    df = pd.DataFrame(features)

    # Using 'numeric' option in Data_input_hru gets rid of the first 5 features
    df.columns = feat_names[5:86]

    # Delete annual summary (over a single year)
    # Delete simulation summary (over the whole simulation)
    # Both are identified by MON > 12. The index is reset so that row labels
    # equal row positions, which the HRU count below relies on.
    df = df[~(df.MON > 12)].reset_index(drop=True)

    # The number of HRUs is the length of the leading run of MON == 1 rows,
    # i.e. the position of the first row whose month is not 1.
    # Assumes rows are ordered year -> month -> HRU, as the reshape requires.
    n_hru = int(df.MON.ne(1).idxmax())
    if n_hru == 0:
        # idxmax() yields 0 when the first row is not month 1, or when no
        # row differs from month 1; neither layout can be reshaped.
        raise ValueError(
            'Cannot determine the number of HRUs in ' + name +
            ': expected a leading run of MON == 1 rows followed by other months'
        )

    # Select a subset of features
    df = df[clustering_feature_names]

    # Rearrange data so data is in the format (years, months, hrus, features).
    # The number of years is inferred from the row count (-1) rather than
    # hard-coded; reshape still raises ValueError if the rows do not divide
    # evenly into 12 months x n_hru.
    clustering_features = df.to_numpy().reshape(-1, 12, n_hru, n_features)

    # Monthly average over all simulated years (months, hrus, features)
    avg_clustering_features = np.mean(clustering_features, axis=0)

    # Output file is '<name>.<n_hru>'; np.save appends '.npy' to it.
    np.save(os.path.join(saverdir, name + '.' + str(n_hru)), avg_clustering_features)

# Save names of features selected for clustering.
# The list is pickled to a file named 'clustering_feature_names' in the
# current working directory, so the column order of the saved arrays can be
# recovered later. (The string quotes here were previously garbled into
# backslashes, which made the statement a syntax error.)
with open('clustering_feature_names', 'wb') as fp:
    pickle.dump(clustering_feature_names, fp)