In [8]:
# Import dependencies
import numpy as np
from tslearn.clustering import TimeSeriesKMeans
from tslearn.preprocessing import TimeSeriesScalerMinMax , TimeSeriesScalerMeanVariance
import matplotlib.pyplot as plt
import os

# Load feature definitions
# with open('/Users/muneeza/Documents/GitHub/GNNs_PrecisionAgriculture//SWAT Data Reader/SWAT_feat_description.pkl', 'rb') as f:
#     feat_descript = pickle.load(f)

# Feature names (per the original note: taken from the 01_Gen_Clustering_Data file)
feat_names = ['MON',  'PRECIP' , 'AREA', 'DAILYCN', 'SNOMELT' ,'SNOFALL', 'SURQ_GEN', 'LATQ', 'WYLD', 'PET']
n_feat = len(feat_names)
# Data directory; the HRU_CLUSTERING_DIR environment variable overrides the
# original hard-coded location so the notebook can run on other machines.
data_path = os.environ.get(
    'HRU_CLUSTERING_DIR',
    '/Users/muneeza/Documents/GitHub/DATA_SMest/HRU_clustering',
)
# os.listdir returns entries in arbitrary order, so sort to keep the HRU row
# order reproducible between runs. Hidden files such as .DS_Store are filtered
# out instead of calling list.remove(), which raises ValueError when the file
# is not present.
names_list = sorted(f for f in os.listdir(data_path) if not f.startswith('.'))
# The HRU count of each file is the second-to-last dot-separated field of its name.
n_hrus = np.array([x.split('.')[-2] for x in names_list]).astype(int)
hrus_total = np.sum(n_hrus)
# Feature dimension follows n_feat rather than a hard-coded 10.
all_data = np.zeros((12, hrus_total, n_feat))   # (months, hrus, features)

In [None]:
# Stack every per-file array along the HRU axis.
# A fresh local buffer is filled (instead of writing into all_data and then
# rebinding all_data to its own transpose) so that this cell can be re-run
# without a shape-mismatch error.
stacked = np.zeros((12, hrus_total, n_feat))   # (months, hrus, features)
st = 0
en = 0
for i, name in enumerate(names_list):
    en += n_hrus[i]
    # Each file is expected to hold n_hrus[i] HRUs; a mismatching array shape
    # makes this assignment raise.
    stacked[:, st:en, :] = np.load(os.path.join(data_path, name))
    st = en

# Reorder to (hrus, months, features)
all_data = stacked.transpose(1, 0, 2)

In [32]:
def normalization(type, all_data, n_feat):
    """Normalize a 3-D data array with the selected scheme.

    Parameters
    ----------
    type : str
        'custom' -- min-max scale each of the first ``n_feat`` features to
                    [0, 1], using the min/max taken over the first two axes.
        'minmax' -- ``TimeSeriesScalerMinMax(value_range=(0, 1))``.
        'std'    -- ``TimeSeriesScalerMeanVariance(0, 1)``.
        Any other value returns ``all_data`` unchanged.
    all_data : numpy.ndarray
        3-D array whose last axis indexes the features.
    n_feat : int
        Number of leading features to scale in 'custom' mode; any remaining
        features are left as zeros in the result.

    Returns
    -------
    numpy.ndarray
        The normalized array (or ``all_data`` itself for an unknown type).
    """
    if type == 'custom':
        X_train_norm = np.zeros(all_data.shape)
        feats = np.asarray(all_data, dtype=float)[:, :, :n_feat]
        min_arr = feats.min(axis=(0, 1))
        max_arr = feats.max(axis=(0, 1))
        span = max_arr - min_arr
        # A constant feature has zero range; dividing by it would yield NaN
        # and poison later distance computations. Map such features to 0.
        safe_span = np.where(span == 0, 1.0, span)
        X_train_norm[:, :, :n_feat] = (feats - min_arr) / safe_span
    elif type == 'minmax':
        X_train_norm = TimeSeriesScalerMinMax(value_range=(0, 1)).fit_transform(all_data)
    elif type == 'std':
        X_train_norm = TimeSeriesScalerMeanVariance(0, 1).fit_transform(all_data)
    else:
        # Unknown scheme: pass the data through untouched.
        X_train_norm = all_data
    return X_train_norm

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [33]:
# Normalize Data (CUSTOM)
X_train_norm = normalization('custom', all_data, n_feat)

# Train model and predict labels
# random_state fixes the centroid initialisation so that re-running the
# notebook reproduces the same clustering.
model = TimeSeriesKMeans(n_clusters=30, metric="dtw", max_iter=10, random_state=0)
# X (n_ts , sz, d)
# n_ts : number of time series
# sz : size of time series (n time steps)
# d : dimension of data (n features)
model.fit(X_train_norm)

labels = model.predict(X_train_norm)

In [None]:
# Save predicted clusters to disk
# labels holds one entry per HRU, while names_list holds one entry per file.
# Repeat each file name once per HRU it contains so that every label is
# written next to the file it came from; zipping the two directly would
# silently truncate to the number of files.
hru_names = [name for name, count in zip(names_list, n_hrus) for _ in range(count)]
if len(hru_names) != len(labels):
    raise ValueError(
        "Expected one label per HRU: got %d labels for %d HRUs"
        % (len(labels), len(hru_names))
    )
clustering_custom = list(zip(hru_names, labels))
# The with-block closes the file even if a write fails.
with open("clustering_custom.txt", "w") as textfile:
    for name, label in clustering_custom:
        textfile.write(name + ' , ')
        textfile.write(str(label))
        textfile.write('\n')
print("Wrote %d cluster assignments to clustering_custom.txt" % len(clustering_custom))