In [None]:
import keras
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.callbacks import EarlyStopping

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

from tslearn.clustering import TimeSeriesKMeans
from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMeanVariance, \
    TimeSeriesResampler
from tslearn.clustering import silhouette_score
import seaborn as sns
from tslearn.utils import to_time_series_dataset
from tslearn.clustering import silhouette_score

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score 
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder

from yellowbrick.cluster import SilhouetteVisualizer
from yellowbrick.cluster import KElbowVisualizer

In [None]:
## Import simulation data - Forest Fires

In [None]:
# Number of simulation batches on disk; files are suffixed 0 .. N_RUNS - 1.
N_RUNS = 100


def _read_rows(path):
    """Parse a comma-separated text file into a list of 1-D NumPy arrays.

    Each line of the file becomes one array (parsed with ``np.fromstring`` in
    text mode after stripping surrounding whitespace).

    Parameters
    ----------
    path : str
        Path of the CSV file to read.

    Returns
    -------
    list of numpy.ndarray
        One array per line, in file order.

    Raises
    ------
    OSError
        If the file cannot be opened (e.g. it does not exist).
    """
    # The context manager closes the handle even if parsing raises.
    with open(path, 'r') as handle:
        return [np.fromstring(line.strip(), sep=',') for line in handle]


# Rows from all batches are concatenated in batch order, so index i refers to
# the same row position in each of the four lists (assuming every batch stores
# the same number of rows in its four files -- TODO confirm).
inputs = []
fine = []
burning = []
burnedout = []

for j in range(N_RUNS):
    inputs.extend(_read_rows("train_inputs_" + str(j) + ".csv"))
    fine.extend(_read_rows("train_outputs_fine_" + str(j) + ".csv"))
    burning.extend(_read_rows("train_outputs_burning_" + str(j) + ".csv"))
    burnedout.extend(_read_rows("train_outputs_burnedout_" + str(j) + ".csv"))

In [None]:
# Scale every burned-out series by the sum of the three state values at
# index 0 of the same row (fine + burning + burnedout; presumably the total
# number of cells at the first time step -- TODO confirm).
# Despite the name, the result is a fraction, not a value multiplied by 100.
#
# A new list is built instead of writing into `burnedout`: binding
# `burnedout_percent = burnedout` would make both names refer to the same list,
# so the loaded values would be overwritten and re-running the cell would
# normalise the already-normalised data again.
assert len(fine) == len(burning) == len(burnedout), "state lists must be aligned row by row"

burnedout_percent = [
    burnedout_row / (fine_row[0] + burning_row[0] + burnedout_row[0])
    for fine_row, burning_row, burnedout_row in zip(fine, burning, burnedout)
]

In [None]:
# NOTE(review): `X_train` is first assigned in the following cell
# (`X_train = to_time_series_dataset(burnedout_percent)`); nothing earlier in
# the visible notebook defines it, so with a fresh kernel and top-to-bottom
# execution this cell raises NameError. Consider moving it below the
# clustering cell or removing it.
X_train

In [None]:
# Cluster the normalised burned-out series with soft-DTW k-means and plot each
# cluster's members (black, translucent) with its centroid (red).
seed = 1
np.random.seed(seed)
X_train = to_time_series_dataset(burnedout_percent)
sz = X_train.shape[1]  # length of the time axis, used as the x-axis limit

# Single source of truth for the cluster count. The model and the plotting
# loop must agree: indexing `km.cluster_centers_` beyond the number of fitted
# clusters raises IndexError.
n_clusters = 2

km = TimeSeriesKMeans(n_clusters=n_clusters, verbose=True, random_state=seed, metric="softdtw")
y_pred = km.fit_predict(X_train)

plt.figure()
for yi in range(n_clusters):
    # One panel per cluster, laid out in a single row.
    plt.subplot(1, n_clusters, yi + 1)
    for xx in X_train[y_pred == yi]:
        plt.plot(xx.ravel(), "k-", alpha=.2)
    plt.plot(km.cluster_centers_[yi].ravel(), "r-")
    plt.xlim(0, sz)
    plt.ylim(0, 1)
    plt.text(0.55, 0.35,'Cluster %d' % (yi),
             transform=plt.gca().transAxes)
    if yi == 1:
        # NOTE(review): the title says "mean homogeneity" although the series
        # plotted here come from `burnedout_percent` -- confirm the label.
        plt.title("soft-DTW $k$-means (mean homogeneity)")

plt.tight_layout()
#plt.savefig('schelling_homogeneity_ts.svg')
plt.show()

In [None]:
# Cluster the `burnedout` series with soft-DTW k-means and plot each cluster's
# members (black, translucent) with its centroid (red).
seed = 1
np.random.seed(seed)
X_train = to_time_series_dataset(burnedout)
sz = X_train.shape[1]  # length of the time axis, used as the x-axis limit

# Single source of truth for the cluster count. The model and the plotting
# loop must agree: indexing `km.cluster_centers_` beyond the number of fitted
# clusters raises IndexError.
n_clusters = 2

km = TimeSeriesKMeans(n_clusters=n_clusters, verbose=True, random_state=seed, metric="softdtw")
y_pred = km.fit_predict(X_train)

plt.figure()
for yi in range(n_clusters):
    # One panel per cluster, laid out in a single row.
    plt.subplot(1, n_clusters, yi + 1)
    for xx in X_train[y_pred == yi]:
        plt.plot(xx.ravel(), "k-", alpha=.2)
    plt.plot(km.cluster_centers_[yi].ravel(), "r-")
    plt.xlim(0, sz)
    # NOTE(review): y-limits of (0, 1) only make sense for fractions --
    # confirm whether `burnedout` holds raw or normalised values at this point.
    plt.ylim(0, 1)
    plt.text(0.55, 0.35,'Cluster %d' % (yi),
             transform=plt.gca().transAxes)
    if yi == 1:
        # NOTE(review): the title says "mean homogeneity" although the series
        # plotted here come from `burnedout` -- confirm the label.
        plt.title("soft-DTW $k$-means (mean homogeneity)")

plt.tight_layout()
#plt.savefig('schelling_homogeneity_ts.svg')
plt.show()

In [None]:
# Preview only the first few series; displaying the whole list would flood the
# cell output with every array that was loaded.
burnedout[:5]

In [None]:
## Mapping the cluster labels back to the inputs - KNN

In [None]:
# NOTE(review): `train_outputs` is not created anywhere in the visible notebook;
# it is presumably a pandas DataFrame with one row per clustered series --
# confirm it is defined before this cell on a fresh kernel.

# Attach the soft-DTW cluster label of each series as a new column.
train_outputs['softDTW_labels'] = y_pred

# `reset_index` returns a new frame rather than modifying in place, so the
# result has to be bound back to the name for the reset to take effect.
train_outputs = train_outputs.reset_index(drop=True)

# Take an independent snapshot; a plain assignment would only create a second
# name for the same object and would not protect against later modification.
backup = train_outputs.copy()

In [None]:
# Train a k-nearest-neighbours classifier (default settings) that predicts the
# soft-DTW cluster label from the single column "0" of `train_outputs`.
knn_features = train_outputs["0"].values.reshape(-1, 1)  # one feature -> 2-D column
knn_targets = train_outputs["softDTW_labels"]

# 80 / 20 split with a fixed random_state so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    knn_features,
    knn_targets,
    test_size=0.2,
    random_state=1,
)

knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train, y_train)

# Predicted cluster labels for the held-out 20 %.
ypred = knn_clf.predict(X_test)

In [None]:
# Report how well the KNN predictions (`ypred`) match the held-out labels
# (`y_test`): confusion matrix, per-class report, then overall accuracy.
result = confusion_matrix(y_test, ypred)
print("Confusion Matrix:", result, sep="\n")

result1 = classification_report(y_test, ypred)
print("Classification Report:", result1, sep="\n")

result2 = accuracy_score(y_test, ypred)
print("Accuracy:", result2)

In [None]:
## Mapping the cluster labels back to the inputs - Simple Neural Network

In [None]:
# Inspect `train_outputs`, which by this point carries the `softDTW_labels`
# column assigned in the KNN section above. It is presumably a pandas
# DataFrame, but it is not constructed anywhere in the visible notebook --
# TODO confirm.
train_outputs

In [None]:
# --- Data split ---------------------------------------------------------------
# Hold out 10 % of the rows as a test set. The remaining 90 % is split again by
# Keras (validation_split in model.fit) into training and validation data.
X, X_test, Y, y_test = train_test_split(train_outputs["0"].values.reshape(-1, 1),
                                        train_outputs["softDTW_labels"], test_size=0.1,
                                        random_state=1)

# One-hot encode the integer cluster labels, as required by
# categorical cross-entropy.
dummy_y = np_utils.to_categorical(Y)
# Size the output layer from the encoded labels instead of hard-coding it, so
# the network stays consistent with the number of clusters.
n_classes = dummy_y.shape[1]

# --- Model --------------------------------------------------------------------
# One hidden layer of 8 ReLU units followed by a softmax over the classes.
model = Sequential()
model.add(Dense(8, input_shape=(X.shape[1],), activation='relu'))
model.add(Dense(n_classes, activation='softmax'))
model.summary()

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Stop once the validation loss has not improved for 10 epochs and roll back
# to the best weights seen.
es = keras.callbacks.EarlyStopping(monitor='val_loss',
                                   mode='min',
                                   patience=10,
                                   restore_best_weights=True)

# --- Training -----------------------------------------------------------------
history = model.fit(X,
                    dummy_y,
                    callbacks=[es],
                    epochs=500,
                    batch_size=10,
                    shuffle=True,
                    validation_split=0.2,
                    verbose=1)

history_dict = history.history

acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']

loss = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(acc) + 1)

# --- Learning curves ----------------------------------------------------------
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# --- Evaluation on the data passed to fit() -----------------------------------
# These rows were used for training/validation, so the scores are optimistic.
preds = model.predict(X)
print(preds[0])
print(np.sum(preds[0]))  # sanity check: softmax outputs should sum to ~1

matrix = confusion_matrix(dummy_y.argmax(axis=1), preds.argmax(axis=1))
print("Training/validation data:")
# A bare `matrix` expression in the middle of a cell is not displayed, so the
# confusion matrix is printed explicitly.
print(matrix)
print(classification_report(dummy_y.argmax(axis=1), preds.argmax(axis=1)))

# --- Evaluation on the held-out test split ------------------------------------
# X_test / y_test were never shown to the model, so this is the unbiased
# estimate of how well the cluster labels can be recovered.
test_preds = model.predict(X_test)
test_pred_labels = test_preds.argmax(axis=1)
print("Held-out test data:")
print(confusion_matrix(y_test, test_pred_labels))
print(classification_report(y_test, test_pred_labels))