In [None]:
import keras
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.callbacks import EarlyStopping

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

from tslearn.clustering import TimeSeriesKMeans
from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMeanVariance, \
    TimeSeriesResampler
from tslearn.clustering import silhouette_score
import seaborn as sns
from tslearn.utils import to_time_series_dataset
from tslearn.clustering import silhouette_score

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score 
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder

from yellowbrick.cluster import SilhouetteVisualizer
from yellowbrick.cluster import KElbowVisualizer

In [None]:
## Import simulation data - Epstein civil unrest

In [None]:
# Directory containing the numbered CSV files of the Epstein civil violence data.
# Set the EPSTEIN_DATA_DIR environment variable to run on another machine.
DATA_DIR = os.environ.get(
    "EPSTEIN_DATA_DIR",
    "/Users/maria/Desktop/WSC_simulations/epstein_civil_violence_data",
)
N_FILES = 100  # files are numbered 0 .. N_FILES - 1

# The working directory is changed so the relative file names below resolve
# against DATA_DIR.
os.chdir(DATA_DIR)


def _load_numbered_csvs(prefix, n_files=N_FILES):
    """Read `<prefix>_<i>.csv` for i in 0..n_files-1 and stack them row-wise.

    The files are read without a header row and concatenated in one
    `pd.concat` call, in file-number order. Each file's own row index is
    retained, so index values repeat across files.
    """
    frames = [
        pd.read_csv(f"{prefix}_{file_no}.csv", header=None)
        for file_no in range(n_files)
    ]
    return pd.concat(frames)


train_inputs = _load_numbered_csvs("train_inputs")
train_outputs_Active = _load_numbered_csvs("train_outputs_Active")
train_outputs_Jailed = _load_numbered_csvs("train_outputs_Jailed")
train_outputs_Quiescent = _load_numbered_csvs("train_outputs_Quiescent")

In [None]:
# Seed for the k-means initialisation. It is set in this cell so the cell runs
# on a fresh kernel without relying on a later cell having been executed.
seed = 0

# Convert the "Active" outputs into a tslearn time-series dataset
# (presumably one series per DataFrame row -- confirm against the CSV layout).
X_train = to_time_series_dataset(train_outputs_Active)
# The elbow visualizer is fitted on a 2-D array, so each series is flattened
# to a 1-D vector.
X_train_flat = [xi.flatten() for xi in X_train]

# Elbow plot over the candidate cluster counts given by k=(2, 15).
km = TimeSeriesKMeans(verbose=True, random_state=seed)
visualizer = KElbowVisualizer(km, k=(2, 15))

visualizer.fit(np.array(X_train_flat))
visualizer.show()

In [None]:
seed = 0
np.random.seed(seed)
N_CLUSTERS = 5  # single source of truth for the model and the plotting loop
sz = X_train.shape[1]  # used as the upper x-axis limit of every panel

# NOTE: `X_train` must still be the time-series dataset built from
# `train_outputs_Active`; the k-NN cell further down rebinds the same name.
km = TimeSeriesKMeans(n_clusters=N_CLUSTERS, verbose=True, random_state=seed)
y_pred = km.fit_predict(X_train)  # one cluster label per series

# One panel per cluster, laid out on the smallest 3-column grid that fits.
n_cols = 3
n_rows = (N_CLUSTERS + n_cols - 1) // n_cols
plt.figure()
for yi in range(N_CLUSTERS):
    plt.subplot(n_rows, n_cols, yi + 1)
    # Members of the cluster in translucent black, the cluster centre in red.
    for xx in X_train[y_pred == yi]:
        plt.plot(xx.ravel(), "k-", alpha=.2)
    plt.plot(km.cluster_centers_[yi].ravel(), "r-")
    plt.xlim(0, sz)
    # Fixed y-range shared by all panels (presumably chosen to cover the
    # observed counts -- confirm against the data).
    plt.ylim(0, 1100)
    plt.text(0.55, 0.35, 'Cluster %d' % (yi),
             transform=plt.gca().transAxes)
    if yi == 1:
        # The series clustered here are the "Active" outputs, so the title
        # names them rather than "satisfaction".
        plt.title("Euclidean $k$-means (active agents)")

plt.tight_layout()
plt.show()

In [None]:
# Predict the k-means cluster of each simulation from its input parameters
# with a k-nearest-neighbours classifier.
#
# Features are columns 0-3 of `train_inputs`; targets are the cluster labels
# returned by `km.fit_predict` (`y_pred`). No "euclidean_labels" column is ever
# added to `train_inputs` in this notebook, so the labels are used directly.
# Assumes row i of `train_inputs` belongs to series i of `train_outputs_Active`
# -- TODO confirm against the data export.
assert len(y_pred) == len(train_inputs), (
    "cluster labels and input rows must have the same length"
)

# NOTE: this rebinds `X_train`, which earlier cells use for the time-series data.
X_train, X_test, y_train, y_test = train_test_split(
    train_inputs[[0, 1, 2, 3]].values,
    y_pred,
    test_size=0.2,
    random_state=1,
)
knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train, y_train)
ypred = knn_clf.predict(X_test)  # predicted cluster labels for the held-out rows

In [None]:
# Score the k-NN predictions (`ypred`) against the held-out labels (`y_test`):
# compute all three summaries first, then print them in a fixed order.
result = confusion_matrix(y_test, ypred)
result1 = classification_report(y_test, ypred)
result2 = accuracy_score(y_test, ypred)

for heading, value in (
    ("Confusion Matrix:", result),
    ("Classification Report:", result1),
):
    print(heading)
    print(value)
print("Accuracy:", result2)

In [None]:
# Feed-forward classifier mapping the four input parameters to a k-means
# cluster label.
#
# Targets are the cluster labels from `km.fit_predict` (`y_pred`); there is no
# `train_outputs` frame in this notebook. Assumes row i of `train_inputs`
# belongs to series i of the clustered dataset -- TODO confirm.
# NOTE: this rebinds `X_test` / `y_test` from the k-NN cell.
X, X_test, Y, y_test = train_test_split(
    train_inputs.iloc[:, 0:4].values,
    y_pred,
    test_size=0.1,
    random_state=1,
)

# One-hot encode the integer labels for the categorical cross-entropy loss.
dummy_y = np_utils.to_categorical(Y)
# Output width follows the encoded targets instead of a hard-coded 5.
n_classes = dummy_y.shape[1]

model = Sequential()
model.add(Dense(16, input_shape=(X.shape[1],), activation='relu'))
# Only the first layer needs an input shape; later layers infer theirs.
model.add(Dense(16, activation='relu'))
model.add(Dense(n_classes, activation='softmax'))
model.summary()

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# 20% of the training split is set aside by Keras for per-epoch validation.
history = model.fit(X,
                    dummy_y,
                    epochs=100,
                    batch_size=100,
                    shuffle=True,
                    validation_split=0.2,
                    verbose=1)

history_dict = history.history

acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']

loss = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(acc) + 1)

# Learning curves: training vs. validation accuracy per epoch.
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Sanity check on the first training row: the softmax outputs should sum to ~1.
preds = model.predict(X)
print(preds[0])
print(np.sum(preds[0]))

# Metrics on the data the model was fitted on (includes the validation slice),
# so they are optimistic.
matrix = confusion_matrix(dummy_y.argmax(axis=1), preds.argmax(axis=1))
print(matrix)  # a bare `matrix` mid-cell is not displayed, so print it
print(classification_report(dummy_y.argmax(axis=1), preds.argmax(axis=1)))

# Metrics on the held-out 10% split, which the model never saw during fitting.
test_preds = model.predict(X_test)
print("Held-out test set:")
print(confusion_matrix(y_test, test_preds.argmax(axis=1)))
print(classification_report(y_test, test_preds.argmax(axis=1)))