In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.cluster import DBSCAN
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import SpectralClustering

# Bank Reserves

In [33]:
br_pca = pd.read_csv('extracted_features/bank_reserves_pca_standardized.csv')
br_pca = br_pca.drop('Unnamed: 0', axis=1)
br_dae = pd.read_csv('extracted_features/bank_reserves_dae.csv', header=None)
br_dcae = pd.read_csv('extracted_features/bank_reserves_dcae.csv', header=None)

## Kmeans

In [34]:
br_pca_kmeans = KMeans(n_clusters=7, random_state=0, n_init="auto").fit(br_pca).labels_

In [35]:
br_dae_kmeans = KMeans(n_clusters=7, random_state=0, n_init="auto").fit(br_dae).labels_

In [36]:
br_dcae_kmeans = KMeans(n_clusters=7, random_state=0, n_init="auto").fit(br_dcae).labels_

## Agglomerative Clustering

In [37]:
hierarchical_cluster = AgglomerativeClustering(n_clusters=7, metric='euclidean', linkage='ward')
labels_sub = hierarchical_cluster.fit_predict(br_pca[1:10000])
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(br_pca[1:10000], labels_sub)
br_pca_agglom = neigh.predict(br_pca)

In [38]:
hierarchical_cluster = AgglomerativeClustering(n_clusters=7, metric='euclidean', linkage='ward')
labels_sub = hierarchical_cluster.fit_predict(br_dae[1:10000])
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(br_dae[1:10000], labels_sub)
br_dae_agglom = neigh.predict(br_dae)

In [39]:
hierarchical_cluster = AgglomerativeClustering(n_clusters=7, metric='euclidean', linkage='ward')
labels_sub = hierarchical_cluster.fit_predict(br_dcae[1:10000])
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(br_dcae[1:10000], labels_sub)
br_dcae_agglom = neigh.predict(br_dcae)

## Spectral Clustering

In [48]:
spectral_labs =SpectralClustering(n_clusters=7, assign_labels='discretize', affinity='nearest_neighbors', random_state=0).fit(br_pca[1:10000]).labels_
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(br_pca[1:10000], spectral_labs)
br_pca_spectral = neigh.predict(br_pca)



In [49]:
spectral_labs =SpectralClustering(n_clusters=7, assign_labels='discretize', affinity='nearest_neighbors', random_state=0).fit(br_dae[1:10000]).labels_
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(br_dae[1:10000], spectral_labs)
br_dae_spectral = neigh.predict(br_dae)

In [50]:
spectral_labs =SpectralClustering(n_clusters=7, assign_labels='discretize', affinity='nearest_neighbors', random_state=0).fit(br_dcae[1:10000]).labels_
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(br_dcae[1:10000], spectral_labs)
br_dcae_spectral = neigh.predict(br_dcae)

## Concat results

In [56]:
br_results = np.vstack((br_pca_kmeans, br_dae_kmeans, br_dcae_kmeans, 
                        br_pca_agglom, br_dae_agglom, br_dcae_agglom, 
                        br_pca_spectral, br_dae_spectral, br_dcae_spectral))
br_results_df = pd.DataFrame(br_results)
br_results_df.index = ["PCA_KMeans", "DAE_KMeans", "DCAE_KMeans", 
    "PCA_Agglom", "DAE_Agglom", "DCAE_Agglom", 
    "PCA_Spectral", "DAE_Spectral", "DCAE_Spectral"]

In [58]:
br_results_df.to_csv("bank_reserves_results.csv", )

In [59]:
br_results_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,99990,99991,99992,99993,99994,99995,99996,99997,99998,99999
PCA_KMeans,6,6,6,3,2,6,6,1,6,6,...,3,2,2,1,6,5,4,0,2,6
DAE_KMeans,5,5,2,4,0,2,2,4,5,2,...,4,0,6,1,2,1,1,3,6,5
DCAE_KMeans,2,2,3,4,0,3,3,1,2,3,...,4,0,5,1,3,1,4,0,5,2
PCA_Agglom,4,4,4,0,3,4,4,0,4,4,...,0,3,6,0,4,1,5,3,6,6
DAE_Agglom,1,1,5,3,4,5,1,3,1,5,...,3,4,2,3,1,6,6,0,2,1
DCAE_Agglom,0,0,0,6,1,0,0,5,0,0,...,6,1,1,5,0,5,2,3,1,4
PCA_Spectral,3,3,0,5,4,0,3,5,3,0,...,5,4,6,5,3,2,1,4,6,6
DAE_Spectral,4,4,0,1,5,0,4,1,4,0,...,1,6,5,1,4,3,3,6,5,5
DCAE_Spectral,5,5,0,6,2,0,5,1,5,0,...,6,2,4,1,5,1,3,2,4,4


# Epstein Civil Violence

In [76]:
ecv_pca = pd.read_csv('extracted_features/epstein_pca_standardized.csv')
ecv_pca = ecv_pca.drop('Unnamed: 0', axis=1)
ecv_dae = pd.read_csv('extracted_features/epstein_dae.csv', header=None)
ecv_dcae = pd.read_csv('extracted_features/epstein_dcae.csv', header=None)

## Kmeans

In [77]:
ecv_pca_kmeans = KMeans(n_clusters=8, random_state=0, n_init="auto").fit(ecv_pca).labels_
ecv_dae_kmeans = KMeans(n_clusters=8, random_state=0, n_init="auto").fit(ecv_dae).labels_
ecv_dcae_kmeans = KMeans(n_clusters=8, random_state=0, n_init="auto").fit(ecv_dcae).labels_

## Agglomerative 

In [78]:
hierarchical_cluster = AgglomerativeClustering(n_clusters=8, metric='euclidean', linkage='ward')
labels_sub = hierarchical_cluster.fit_predict(ecv_pca[1:10000])
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(ecv_pca[1:10000], labels_sub)
ecv_pca_agglom = neigh.predict(ecv_pca)

In [79]:
hierarchical_cluster = AgglomerativeClustering(n_clusters=8, metric='euclidean', linkage='ward')
labels_sub = hierarchical_cluster.fit_predict(ecv_dae[1:10000])
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(ecv_dae[1:10000], labels_sub)
ecv_dae_agglom = neigh.predict(ecv_dae)

In [80]:
hierarchical_cluster = AgglomerativeClustering(n_clusters=8, metric='euclidean', linkage='ward')
labels_sub = hierarchical_cluster.fit_predict(ecv_dcae[1:10000])
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(ecv_dcae[1:10000], labels_sub)
ecv_dcae_agglom = neigh.predict(ecv_dcae)

## Spectral Clustering

In [81]:
spectral_labs =SpectralClustering(n_clusters=8, assign_labels='discretize', affinity='nearest_neighbors', random_state=0).fit(ecv_pca[1:10000]).labels_
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(ecv_pca[1:10000], spectral_labs)
ecv_pca_spectral = neigh.predict(ecv_pca)

In [82]:
spectral_labs =SpectralClustering(n_clusters=8, assign_labels='discretize', affinity='nearest_neighbors', random_state=0).fit(ecv_dae[1:10000]).labels_
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(ecv_dae[1:10000], spectral_labs)
ecv_dae_spectral = neigh.predict(ecv_dae)

In [83]:
spectral_labs =SpectralClustering(n_clusters=8, assign_labels='discretize', affinity='nearest_neighbors', random_state=0).fit(ecv_dcae[1:10000]).labels_
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(ecv_dcae[1:10000], spectral_labs)
ecv_dcae_spectral = neigh.predict(ecv_dcae)

## Concat Results

In [84]:
ecv_results = np.vstack((ecv_pca_kmeans, ecv_dae_kmeans, ecv_dcae_kmeans, 
                        ecv_pca_agglom, ecv_dae_agglom, ecv_dcae_agglom, 
                        ecv_pca_spectral, ecv_dae_spectral, ecv_dcae_spectral))
ecv_results_df = pd.DataFrame(ecv_results)
ecv_results_df.index = ["PCA_KMeans", "DAE_KMeans", "DCAE_KMeans", 
    "PCA_Agglom", "DAE_Agglom", "DCAE_Agglom", 
    "PCA_Spectral", "DAE_Spectral", "DCAE_Spectral"]

In [85]:
ecv_results_df.to_csv("epstein_results.csv", )

In [86]:
ecv_results_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,50352,50353,50354,50355,50356,50357,50358,50359,50360,50361
PCA_KMeans,3,5,5,0,5,5,0,0,0,1,...,5,1,5,5,1,0,5,5,5,0
DAE_KMeans,4,7,7,0,7,7,0,3,0,1,...,7,1,7,7,1,0,7,7,7,0
DCAE_KMeans,6,2,2,0,2,2,0,0,0,1,...,2,1,2,2,1,0,2,2,2,0
PCA_Agglom,1,7,3,2,7,7,2,5,2,3,...,7,3,7,7,3,2,7,7,7,2
DAE_Agglom,4,2,2,3,2,2,3,0,3,7,...,2,7,2,2,7,3,2,2,2,3
DCAE_Agglom,0,2,2,1,2,2,1,1,1,7,...,2,2,2,2,0,1,2,2,2,1
PCA_Spectral,2,0,0,1,7,5,4,1,4,6,...,0,6,7,5,6,1,0,0,5,1
DAE_Spectral,5,0,1,2,0,6,2,7,2,4,...,0,4,3,6,4,2,1,0,6,2
DCAE_Spectral,7,0,6,3,0,5,3,3,3,2,...,0,6,4,5,1,3,6,0,5,3


# Forest Fire

In [8]:
ff_pca = pd.read_csv('extracted_features/forestfire_pca_standardized.csv')
ff_pca = ff_pca.drop('Unnamed: 0', axis=1)
ff_dae = pd.read_csv('extracted_features/forestfire_dae.csv', header=None)
ff_dcae = pd.read_csv('extracted_features/forestfire_dcae.csv', header=None)

## Kmeans

In [9]:
ff_pca_kmeans = KMeans(n_clusters=4, random_state=0, n_init="auto").fit(ff_pca).labels_
ff_dae_kmeans = KMeans(n_clusters=4, random_state=0, n_init="auto").fit(ff_dae).labels_
ff_dcae_kmeans = KMeans(n_clusters=4, random_state=0, n_init="auto").fit(ff_dcae).labels_

## Agglomerative

In [12]:
hierarchical_cluster = AgglomerativeClustering(n_clusters=4, metric='euclidean', linkage='ward')
labels_sub = hierarchical_cluster.fit_predict(ff_pca[1:10000])
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(ff_pca[1:10000], labels_sub)
ff_pca_agglom = neigh.predict(ff_pca)

In [13]:
hierarchical_cluster = AgglomerativeClustering(n_clusters=4, metric='euclidean', linkage='ward')
labels_sub = hierarchical_cluster.fit_predict(ff_dae[1:10000])
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(ff_dae[1:10000], labels_sub)
ff_dae_agglom = neigh.predict(ff_dae)

In [14]:
hierarchical_cluster = AgglomerativeClustering(n_clusters=4, metric='euclidean', linkage='ward')
labels_sub = hierarchical_cluster.fit_predict(ff_dcae[1:10000])
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(ff_dcae[1:10000], labels_sub)
ff_dcae_agglom = neigh.predict(ff_dcae)

## Spectral

In [15]:
spectral_labs =SpectralClustering(n_clusters=4, assign_labels='discretize', affinity='nearest_neighbors', random_state=0).fit(ff_pca[1:10000]).labels_
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(ff_pca[1:10000], spectral_labs)
ff_pca_spectral = neigh.predict(ff_pca)



In [16]:
spectral_labs =SpectralClustering(n_clusters=4, assign_labels='discretize', affinity='nearest_neighbors', random_state=0).fit(ff_dae[1:10000]).labels_
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(ff_dae[1:10000], spectral_labs)
ff_dae_spectral = neigh.predict(ff_dae)



In [17]:
spectral_labs =SpectralClustering(n_clusters=4, assign_labels='discretize', affinity='nearest_neighbors', random_state=0).fit(ff_dcae[1:10000]).labels_
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(ff_dcae[1:10000], spectral_labs)
ff_dcae_spectral = neigh.predict(ff_dcae)



## Concat Results

In [18]:
ff_results = np.vstack((ff_pca_kmeans, ff_dae_kmeans, ff_dcae_kmeans, 
                        ff_pca_agglom, ff_dae_agglom, ff_dcae_agglom, 
                        ff_pca_spectral, ff_dae_spectral, ff_dcae_spectral))
ff_results_df = pd.DataFrame(ff_results)
ff_results_df.index = ["PCA_KMeans", "DAE_KMeans", "DCAE_KMeans", 
    "PCA_Agglom", "DAE_Agglom", "DCAE_Agglom", 
    "PCA_Spectral", "DAE_Spectral", "DCAE_Spectral"]

In [19]:
ff_results_df.to_csv("forestfire_results.csv", )

In [20]:
ff_results_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,99990,99991,99992,99993,99994,99995,99996,99997,99998,99999
PCA_KMeans,1,2,1,1,1,1,0,1,1,1,...,1,0,0,1,1,2,0,1,0,0
DAE_KMeans,3,2,1,3,1,3,0,3,3,3,...,3,0,0,1,3,2,0,1,0,0
DCAE_KMeans,1,3,2,1,2,1,0,2,1,1,...,1,0,0,2,1,3,0,2,0,0
PCA_Agglom,0,3,0,0,0,0,1,0,0,0,...,0,1,1,0,0,3,1,0,1,1
DAE_Agglom,3,2,1,3,1,3,0,1,3,3,...,3,0,0,1,3,2,0,1,0,0
DCAE_Agglom,2,0,3,2,0,2,1,2,2,2,...,2,1,1,3,2,0,1,3,1,1
PCA_Spectral,3,3,3,3,3,3,0,3,3,3,...,3,0,0,3,3,3,0,3,1,0
DAE_Spectral,3,0,0,3,0,3,0,0,3,3,...,3,1,1,0,3,0,1,0,2,0
DCAE_Spectral,3,3,3,3,3,3,0,3,3,3,...,3,0,0,3,3,3,1,3,2,0
