## Original code is from https://github.com/margrietcox/LUS-scenario-discovery

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from os import fdopen, remove
from sklearn import metrics
import plotly.express as px
import seaborn as sns
%matplotlib inline
from scipy import stats
from numpy.lib import recfunctions as rf
import plotly.graph_objects as go

from sklearn.cluster import AgglomerativeClustering

In [8]:
## Load Data ##
# Directories holding the pre-computed pairwise map-comparison matrices.
THESIS_OUTPUT_DIR = 'E:/thesis_data/Output_DFs/'
LUMOS_OUTPUT_DIR = 'C:/LUMOS/MCK/Output_DFs/'

# Labels 'map0' ... 'map1999', used as the row index of the matrices below.
map_list = ['map' + str(i) for i in range(2000)]


def _zero_diagonal(frame):
    """Return a copy of ``frame`` whose main diagonal is set to 0.

    Writing through ``frame.values`` is avoided on purpose: that array is
    read-only under pandas Copy-on-Write and may be a detached copy for
    mixed dtypes, in which case the write would fail or be silently lost.
    """
    values = frame.to_numpy(copy=True)
    np.fill_diagonal(values, 0)
    return pd.DataFrame(values, index=frame.index, columns=frame.columns)


# Kappa (similarity, turned into a distance as 1 - kappa)
kappa_df = pd.read_csv(THESIS_OUTPUT_DIR + 'kappa.csv', index_col='Unnamed: 0')
dist_kappa = 1 - kappa_df

# OA (overall accuracy; similarity, turned into a distance as 1 - OA)
oa_df = pd.read_csv(THESIS_OUTPUT_DIR + 'overallaccuracy_df.csv')
dist_oa = 1 - oa_df

# OAD (overall allocation difference)
oad_df = pd.read_csv(THESIS_OUTPUT_DIR + 'overallallocationdifference_df.csv')
oad_df.index = map_list
oad_df = _zero_diagonal(oad_df)

# Shannon
dist_shan = pd.read_csv(THESIS_OUTPUT_DIR + 'shannon_df.csv')
dist_shan.index = map_list

# Simpsons
dist_simp = pd.read_csv(THESIS_OUTPUT_DIR + 'simp_df.csv')
dist_simp.index = map_list

# Class 4 Residential
dist_tca4 = pd.read_csv(LUMOS_OUTPUT_DIR + 'TCA4_df.csv')
dist_tca4.index = map_list

dist_pland4 = pd.read_csv(LUMOS_OUTPUT_DIR + 'PLAND4_df.csv')
dist_pland4.index = map_list

# Class 14 Corn
dist_tca14 = pd.read_csv(LUMOS_OUTPUT_DIR + 'TCA14_df.csv')
dist_tca14.index = map_list

dist_pland14 = pd.read_csv(LUMOS_OUTPUT_DIR + 'PLAND14_df.csv')
dist_pland14.index = map_list

# Class 22 Nature
dist_tca22 = pd.read_csv(THESIS_OUTPUT_DIR + 'Total Class Area[class 22]_df.csv')
dist_tca22.index = map_list

dist_pland22 = pd.read_csv(THESIS_OUTPUT_DIR + 'PLAND[class 22]_df.csv')
dist_pland22.index = map_list

# OD (overall difference)
dist_od = pd.read_csv(THESIS_OUTPUT_DIR + 'overalldifference_df.csv')
dist_od.index = map_list
dist_od = _zero_diagonal(dist_od)

# OQD (overall quantity difference)
oqd_df = pd.read_csv(THESIS_OUTPUT_DIR + 'overallquantitydifference_df.csv')
oqd_df.index = map_list
oqd_df = _zero_diagonal(oqd_df)

# QD for category 22 (keeps the default integer index, as before)
qd22_df = pd.read_csv(THESIS_OUTPUT_DIR + 'quantitydifferencecategorical_22_df.csv')
qd22_df = _zero_diagonal(qd22_df)

In [3]:
# Load the experiment design table and drop the unnamed leading column
# ('Unnamed: 0') that comes in with the CSV.
experimentsfile = pd.read_csv('E:/thesis_data/experimentsfinal.csv').drop(['Unnamed: 0'], axis=1)

In [9]:
def _complete_linkage_labels(distances, n_clusters):
    """Cluster a precomputed distance matrix with complete linkage.

    Parameters
    ----------
    distances : square array-like of pairwise distances.
    n_clusters : number of clusters to cut the hierarchy into.

    Returns
    -------
    Array with one integer cluster label per row of ``distances``.
    """
    try:
        # scikit-learn >= 1.2 names this argument ``metric``.
        model = AgglomerativeClustering(n_clusters=n_clusters, metric='precomputed', linkage='complete')
    except TypeError:
        # Older releases only know the original ``affinity`` keyword
        # (deprecated in 1.2, removed in 1.4).
        model = AgglomerativeClustering(n_clusters=n_clusters, affinity='precomputed', linkage='complete')
    return model.fit_predict(distances)


# One clustering per similarity/distance metric; the cluster count differs per metric.
kappaclusters = _complete_linkage_labels(dist_kappa, 7)
oaclusters = _complete_linkage_labels(dist_oa, 7)
odclusters = _complete_linkage_labels(dist_od, 7)
oadclusters = _complete_linkage_labels(oad_df, 6)
oqdclusters = _complete_linkage_labels(oqd_df, 4)
qd22clusters = _complete_linkage_labels(qd22_df, 3)
shannonclusters = _complete_linkage_labels(dist_shan, 4)
simpsonclusters = _complete_linkage_labels(dist_simp, 5)
tca4clusters = _complete_linkage_labels(dist_tca4, 3)
tca14clusters = _complete_linkage_labels(dist_tca14, 5)
tca22clusters = _complete_linkage_labels(dist_tca22, 3)
pland4clusters = _complete_linkage_labels(dist_pland4, 3)
pland14clusters = _complete_linkage_labels(dist_pland14, 5)
pland22clusters = _complete_linkage_labels(dist_pland22, 3)

In [5]:
# Raw code -> display label, per experiment column. The pairs of each column
# are applied one after another, in the order listed.
value_labels = {
    "inbreiding": [(True, 'Liberal'), (False, 'Restrictive')],
    "Claimfile": [(True, 'High'), (False, 'Low')],
    "Density": [(True, '1.33'), (False, '1.00')],
    "TigrisXLfile": [(1, 'Very high'), (2, 'High'), (3, 'Low'), (4, 'Very low')],
    "agrclaim": [(True, 'High'), (False, 'Low')],
    "natureclaim": [(1, 'High'), (2, 'Medium'), (3, 'Low')],
    "naturepolicymap": [(1, 'Robust'), (2, 'EHS'), (3, 'Functional')],
    # Codes 2 and 3 both map to 'Nature', exactly as in the original recoding.
    "policyweightnature": [(1, 'Nat./Recr.'), (2, 'Nature'), (3, 'Nature'), (4, 'No stimuli')],
    "recvalue": [(True, 'High'), (False, 'Low')],
    "pumping": [(True, 'Dry and wet'), (False, 'Dry')],
    "restrictionnatureweight": [(True, 'True'), (False, 'False')],
    "spreidingratio": [(True, '0.5'), (False, '0.0')],
    "valuenaturelocation": [(1, 'Neither'), (2, 'Residential'), (3, 'Nature')],
}

for column_name, code_label_pairs in value_labels.items():
    recoded = experimentsfile[column_name]
    for raw_code, display_label in code_label_pairs:
        recoded = recoded.replace(raw_code, display_label)
    experimentsfile[column_name] = recoded

# Attach the kappa cluster label of every experiment.
cc_kappa = pd.DataFrame({'clusters': kappaclusters})
experimentsfile['clusters'] = cc_kappa['clusters'].to_numpy()
experimentsfile

Unnamed: 0,Claimfile,Density,TigrisXLfile,agrclaim,inbreiding,natureclaim,naturepolicymap,policyweightnature,pumping,recvalue,restrictionnatureweight,spreidingratio,valuenaturelocation,clusters
0,Low,1.33,Very high,High,Liberal,Low,EHS,Nature,Dry and wet,High,True,0.5,Neither,2
1,Low,1.00,Low,Low,Restrictive,Medium,Robust,No stimuli,Dry and wet,Low,False,0.5,Residential,6
2,High,1.33,High,High,Restrictive,Medium,Robust,No stimuli,Dry,High,False,0.0,Nature,1
3,High,1.00,Low,Low,Liberal,Medium,Functional,Nature,Dry,High,True,0.0,Residential,3
4,High,1.33,Low,High,Restrictive,Low,EHS,Nature,Dry and wet,Low,True,0.0,Residential,6
5,High,1.00,High,High,Restrictive,Low,Functional,Nat./Recr.,Dry,High,False,0.0,Residential,3
6,Low,1.00,Low,High,Liberal,Low,Functional,Nature,Dry and wet,Low,True,0.5,Neither,3
7,High,1.33,High,High,Restrictive,Medium,Robust,Nature,Dry and wet,High,False,0.0,Nature,4
8,Low,1.33,High,High,Liberal,High,Functional,Nature,Dry,Low,True,0.0,Nature,0
9,Low,1.33,Low,Low,Liberal,Low,Functional,Nature,Dry and wet,High,False,0.0,Neither,3


In [18]:
# Set the shared 'clusters' column explicitly, like every other metric cell does.
# Without this the plot silently uses whichever clustering was written last
# when the cell is re-run after a later one.
experimentsfile['clusters'] = kappaclusters

# Parallel-categories plot of the experiment inputs, coloured by kappa cluster.
parallel_kappa = px.parallel_categories(
    experimentsfile,
    dimensions=[
        'valuenaturelocation', 'TigrisXLfile', 'inbreiding', 'naturepolicymap',
        'policyweightnature', 'pumping', 'natureclaim', 'recvalue', 'agrclaim',
        'Density', 'Claimfile', 'spreidingratio',
    ],
    color='clusters',
    color_continuous_scale='Rainbow',
    # Column name -> axis title shown in the figure.
    labels={
        'Claimfile': 'Labour pref.',
        'TigrisXLfile': 'House/Job',
        'agrclaim': 'Agr. claim',
        'inbreiding': 'Spat. policy',
        'natureclaim': 'Nat. claim',
        'naturepolicymap': 'Nat. policy',
        'policyweightnature': 'Nat. stimuli',
        'recvalue': 'Recr. claim',
        'restrictionnatureweight': 'Nat. restr.',
        'valuenaturelocation': 'Nat. Loc.',
        'pumping': 'Nat. Phys.',
        'spreidingratio': 'Res. Loc.',
    },
)

# Fixed 1600x800 canvas with font size 18, then export as PNG.
parallel_kappa.update_layout(autosize=False, width=1600, height=800, font=dict(size=18))
parallel_kappa.write_image('E:/thesis_images/parallelcats/kappa.png')

In [10]:
# The 'clusters' column is shared between metrics: replace it with the OA labels.
cc_oa = pd.DataFrame({'clusters': oaclusters})
experimentsfile['clusters'] = cc_oa['clusters'].to_numpy()

# Parallel-categories plot of the experiment inputs, coloured by OA cluster.
parallel_oa = px.parallel_categories(
    experimentsfile,
    dimensions=[
        'valuenaturelocation', 'TigrisXLfile', 'inbreiding', 'naturepolicymap',
        'policyweightnature', 'pumping', 'natureclaim', 'recvalue', 'agrclaim',
        'Density', 'Claimfile', 'spreidingratio',
    ],
    color='clusters',
    color_continuous_scale='Rainbow',
    # Column name -> axis title shown in the figure.
    labels=dict(
        Claimfile='Labour pref.',
        TigrisXLfile='House/Job',
        agrclaim='Agr. claim',
        inbreiding='Spat. policy',
        natureclaim='Nat. claim',
        naturepolicymap='Nat. policy',
        policyweightnature='Nat. stimuli',
        recvalue='Recr. claim',
        restrictionnatureweight='Nat. restr.',
        valuenaturelocation='Nat. Loc.',
        pumping='Nat. Phys.',
        spreidingratio='Res. Loc.',
    ),
)

# Fixed 1600x800 canvas with font size 18, then export as PNG.
parallel_oa.update_layout(autosize=False, width=1600, height=800, font={'size': 18})
parallel_oa.write_image('E:/thesis_images/parallelcats/oa.png')

In [10]:
# The 'clusters' column is shared between metrics: replace it with the OD labels.
cc_od = pd.DataFrame({'clusters': odclusters})
experimentsfile['clusters'] = cc_od['clusters'].to_numpy()

# Parallel-categories plot of the experiment inputs, coloured by OD cluster.
parallel_od = px.parallel_categories(
    experimentsfile,
    dimensions=[
        'valuenaturelocation', 'TigrisXLfile', 'inbreiding', 'naturepolicymap',
        'policyweightnature', 'pumping', 'natureclaim', 'recvalue', 'agrclaim',
        'Density', 'Claimfile', 'spreidingratio',
    ],
    color='clusters',
    color_continuous_scale='Rainbow',
    # Column name -> axis title shown in the figure.
    labels=dict(
        Claimfile='Labour pref.',
        TigrisXLfile='House/Job',
        agrclaim='Agr. claim',
        inbreiding='Spat. policy',
        natureclaim='Nat. claim',
        naturepolicymap='Nat. policy',
        policyweightnature='Nat. stimuli',
        recvalue='Recr. claim',
        restrictionnatureweight='Nat. restr.',
        valuenaturelocation='Nat. Loc.',
        pumping='Nat. Phys.',
        spreidingratio='Res. Loc.',
    ),
)

# Fixed 1600x800 canvas with font size 18, then export as PNG.
parallel_od.update_layout(autosize=False, width=1600, height=800, font={'size': 18})
parallel_od.write_image('E:/thesis_images/parallelcats/od.png')

In [18]:
# The 'clusters' column is shared between metrics: replace it with the OQD labels.
cc_oqd = pd.DataFrame({'clusters': oqdclusters})
experimentsfile['clusters'] = cc_oqd['clusters'].to_numpy()

# Parallel-categories plot of the experiment inputs, coloured by OQD cluster.
parallel_oqd = px.parallel_categories(
    experimentsfile,
    dimensions=[
        'valuenaturelocation', 'TigrisXLfile', 'inbreiding', 'naturepolicymap',
        'policyweightnature', 'pumping', 'natureclaim', 'recvalue', 'agrclaim',
        'Density', 'Claimfile', 'spreidingratio',
    ],
    color='clusters',
    color_continuous_scale='Rainbow',
    # Column name -> axis title shown in the figure.
    labels=dict(
        Claimfile='Labour pref.',
        TigrisXLfile='House/Job',
        agrclaim='Agr. claim',
        inbreiding='Spat. policy',
        natureclaim='Nat. claim',
        naturepolicymap='Nat. policy',
        policyweightnature='Nat. stimuli',
        recvalue='Recr. claim',
        restrictionnatureweight='Nat. restr.',
        valuenaturelocation='Nat. Loc.',
        pumping='Nat. Phys.',
        spreidingratio='Res. Loc.',
    ),
)

# Fixed 1600x800 canvas with font size 18, then export as PNG.
parallel_oqd.update_layout(autosize=False, width=1600, height=800, font={'size': 18})
parallel_oqd.write_image('E:/thesis_images/parallelcats/oqd.png')

In [19]:
# The 'clusters' column is shared between metrics: replace it with the OAD labels.
cc_oad = pd.DataFrame({'clusters': oadclusters})
experimentsfile['clusters'] = cc_oad['clusters'].to_numpy()

# Parallel-categories plot of the experiment inputs, coloured by OAD cluster.
parallel_oad = px.parallel_categories(
    experimentsfile,
    dimensions=[
        'valuenaturelocation', 'TigrisXLfile', 'inbreiding', 'naturepolicymap',
        'policyweightnature', 'pumping', 'natureclaim', 'recvalue', 'agrclaim',
        'Density', 'Claimfile', 'spreidingratio',
    ],
    color='clusters',
    color_continuous_scale='Rainbow',
    # Column name -> axis title shown in the figure.
    labels=dict(
        Claimfile='Labour pref.',
        TigrisXLfile='House/Job',
        agrclaim='Agr. claim',
        inbreiding='Spat. policy',
        natureclaim='Nat. claim',
        naturepolicymap='Nat. policy',
        policyweightnature='Nat. stimuli',
        recvalue='Recr. claim',
        restrictionnatureweight='Nat. restr.',
        valuenaturelocation='Nat. Loc.',
        pumping='Nat. Phys.',
        spreidingratio='Res. Loc.',
    ),
)

# Fixed 1600x800 canvas with font size 18, then export as PNG.
parallel_oad.update_layout(autosize=False, width=1600, height=800, font={'size': 18})
parallel_oad.write_image('E:/thesis_images/parallelcats/oad.png')

In [20]:
# The 'clusters' column is shared between metrics: replace it with the QD22 labels.
cc_qd22 = pd.DataFrame({'clusters': qd22clusters})
experimentsfile['clusters'] = cc_qd22['clusters'].to_numpy()

# Parallel-categories plot of the experiment inputs, coloured by QD22 cluster.
parallel_qd22 = px.parallel_categories(
    experimentsfile,
    dimensions=[
        'valuenaturelocation', 'TigrisXLfile', 'inbreiding', 'naturepolicymap',
        'policyweightnature', 'pumping', 'natureclaim', 'recvalue', 'agrclaim',
        'Density', 'Claimfile', 'spreidingratio',
    ],
    color='clusters',
    color_continuous_scale='Rainbow',
    # Column name -> axis title shown in the figure.
    labels=dict(
        Claimfile='Labour pref.',
        TigrisXLfile='House/Job',
        agrclaim='Agr. claim',
        inbreiding='Spat. policy',
        natureclaim='Nat. claim',
        naturepolicymap='Nat. policy',
        policyweightnature='Nat. stimuli',
        recvalue='Recr. claim',
        restrictionnatureweight='Nat. restr.',
        valuenaturelocation='Nat. Loc.',
        pumping='Nat. Phys.',
        spreidingratio='Res. Loc.',
    ),
)

# Fixed 1600x800 canvas with font size 18, then export as PNG.
parallel_qd22.update_layout(autosize=False, width=1600, height=800, font={'size': 18})
parallel_qd22.write_image('E:/thesis_images/parallelcats/qd22.png')

In [21]:
# The 'clusters' column is shared between metrics: replace it with the Shannon labels.
cc_shan = pd.DataFrame({'clusters': shannonclusters})
experimentsfile['clusters'] = cc_shan['clusters'].to_numpy()

# Parallel-categories plot of the experiment inputs, coloured by Shannon cluster.
parallel_shan = px.parallel_categories(
    experimentsfile,
    dimensions=[
        'valuenaturelocation', 'TigrisXLfile', 'inbreiding', 'naturepolicymap',
        'policyweightnature', 'pumping', 'natureclaim', 'recvalue', 'agrclaim',
        'Density', 'Claimfile', 'spreidingratio',
    ],
    color='clusters',
    color_continuous_scale='Rainbow',
    # Column name -> axis title shown in the figure.
    labels=dict(
        Claimfile='Labour pref.',
        TigrisXLfile='House/Job',
        agrclaim='Agr. claim',
        inbreiding='Spat. policy',
        natureclaim='Nat. claim',
        naturepolicymap='Nat. policy',
        policyweightnature='Nat. stimuli',
        recvalue='Recr. claim',
        restrictionnatureweight='Nat. restr.',
        valuenaturelocation='Nat. Loc.',
        pumping='Nat. Phys.',
        spreidingratio='Res. Loc.',
    ),
)

# Fixed 1600x800 canvas with font size 18, then export as PNG.
parallel_shan.update_layout(autosize=False, width=1600, height=800, font={'size': 18})
parallel_shan.write_image('E:/thesis_images/parallelcats/shan.png')

In [29]:
# The 'clusters' column is shared between metrics: replace it with the Simpson labels.
cc_simp = pd.DataFrame({'clusters': simpsonclusters})
experimentsfile['clusters'] = cc_simp['clusters'].to_numpy()

# Parallel-categories plot of the experiment inputs, coloured by Simpson cluster.
parallel_simp = px.parallel_categories(
    experimentsfile,
    dimensions=[
        'valuenaturelocation', 'TigrisXLfile', 'inbreiding', 'naturepolicymap',
        'policyweightnature', 'pumping', 'natureclaim', 'recvalue', 'agrclaim',
        'Density', 'Claimfile', 'spreidingratio',
    ],
    color='clusters',
    color_continuous_scale='Rainbow',
    # Column name -> axis title shown in the figure.
    labels=dict(
        Claimfile='Labour pref.',
        TigrisXLfile='House/Job',
        agrclaim='Agr. claim',
        inbreiding='Spat. policy',
        natureclaim='Nat. claim',
        naturepolicymap='Nat. policy',
        policyweightnature='Nat. stimuli',
        recvalue='Recr. claim',
        restrictionnatureweight='Nat. restr.',
        valuenaturelocation='Nat. Loc.',
        pumping='Nat. Phys.',
        spreidingratio='Res. Loc.',
    ),
)

# Fixed 1600x800 canvas with font size 18, then export as PNG.
parallel_simp.update_layout(autosize=False, width=1600, height=800, font={'size': 18})
parallel_simp.write_image('E:/thesis_images/parallelcats/simp.png')

In [None]:
# The 'clusters' column is shared between metrics: replace it with the TCA class-4 labels.
cc_tca4 = pd.DataFrame({'clusters': tca4clusters})
experimentsfile['clusters'] = cc_tca4['clusters'].to_numpy()

# Parallel-categories plot of the experiment inputs, coloured by TCA class-4 cluster.
parallel_tca4 = px.parallel_categories(
    experimentsfile,
    dimensions=[
        'valuenaturelocation', 'TigrisXLfile', 'inbreiding', 'naturepolicymap',
        'policyweightnature', 'pumping', 'natureclaim', 'recvalue', 'agrclaim',
        'Density', 'Claimfile', 'spreidingratio',
    ],
    color='clusters',
    color_continuous_scale='Rainbow',
    # Column name -> axis title shown in the figure.
    labels=dict(
        Claimfile='Labour pref.',
        TigrisXLfile='House/Job',
        agrclaim='Agr. claim',
        inbreiding='Spat. policy',
        natureclaim='Nat. claim',
        naturepolicymap='Nat. policy',
        policyweightnature='Nat. stimuli',
        recvalue='Recr. claim',
        restrictionnatureweight='Nat. restr.',
        valuenaturelocation='Nat. Loc.',
        pumping='Nat. Phys.',
        spreidingratio='Res. Loc.',
    ),
)

# Fixed 1600x800 canvas with font size 18, then export as PNG.
parallel_tca4.update_layout(autosize=False, width=1600, height=800, font={'size': 18})
parallel_tca4.write_image('E:/thesis_images/parallelcats/tca4.png')

In [23]:
# The 'clusters' column is shared between metrics: replace it with the TCA class-14 labels.
cc_tca14 = pd.DataFrame({'clusters': tca14clusters})
experimentsfile['clusters'] = cc_tca14['clusters'].to_numpy()

# Parallel-categories plot of the experiment inputs, coloured by TCA class-14 cluster.
parallel_tca14 = px.parallel_categories(
    experimentsfile,
    dimensions=[
        'valuenaturelocation', 'TigrisXLfile', 'inbreiding', 'naturepolicymap',
        'policyweightnature', 'pumping', 'natureclaim', 'recvalue', 'agrclaim',
        'Density', 'Claimfile', 'spreidingratio',
    ],
    color='clusters',
    color_continuous_scale='Rainbow',
    # Column name -> axis title shown in the figure.
    labels=dict(
        Claimfile='Labour pref.',
        TigrisXLfile='House/Job',
        agrclaim='Agr. claim',
        inbreiding='Spat. policy',
        natureclaim='Nat. claim',
        naturepolicymap='Nat. policy',
        policyweightnature='Nat. stimuli',
        recvalue='Recr. claim',
        restrictionnatureweight='Nat. restr.',
        valuenaturelocation='Nat. Loc.',
        pumping='Nat. Phys.',
        spreidingratio='Res. Loc.',
    ),
)

# Fixed 1600x800 canvas with font size 18, then export as PNG.
parallel_tca14.update_layout(autosize=False, width=1600, height=800, font={'size': 18})
parallel_tca14.write_image('E:/thesis_images/parallelcats/tca14.png')

In [24]:
# The 'clusters' column is shared between metrics: replace it with the TCA class-22 labels.
cc_tca22 = pd.DataFrame({'clusters': tca22clusters})
experimentsfile['clusters'] = cc_tca22['clusters'].to_numpy()

# Parallel-categories plot of the experiment inputs, coloured by TCA class-22 cluster.
parallel_tca22 = px.parallel_categories(
    experimentsfile,
    dimensions=[
        'valuenaturelocation', 'TigrisXLfile', 'inbreiding', 'naturepolicymap',
        'policyweightnature', 'pumping', 'natureclaim', 'recvalue', 'agrclaim',
        'Density', 'Claimfile', 'spreidingratio',
    ],
    color='clusters',
    color_continuous_scale='Rainbow',
    # Column name -> axis title shown in the figure.
    labels=dict(
        Claimfile='Labour pref.',
        TigrisXLfile='House/Job',
        agrclaim='Agr. claim',
        inbreiding='Spat. policy',
        natureclaim='Nat. claim',
        naturepolicymap='Nat. policy',
        policyweightnature='Nat. stimuli',
        recvalue='Recr. claim',
        restrictionnatureweight='Nat. restr.',
        valuenaturelocation='Nat. Loc.',
        pumping='Nat. Phys.',
        spreidingratio='Res. Loc.',
    ),
)

# Fixed 1600x800 canvas with font size 18, then export as PNG.
parallel_tca22.update_layout(autosize=False, width=1600, height=800, font={'size': 18})
parallel_tca22.write_image('E:/thesis_images/parallelcats/tca22.png')

In [26]:
# The 'clusters' column is shared between metrics: replace it with the PLAND class-4 labels.
cc_pland4 = pd.DataFrame({'clusters': pland4clusters})
experimentsfile['clusters'] = cc_pland4['clusters'].to_numpy()

# Parallel-categories plot of the experiment inputs, coloured by PLAND class-4 cluster.
parallel_pland4 = px.parallel_categories(
    experimentsfile,
    dimensions=[
        'valuenaturelocation', 'TigrisXLfile', 'inbreiding', 'naturepolicymap',
        'policyweightnature', 'pumping', 'natureclaim', 'recvalue', 'agrclaim',
        'Density', 'Claimfile', 'spreidingratio',
    ],
    color='clusters',
    color_continuous_scale='Rainbow',
    # Column name -> axis title shown in the figure.
    labels=dict(
        Claimfile='Labour pref.',
        TigrisXLfile='House/Job',
        agrclaim='Agr. claim',
        inbreiding='Spat. policy',
        natureclaim='Nat. claim',
        naturepolicymap='Nat. policy',
        policyweightnature='Nat. stimuli',
        recvalue='Recr. claim',
        restrictionnatureweight='Nat. restr.',
        valuenaturelocation='Nat. Loc.',
        pumping='Nat. Phys.',
        spreidingratio='Res. Loc.',
    ),
)

# Fixed 1600x800 canvas with font size 18, then export as PNG.
parallel_pland4.update_layout(autosize=False, width=1600, height=800, font={'size': 18})
parallel_pland4.write_image('E:/thesis_images/parallelcats/pland4.png')

In [27]:
# The 'clusters' column is shared between metrics: replace it with the PLAND class-14 labels.
cc_pland14 = pd.DataFrame({'clusters': pland14clusters})
experimentsfile['clusters'] = cc_pland14['clusters'].to_numpy()

# Parallel-categories plot of the experiment inputs, coloured by PLAND class-14 cluster.
parallel_pland14 = px.parallel_categories(
    experimentsfile,
    dimensions=[
        'valuenaturelocation', 'TigrisXLfile', 'inbreiding', 'naturepolicymap',
        'policyweightnature', 'pumping', 'natureclaim', 'recvalue', 'agrclaim',
        'Density', 'Claimfile', 'spreidingratio',
    ],
    color='clusters',
    color_continuous_scale='Rainbow',
    # Column name -> axis title shown in the figure.
    labels=dict(
        Claimfile='Labour pref.',
        TigrisXLfile='House/Job',
        agrclaim='Agr. claim',
        inbreiding='Spat. policy',
        natureclaim='Nat. claim',
        naturepolicymap='Nat. policy',
        policyweightnature='Nat. stimuli',
        recvalue='Recr. claim',
        restrictionnatureweight='Nat. restr.',
        valuenaturelocation='Nat. Loc.',
        pumping='Nat. Phys.',
        spreidingratio='Res. Loc.',
    ),
)

# Fixed 1600x800 canvas with font size 18, then export as PNG.
parallel_pland14.update_layout(autosize=False, width=1600, height=800, font={'size': 18})
parallel_pland14.write_image('E:/thesis_images/parallelcats/pland14.png')

In [28]:
# The 'clusters' column is shared between metrics: replace it with the PLAND class-22 labels.
cc_pland22 = pd.DataFrame({'clusters': pland22clusters})
experimentsfile['clusters'] = cc_pland22['clusters'].to_numpy()

# Parallel-categories plot of the experiment inputs, coloured by PLAND class-22 cluster.
parallel_pland22 = px.parallel_categories(
    experimentsfile,
    dimensions=[
        'valuenaturelocation', 'TigrisXLfile', 'inbreiding', 'naturepolicymap',
        'policyweightnature', 'pumping', 'natureclaim', 'recvalue', 'agrclaim',
        'Density', 'Claimfile', 'spreidingratio',
    ],
    color='clusters',
    color_continuous_scale='Rainbow',
    # Column name -> axis title shown in the figure.
    labels=dict(
        Claimfile='Labour pref.',
        TigrisXLfile='House/Job',
        agrclaim='Agr. claim',
        inbreiding='Spat. policy',
        natureclaim='Nat. claim',
        naturepolicymap='Nat. policy',
        policyweightnature='Nat. stimuli',
        recvalue='Recr. claim',
        restrictionnatureweight='Nat. restr.',
        valuenaturelocation='Nat. Loc.',
        pumping='Nat. Phys.',
        spreidingratio='Res. Loc.',
    ),
)

# Fixed 1600x800 canvas with font size 18, then export as PNG.
parallel_pland22.update_layout(autosize=False, width=1600, height=800, font={'size': 18})
parallel_pland22.write_image('E:/thesis_images/parallelcats/pland22.png')

# Isolate Cluster

In [30]:
# Re-read the experiment table with its raw (un-relabelled) codes into its own
# variable. The original chained assignment also rebound `experimentsfile`,
# replacing the labelled table used by the plots above with the raw CSV.
experimentsfile2 = pd.read_csv('E:/thesis_data/experimentsfinal.csv').drop(['Unnamed: 0'], axis=1)

# Attach the kappa cluster label of every experiment.
cluster_column = pd.DataFrame({'clusters': kappaclusters})
experimentsfile2['clusters'] = cluster_column['clusters'].values
df2 = experimentsfile2

# One sub-frame per kappa cluster. The kappa clustering uses 7 clusters
# (labels 0-6), so cluster 0 is isolated as well.
df2c0 = df2[df2.clusters == 0]
df2c1 = df2[df2.clusters == 1]
df2c2 = df2[df2.clusters == 2]
df2c3 = df2[df2.clusters == 3]
df2c4 = df2[df2.clusters == 4]
df2c5 = df2[df2.clusters == 5]
df2c6 = df2[df2.clusters == 6]

In [34]:
# Axis 1: housing/job claim of the experiments in kappa cluster 4 (raw codes 1-4).
Residential_dim = go.parcats.Dimension(
    values=df2c4.TigrisXLfile,
    label="House/job claim",
    categoryarray=[1, 2, 3, 4],
    ticktext=['Very high', 'High', 'low', 'very low'],
)

# Axis 2: spatial policy of the same experiments (raw booleans).
Cluster_dim = go.parcats.Dimension(
    values=df2c4.inbreiding,
    label="Spatial policy",
    categoryarray=[False, True],
    ticktext=['Restrictive', 'Liberal'],
)

# Ribbons are coloured by the housing/job claim code on a two-colour scale.
color = df2c4.TigrisXLfile
colorscale = [[0, 'mediumseagreen'], [1, 'lightsteelblue']]

# Assemble the parallel-categories trace and export the figure as PNG.
fig = go.Figure(
    data=[
        go.Parcats(
            dimensions=[Residential_dim, Cluster_dim],
            line=dict(color=color, colorscale=colorscale),
            hoveron='color',
            hoverinfo='count+probability',
            arrangement='freeform',
        )
    ]
)

fig.write_image('E:/thesis_images/parallelcats/cluster_iso.png')