In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import os


%matplotlib inline

In [None]:
# Folder one level above the current working directory; every data path below is built from it.
parent_dir = os.path.dirname(os.getcwd())

In [None]:
# LMS zone data (shapefile). The path is assembled with os.path.join so it is
# valid on every operating system instead of relying on Windows back-slashes.
zones = gpd.read_file(os.path.join(parent_dir, 'Data', 'New', 'lms_zone_du_new.shp'))

In [None]:
# Pre-computed cluster labels for the two cluster sets. Paths are built with
# os.path.join so the notebook also runs outside Windows.
labels_advanced = np.load(os.path.join(parent_dir, 'Data', 'New', 'cluster_labels_advanced.npy'))
labels_simple = np.load(os.path.join(parent_dir, 'Data', 'New', 'cluster_labels_simple.npy'))

# Degree of urbanisation shifted down by one, so it can be matched with `== j`
# for j starting at 0 (presumably deg_urba itself starts at 1 -- confirm in the shapefile).
labels_du = np.array(zones.deg_urba - 1)

In [None]:
# All built-environment tables live in <parent_dir>/Data/New and use their first
# column as index. os.path.join keeps the paths portable across operating systems.
data_dir = os.path.join(parent_dir, 'Data', 'New')

# Density
dens = pd.read_csv(os.path.join(data_dir, 'lms_zone_density.csv'), index_col=0)

# Diversity
landuse = pd.read_csv(os.path.join(data_dir, 'lms_diversity_lu.csv'), index_col=0)
hist = pd.read_csv(os.path.join(data_dir, 'lms_zone_historical.csv'), index_col=0)

# Design
design = pd.read_csv(os.path.join(data_dir, 'lms_zone_design.csv'), index_col=0)

# Destination accessibility
dest = pd.read_csv(os.path.join(data_dir, 'lms_zone_dest_access.csv'), index_col=0)

# Distance to transit
transit = pd.read_csv(os.path.join(data_dir, 'lms_zone_transit.csv'), index_col=0)

# Demography
demo = pd.read_csv(os.path.join(data_dir, 'zone_demographics.csv'), index_col=0)

In [None]:
# Paths are built with os.path.join so they are valid on every operating system.
data_dir = os.path.join(parent_dir, 'Data', 'New')

ovin = pd.read_csv(os.path.join(data_dir, 'Ovin_final.csv'), index_col=0)

## Modal split travel behaviour
ovin_tb = pd.read_csv(os.path.join(data_dir, 'lms_zone_ovin_travel_behaviour_newF.csv'), index_col=0)
lms_tb = pd.read_csv(os.path.join(data_dir, 'lms_zone_lms_modal_split.csv'), index_col=0)
lms_orig = pd.read_csv(os.path.join(data_dir, 'lms_modal_split_orig_abs.csv'), index_col=0)
lms_tot = lms_orig.iloc[:, 1:8].sum(axis=1) # Total trips for each zone

# Work on a copy of columns 1-7 of the LMS modal split. The columns at positions
# 3 and 4 of that selection are summed into position 3, after which
# 'Tram/Metro_o' is dropped (presumably that is the column at position 4 --
# verify against the CSV header).
lms_tb2 = lms_tb.iloc[:, 1:8].copy()
lms_tb2.iloc[:, 3] = lms_tb2.iloc[:, 3:5].sum(axis=1)
lms_tb2 = lms_tb2.drop(columns='Tram/Metro_o')

# Functions

In [None]:
def modal_split_ovin(n, labels_cluster, idx=None):
    """Calculate the modal split for different clusters based on OViN data.

    Reads the notebook-level table ``ovin_tb``. The columns at positions 1-6
    are used as the six mode shares and the third-to-last column as the weight
    of every row (presumably the number of trips per zone -- confirm against
    the CSV header).

    Parameters
    ----------
    n : int
        Number of clusters; the labels ``0 .. n-1`` are evaluated.
    labels_cluster : array-like of int
        Cluster label for every row position of ``ovin_tb``.
    idx : array-like, optional
        Row positions to take into account. When omitted, every position of
        ``labels_cluster`` is used (this replaces a hard-coded count of 1406).

    Returns
    -------
    tb_list : numpy.ndarray, shape (n, 6)
        Weighted mean of each mode share per cluster; 0 where it is undefined.
    std_list : numpy.ndarray, shape (n, 6)
        Weighted standard deviation of each mode share per cluster; left at 0
        for clusters whose weights sum to zero.
    """
    # Positional indexing below needs a plain array, not a labelled Series.
    labels_cluster = np.asarray(labels_cluster)
    idx = np.arange(len(labels_cluster)) if idx is None else np.asarray(idx)

    selected_rows = ovin_tb.iloc[idx]
    selected_labels = labels_cluster[idx]

    tb_list = np.zeros((n, 6))
    std_list = np.zeros((n, 6))

    for i in range(n):
        # Missing values count as 0. Filling *before* the weights are taken
        # guarantees that no NaN weight can reach np.cov; for the weighted
        # mean it is equivalent to skipping the missing products.
        cluster = selected_rows[selected_labels == i].fillna(0)
        tot = cluster.iloc[:, -3]
        shares = cluster.iloc[:, 1:7]
        tot_sum = tot.sum()

        # 0 / 0 for empty or zero-weight clusters yields NaN, reset to 0 below.
        tb_list[i] = np.array(shares.multiply(tot, axis='index').sum() / tot_sum)

        # The weighted variance is undefined when all weights are zero.
        if tot_sum != 0:
            for m in range(6):
                std_list[i, m] = np.sqrt(np.cov(shares.iloc[:, m], aweights=tot))

    tb_list[np.isnan(tb_list)] = 0

    return tb_list, std_list

In [None]:
def sort_data(n, labels_cluster, tb_list, be):
    """Order the clusters by descending value of the first column of ``tb_list``.

    Parameters
    ----------
    n : int
        Number of clusters (labels ``0 .. n-1``).
    labels_cluster : numpy.ndarray
        Cluster label per row of ``be``.
    tb_list : numpy.ndarray
        One row per cluster; its first column defines the ordering.
    be : pandas.DataFrame
        Table whose columns are averaged per cluster.

    Returns
    -------
    tb_list_n : numpy.ndarray
        ``tb_list`` with its rows re-ordered.
    x_sort : numpy.ndarray
        Cluster labels in the new order.
    mean_be : numpy.ndarray, shape (number of columns of ``be``, n)
        Per-cluster mean of every column of ``be``, columns in the new order.
    cluster_size : numpy.ndarray
        Number of rows per cluster, in the new order.
    """
    # Largest first-column value comes first.
    order = np.argsort(-tb_list[:, 0])

    # One boolean row selector per cluster, shared by the means and the sizes.
    members = [labels_cluster == cluster for cluster in range(n)]

    n_variables = len(be.columns)
    mean_be = np.zeros((n_variables, n))
    for position in range(n_variables):
        variable = be.iloc[:, position]
        mean_be[position] = [variable[selector].mean() for selector in members]

    cluster_size = np.array([selector.sum() for selector in members], dtype=float)

    x_sort = np.arange(n)[order]
    tb_list_n = tb_list[order]

    return tb_list_n, x_sort, mean_be[:, order], cluster_size[order]

In [None]:
# Built-environment variables of the advanced ("complicated") cluster set,
# gathered from the individual tables into one frame. Column order matters:
# the plotting cells address the columns by position.
be_complicated = dens[['Pop_dens', 'Surrounding_pop_dens', 'Job_dens', 'Surrounding_job_dens']].assign(
    Services=landuse['Services'] * 100,  # scaled by 100; plotted as a percentage
    House_45_less=hist['House_45_less'],
    Road_density=design['Road_density'],
    Dist_point_of_interest=dest['Dist_point_of_interest'],
    Bus_stops=transit['Bus_stops'],
    TM_stops=transit[['Tram_stops', 'Metro_stops']].sum(axis=1),  # tram and metro stops combined
    Parking_fare=transit['Parking_fare'],
)

In [None]:
# Quick look at the first column of the table (population density).
be_complicated['Pop_dens']

In [None]:
# Number of clusters in the advanced cluster set.
n = 7
# Weighted modal split and its weighted standard deviation per cluster.
tb_list, std_list = modal_split_ovin(n=n, labels_cluster=labels_advanced)

In [None]:
# Order the advanced clusters by the first modal-split column (descending);
# `x_sort` is the resulting cluster order used by the plots below.
tb_list_n, x_sort, mean_be, cluster_size = sort_data(
    n=n,
    labels_cluster=labels_advanced,
    tb_list=tb_list,
    be=be_complicated,
)

In [None]:
# Same look at the first column (population density) as before the sort_data call.
be_complicated['Pop_dens']

In [None]:
# Violin plots of every built-environment variable, one panel per variable and
# one horizontal violin per advanced cluster (clusters drawn in `x_sort` order).
n_panel_rows = 3
f, ax = plt.subplots(n_panel_rows, len(be_complicated.columns) // n_panel_rows + 1)
f.set_size_inches(20, 16)

# One colour per cluster label.
colors = ["#56B4E9",  "#E69F00", "#F0E442", "#0072B2", "#009E73", "#CC79A7",  "#D55E00", ]
xlabels = ['Population density [people/ha]', 'Population density surrounding zones [people/ha]',
           'Job density [jobs/ha]', 'Job density surrounding zones [jobs/ha]', 'Percentage of service land use',
           'Percentage of houses built before 1945', 'Road density [km/km2]', 'Average distance to points of interest [km]',
           'Number of bus stops within 2.5 km', 'Number of tram/metro stops within 2.5 km', 'Average parking fee [eurocents]']

titles = ['Population density', 'Population density surrounding zones', 'Job density', 'Job density surrounding zones',
          'Service land use', 'Houses built before 1945', 'Road density', 'Distance to points of interest',
          'Bus stops', 'Tram/metro stops', 'Parking fee']

# Panels in row-major order; the very first one stays empty, without ticks.
panels = ax.ravel()
panels[0].set_xticks([], [])
panels[0].set_yticks([], [])

for var_pos, panel in enumerate(panels[1:]):
    variable = be_complicated.iloc[:, var_pos]

    for slot in range(n):
        cluster = x_sort[slot]
        shade = colors[cluster]

        parts = panel.violinplot(variable[labels_advanced == cluster].dropna(), positions=[slot],
                                 vert=False, showmeans=True)

        # Lines (bars, extrema, mean) and bodies all take the cluster colour.
        for line_key in ('cbars', 'cmins', 'cmaxes', 'cmeans'):
            parts[line_key].set_edgecolor(shade)
        for body in parts['bodies']:
            body.set_facecolor(shade)
            body.set_alpha(0.5)

    panel.set_xlabel(xlabels[var_pos])
    panel.set_ylabel('Cluster numbers')
    panel.set_title(titles[var_pos])
    # Row position -> cluster label shown on the y axis.
    panel.set_yticks(np.arange(n), labels=x_sort)

# f.set_facecolor('lightcyan')
# f.set_facecolor('lightskyblue')

    


    # 

In [None]:
# Violin plots of every built-environment variable of the advanced set, one
# panel per variable and one horizontal violin per degree of urbanisation.
n_panel_rows = 3
n_degrees = 6  # labels_du values 0..5, shown on the axis as 1..6

f, ax = plt.subplots(n_panel_rows, len(be_complicated.columns) // n_panel_rows + 1)
f.set_size_inches(20, 16)

colors = ["#56B4E9",  "#E69F00", "#F0E442", "#0072B2", "#009E73", "#CC79A7",  "#D55E00", ]
xlabels = ['Population density [people/ha]', 'Population density surrounding zones [people/ha]',
           'Job density [jobs/ha]', 'Job density surrounding zones [jobs/ha]', 'Percentage of service land use',
           'Percentage of houses built before 1945', 'Road density [km/km2]', 'Average distance to points of interest [km]',
           'Number of bus stops within 2.5 km', 'Number of tram/metro stops within 2.5 km', 'Average parking fee [eurocents]']

titles = ['Population density', 'Population density surrounding zones', 'Job density', 'Job density surrounding zones',
          'Service land use', 'Houses built before 1945', 'Road density', 'Distance to points of interest',
          'Bus stops', 'Tram/metro stops', 'Parking fee']

# Panels in row-major order; the very first one stays empty, without ticks.
panels = ax.ravel()
panels[0].set_xticks([], [])
panels[0].set_yticks([], [])

for var_pos, panel in enumerate(panels[1:]):
    variable = be_complicated.iloc[:, var_pos]

    for degree in range(n_degrees):
        # Colour by the degree itself, so the figure does not depend on the
        # cluster ordering `x_sort` that happens to be left in the kernel.
        shade = colors[degree]

        parts = panel.violinplot(variable[labels_du == degree].dropna(), positions=[degree],
                                 vert=False, showmeans=True)

        for line_key in ('cbars', 'cmins', 'cmaxes', 'cmeans'):
            parts[line_key].set_edgecolor(shade)
        for body in parts['bodies']:
            body.set_facecolor(shade)
            body.set_alpha(0.5)

    panel.set_xlabel(xlabels[var_pos])
    panel.set_ylabel('Degrees of urbanisation')
    panel.set_title(titles[var_pos])
    # labels_du is zero-based; the tick labels show the degrees as 1..6.
    panel.set_yticks(np.arange(n_degrees), labels=np.arange(1, n_degrees + 1))

## Simple cluster set

In [None]:
# Built-environment variables of the simple cluster set. Compared with the
# advanced set, Job_dens, House_45_less, Dist_point_of_interest and Bus_stops
# are left out, and 'Services' is kept unscaled (not multiplied by 100).
# Column order matters: the plotting cells address the columns by position.
be_simple = dens[['Pop_dens', 'Surrounding_pop_dens', 'Surrounding_job_dens']].assign(
    Services=landuse['Services'],
    Road_density=design['Road_density'],
    TM_stops=transit[['Tram_stops', 'Metro_stops']].sum(axis=1),  # tram and metro stops combined
    Parking_fare=transit['Parking_fare'],
)

In [None]:
# Number of clusters in the simple cluster set.
n = 7
# NOTE: this overwrites n, tb_list, std_list, tb_list_n, x_sort, mean_be and
# cluster_size of the advanced set; from here on they describe the simple set.
tb_list, std_list = modal_split_ovin(n=n, labels_cluster=labels_simple)
tb_list_n, x_sort, mean_be, cluster_size = sort_data(
    n=n,
    labels_cluster=labels_simple,
    tb_list=tb_list,
    be=be_simple,
)

In [None]:
# Violin plots of every built-environment variable of the simple set, one panel
# per variable and one horizontal violin per cluster (clusters in `x_sort` order).
n_panel_rows = 2
f, ax = plt.subplots(n_panel_rows, len(be_simple.columns) // n_panel_rows + 1)
f.set_size_inches(20, 11)

# One colour per cluster label.
colors = ["#CC79A7", "#E69F00", "#F0E442", "#0072B2", "#009E73", "#56B4E9",  "#D55E00",]
# be_simple['Services'] is the unscaled land-use value (not multiplied by 100),
# so the axis is labelled as a share rather than a percentage.
xlabels = ['Population density [people/ha]', 'Population density surrounding zones [people/ha]',
           'Job density surrounding zones [jobs/ha]', 'Share of service land use',
           'Road density [km/km2]', 'Number of tram/metro stops within 2.5 km', 'Average parking fee [eurocents]']

titles = ['Population density', 'Population density surrounding zones', 'Job density surrounding zones',
          'Service land use', 'Road density',
          'Tram/metro stops', 'Parking fee']

# Panels in row-major order; the very first one stays empty, without ticks.
panels = ax.ravel()
panels[0].set_xticks([], [])
panels[0].set_yticks([], [])

for var_pos, panel in enumerate(panels[1:]):
    variable = be_simple.iloc[:, var_pos]

    for slot in range(n):
        cluster = x_sort[slot]
        shade = colors[cluster]

        parts = panel.violinplot(variable[labels_simple == cluster].dropna(), positions=[slot],
                                 vert=False, showmeans=True)

        # Lines (bars, extrema, mean) and bodies all take the cluster colour.
        for line_key in ('cbars', 'cmins', 'cmaxes', 'cmeans'):
            parts[line_key].set_edgecolor(shade)
        for body in parts['bodies']:
            body.set_facecolor(shade)
            body.set_alpha(0.5)

    panel.set_xlabel(xlabels[var_pos])
    panel.set_ylabel('Cluster numbers')
    panel.set_title(titles[var_pos])
    # Row position -> cluster label shown on the y axis.
    panel.set_yticks(np.arange(n), labels=x_sort)

In [None]:
# Violin plots of every built-environment variable of the simple set, one panel
# per variable and one horizontal violin per degree of urbanisation.
n_panel_rows = 2
n_degrees = 6  # labels_du values 0..5, shown on the axis as 1..6

f, ax = plt.subplots(n_panel_rows, len(be_simple.columns) // n_panel_rows + 1)
f.set_size_inches(20, 11)

colors = ["#CC79A7", "#E69F00", "#F0E442", "#0072B2", "#009E73", "#56B4E9",  "#D55E00",]
xlabels = ['Population density [people/ha]', 'Population density surrounding zones [people/ha]',
           'Job density surrounding zones [jobs/ha]', 'Share of service land use',
           'Road density [km/km2]', 'Number of tram/metro stops within 2.5 km', 'Average parking fee [eurocents]']

titles = ['Population density', 'Population density surrounding zones', 'Job density surrounding zones',
          'Service land use', 'Road density',
          'Tram/metro stops', 'Parking fee']

# Panels in row-major order; the very first one stays empty, without ticks.
panels = ax.ravel()
panels[0].set_xticks([], [])
panels[0].set_yticks([], [])

for var_pos, panel in enumerate(panels[1:]):
    variable = be_simple.iloc[:, var_pos]

    for degree in range(n_degrees):
        # Colour by the degree itself, so the figure does not depend on the
        # cluster ordering `x_sort` that happens to be left in the kernel.
        shade = colors[degree]

        parts = panel.violinplot(variable[labels_du == degree].dropna(), positions=[degree],
                                 vert=False, showmeans=True)

        for line_key in ('cbars', 'cmins', 'cmaxes', 'cmeans'):
            parts[line_key].set_edgecolor(shade)
        for body in parts['bodies']:
            body.set_facecolor(shade)
            body.set_alpha(0.5)

    panel.set_xlabel(xlabels[var_pos])
    # The rows are degrees of urbanisation here, not clusters.
    panel.set_ylabel('Degrees of urbanisation')
    panel.set_title(titles[var_pos])
    # labels_du is zero-based; the tick labels show the degrees as 1..6.
    panel.set_yticks(np.arange(n_degrees), labels=np.arange(1, n_degrees + 1))