In [1]:
import geopandas as gpd
from tqdm import tqdm
import libpysal as lp
import json
import pandas as pd
from matplotlib.patches import Patch  # used for the plot legends later

###  Get the data in the folder ./LCPS_data for pre-processing. Let's call this `retrieved data`.

In [2]:
def retrieve_data(sy):
    """
    Retrieve data for a given school year (sy)
    """
    data_dir = "./LCPS_data"
    # Read the data files
    schools = gpd.read_file('{}/LCPS_Sites_{}.shp'.format(data_dir, sy))
    students = gpd.read_file('{}/Students_{}.shp'.format(data_dir, sy))
    spas = gpd.read_file('{}/PlanningZones_{}.shp'.format(data_dir, sy))
    
    return schools, students, spas


In [3]:
sy = '2017_2018'

In [4]:
# raw_schools, raw_students, raw_spas = retrieve_data(sy)

###  Get the processed data from the folder ./data

In [5]:
#spas_processed = gpd.read_file('new_SPAs_2017_2018.json', driver='GeoJSON')
spas = gpd.read_file('./data/new_SPAs_2017_2018.json', driver='GeoJSON')
students = gpd.read_file('./data/new_Students_2017_2018.json', driver='GeoJSON')
schools = gpd.read_file('./data/new_Schools_2017_2018.json', driver='GeoJSON')




#new line to run the newly downloaded json files, should move around anywhere.
#new line to run the newly downloaded json files, should move around anywhere.

In [6]:
#students.head()
#schools.head()
spas.head()

#uncomment the one you want to see

Unnamed: 0,OBJECTID,COUNT_,SPA,ELEM_,INT_,HIGH_,ELEM_CODE,MID_CODE,HIGH_CODE,DISTRICT,...,HIGH_POP_N,HIGH_POP_P,TOT_POP_A,TOT_POP_W,TOT_POP_M2,TOT_POP_B,TOT_POP_H,TOT_POP_N,TOT_POP_P,geometry
0,1,5.0,WL03.4,126,205,311,LOV,HRM,WHS,WL,...,0,0,1,37,0,2,7,0,0,"POLYGON ((11725868.086 7152149.054, 11725870.6..."
1,2,5.0,WL03,126,205,311,LOV,HRM,WHS,WL,...,0,0,0,83,5,0,6,1,0,"POLYGON ((11726805.147 7160565.439, 11726826.6..."
2,3,5.0,WL27.2,118,202,305,HAM,BRM,LVH,WL,...,0,0,0,11,0,1,5,0,0,"POLYGON ((11720159.464 7099502.139, 11720172.1..."
3,4,5.0,WL32.1,132,205,311,MTV,HRM,WHS,WL,...,0,0,8,205,14,8,23,1,0,"POLYGON ((11694466.485 7097608.657, 11694418.5..."
4,5,5.0,WL02,126,205,311,LOV,HRM,WHS,WL,...,0,0,9,170,3,0,5,0,0,"POLYGON ((11715899.154 7166497.655, 11715932.1..."


## Analysis Using New SPAs Data

In [7]:
spas.keys()

Index(['OBJECTID', 'COUNT_', 'SPA', 'ELEM_', 'INT_', 'HIGH_', 'ELEM_CODE',
       'MID_CODE', 'HIGH_CODE', 'DISTRICT', 'UTILITIES', 'SHAPE_Leng',
       'SHAPE_Area', 'STDYAREA_1', 'DISTRICT_1', 'PLANNING_Z', 'ELEM', 'MID',
       'HIGH', 'UTILITIE_1', 'DISTRICT_2', 'MID_2018', 'HIGH_2018', 'PK', 'KG',
       'GR1', 'GR2', 'GR3', 'GR4', 'GR5', 'GR6', 'GR7', 'GR8', 'GR9', 'GR10',
       'GR11', 'GR12', 'TOTAL_KG_5', 'TOTAL_6_8', 'TOTAL_9_12', 'TOTAL_KG_1',
       'TOTAL_PK_1', 'ELEM_POP', 'MID_POP', 'HIGH_POP', 'TOT_POP',
       'ELEM_POP_A', 'ELEM_POP_W', 'ELEM_POP_M2', 'ELEM_POP_B', 'ELEM_POP_H',
       'ELEM_POP_N', 'ELEM_POP_P', 'MID_POP_A', 'MID_POP_W', 'MID_POP_M2',
       'MID_POP_B', 'MID_POP_H', 'MID_POP_N', 'MID_POP_P', 'HIGH_POP_A',
       'HIGH_POP_W', 'HIGH_POP_M2', 'HIGH_POP_B', 'HIGH_POP_H', 'HIGH_POP_N',
       'HIGH_POP_P', 'TOT_POP_A', 'TOT_POP_W', 'TOT_POP_M2', 'TOT_POP_B',
       'TOT_POP_H', 'TOT_POP_N', 'TOT_POP_P', 'geometry'],
      dtype='object')

In [8]:
# Choose possible grade level; ELEM, MID, HIGH for SPAs

grade = 'HIGH'
asians = '{}_POP_A'.format(grade)
whites = '{}_POP_W'.format(grade)
mixed = '{}_POP_M2'.format(grade)
blacks = '{}_POP_B'.format(grade)
hispanics = '{}_POP_H'.format(grade)
natives = '{}_POP_N'.format(grade)
pacifics = '{}_POP_P'.format(grade)
non_whites = '{}_POP_NW'.format(grade)


spas_ethnic = spas[['{}_CODE'.format(grade) , asians, whites, mixed, blacks, hispanics, natives, pacifics, '{}_POP'.format(grade)]]
spas_ethnic.loc[:, non_whites] = spas_ethnic.loc[:, asians] +  spas_ethnic.loc[:,blacks] +  spas_ethnic.loc[:,mixed] \
                            + spas_ethnic.loc[:,hispanics] + spas_ethnic.loc[:,natives] + spas_ethnic.loc[:,pacifics]
spas_grouped_raw = spas_ethnic.groupby(['{}_CODE'.format(grade) ]).sum()
spas_grouped = spas_grouped_raw.div(spas_grouped_raw['{}_POP'.format(grade)], axis=0)
spas_grouped.drop([asians, mixed, blacks, hispanics, natives, pacifics, '{}_POP'.format(grade)], axis='columns', inplace=True)
spas_grouped.head()

#when generalizing one place to change and use the FORMAT style

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.499678,0.500322
BWH,0.559783,0.440217
DMH,0.445006,0.554994
FHS,0.494332,0.505668
HTH,0.536534,0.463466


In [9]:
#NOTE: Run this only once or start from the previous cell on subsequent runs - since we use iterrows and create a new column, the calculated value will change on the next runs

def get_measurements(df):
    from numpy import log
    import itertools
    
    # calculate absolute differences
    abs_diffs = []
    for row in df.iterrows():
    #     print([val for val in itertools.permutations(row[1], 2)])
    #     break
        diffs = [abs(val[1] - val[0]) for val in itertools.permutations(row[1], 2)]
        total = sum(diffs)/2
        abs_diffs.append(total)
    df.loc[:,'ABS_DIFF'] = abs_diffs

    # calculate dissimilarity index
    ind_diss = 0.5 * sum(abs(df[whites]/df[whites].sum() - df[non_whites]/df[non_whites].sum()))

    # calculate entropy index
    entropy = -(df[non_whites]*log(df[non_whites]) + df[whites]*log(df[whites]))
    entropy.replace(float('NaN'),0, inplace=True)

    df.loc[:,'ENTROPY_IND'] = entropy

    return df, ind_diss



In [10]:
measurements_df, ind_diss_sa1 = get_measurements(spas_grouped)
segregated_abs_diff = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
print(ind_diss_sa1)
segregated_entropy

0.22418847183281645


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [11]:
# measurements_df.loc[interested_districts,:]
measurements_df

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BRH,0.499678,0.500322,0.000643,0.693147
BWH,0.559783,0.440217,0.119565,0.685982
DMH,0.445006,0.554994,0.109987,0.687086
FHS,0.494332,0.505668,0.011336,0.693083
HTH,0.536534,0.463466,0.073069,0.690475
JCH,0.397834,0.602166,0.204333,0.672123
LCH,0.617251,0.382749,0.234501,0.665394
LVH,0.78388,0.21612,0.56776,0.521953
PFH,0.504132,0.495868,0.008264,0.693113
PVH,0.144881,0.855119,0.710238,0.413726


In [12]:
ax, props = spas_grouped.boxplot(column=[whites, non_whites], figsize=(12,8), return_type='both')

## Conclusions for Original Data


Overall, using the dissimilarity index (0.2), there does not appear to be much segregation within the districts.

However, individually, using the absolute differences (0.7 cut-off) and entropy index (0.4 cut-off), there seems to be evidence of segregation in 1 district; PVH. 


In [13]:
segregated_abs_diff

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PVH,0.144881,0.855119,0.710238,0.413726


In [14]:
segregated_entropy

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [15]:
# legend_handles = [Patch(facecolor='red', label='GUI'),
#                 Patch(facecolor='green', label='LIN'),
#                 Patch(facecolor='yellow', label='SUG'),
#                 Patch(facecolor='orange', label='SUL')]

# ax = spas.boundary.plot(figsize=(15,14))
# spas[spas['ELEM_CODE'] == 'GUI'].plot(ax=ax, color='red')
# spas[spas['ELEM_CODE'] == 'LIN'].plot(ax=ax, color='green')
# spas[spas['ELEM_CODE'] == 'SUG'].plot(ax=ax, color='yellow')
# spas[spas['ELEM_CODE'] == 'SUL'].plot(ax=ax, color='orange')
# # ax.legend
# ax.legend(handles=legend_handles)

# Comparison to data generated by Algorithms

In [16]:
#spas_processed = gpd.read_file('new_SPAs_2017_2018.json', driver='GeoJSON')
# sa = json.loads('./results/SA/run7_ES_SA.json')
# shc = gpd.read_file('./results/SHC/run7_ES_SHC.json', driver='GeoJSON')
# ts = gpd.read_file('./results/TS/run7_ES_TS.json', driver='GeoJSON')

import json

def retrieve_output_data(algo, level, plan):
    output_df = pd.DataFrame({"SPA": [], "NEW_{}_CODE".format(grade): []})  #grade is a global variable
    result_dir = 'results'
    #Load the file as json. 
    with open('{}/{}/run7_{}_{}.json'.format(result_dir, algo, level, algo)) as file:
        #readlines() reads the file as string and loads() loads it into a dict
        obj = json.loads(''.join(file.readlines()))

        #Then iterate its items() as key value pairs
        for spa_code, elem_code in obj[str(plan)]['info']['Final']['zoneIds'].items():
            output_df.loc[len(output_df.index)] = [spa_code, elem_code]

    return output_df
#new line to run the newly downloaded json files, should move around anywhere.

In [17]:
import numpy as np


In [18]:
def group_data(merged_spas):
    merged_spas_ethnic = merged_spas[['NEW_{}_CODE'.format(grade) , asians, whites, mixed, blacks, hispanics, natives, pacifics, '{}_POP'.format(grade), 'geometry']]
    merged_spas_ethnic.loc[:, non_whites] = merged_spas_ethnic.loc[:, asians] +  merged_spas_ethnic.loc[:,blacks] +  merged_spas_ethnic.loc[:,mixed] \
                                + merged_spas_ethnic.loc[:,hispanics] + merged_spas_ethnic.loc[:,natives] + merged_spas_ethnic.loc[:,pacifics]
    merged_spa_grouped_raw = merged_spas_ethnic.groupby(['NEW_{}_CODE'.format(grade) ]).sum()
    merged_spa_grouped = merged_spa_grouped_raw.div(merged_spa_grouped_raw['{}_POP'.format(grade)], axis=0)
    merged_spa_grouped.drop([asians, mixed, blacks, hispanics, natives, pacifics, '{}_POP'.format(grade)], axis='columns', inplace=True)
    return merged_spa_grouped


In [19]:
ind_diss = []

## Output Plan 1

### Simulated Annealing (SA)

In [20]:
plan = 1
algo = 'SA'
level = 'HS'
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.564047,0.435953
BWH,0.463115,0.536885
DMH,0.434715,0.565285
FHS,0.46424,0.53576
HTH,0.727342,0.272658


In [21]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa1 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa1 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa1

0.2405342946496926


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 2

### Simulated Annealing (SA)

In [22]:
plan = 2
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.431045,0.568955
BWH,0.570787,0.429213
DMH,0.44622,0.55378
FHS,0.456304,0.543696
HTH,0.602296,0.397704


In [23]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa2 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa2 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa2

0.2563467385054629


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 3

### Simulated Annealing (SA)

In [24]:
plan = 3
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.475617,0.524383
BWH,0.610465,0.389535
DMH,0.206057,0.793943
FHS,0.45895,0.54105
HTH,0.775907,0.224093


In [25]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa3 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa3 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa3

0.2614020318596999


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 4

### Simulated Annealing (SA)

In [26]:
plan = 4
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.498903,0.501097
BWH,0.544841,0.455159
DMH,0.37032,0.62968
FHS,0.460993,0.539007
HTH,0.554386,0.445614


In [27]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa4 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa4 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa4

0.25188592537133236


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 5

### Simulated Annealing (SA)

In [28]:
plan = 5
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.478304,0.521696
BWH,0.562724,0.437276
DMH,0.336321,0.663679
FHS,0.429386,0.570614
HTH,0.771776,0.228224


In [29]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa5 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa5 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa5

0.2750785065756006


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 6

### Simulated Annealing (SA)

In [30]:
plan = 6
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.514678,0.485322
BWH,0.518129,0.481871
DMH,0.550072,0.449928
FHS,0.463554,0.536446
HTH,0.745739,0.254261


In [31]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa6 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa6 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa6

0.23706017593595383


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 7

### Simulated Annealing (SA)

In [32]:
plan = 7
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.521456,0.478544
BWH,0.510006,0.489994
DMH,0.470525,0.529475
FHS,0.468489,0.531511
HTH,0.701361,0.298639


In [33]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa7 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa7 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa7

0.24326744736810463


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 8

### Simulated Annealing (SA)

In [34]:
plan = 8
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.409574,0.590426
BWH,0.588005,0.411995
DMH,0.456173,0.543827
FHS,0.457831,0.542169
HTH,0.74639,0.25361


In [35]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa8 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa8 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa8

0.24196629660424882


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 9

### Simulated Annealing (SA)

In [36]:
plan = 9
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.567148,0.432852
BWH,0.453179,0.546821
DMH,0.381628,0.618372
FHS,0.46406,0.53594
HTH,0.753258,0.246742


In [37]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa9 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa9 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa9

0.2510293205290445


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 10

### Simulated Annealing (SA)

In [38]:
plan = 10
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.540938,0.459062
BWH,0.560944,0.439056
DMH,0.420033,0.579967
FHS,0.483781,0.516219
HTH,0.595118,0.404882


In [39]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa10 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa10 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa10

0.24506562121598427


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 11

### Simulated Annealing (SA)

In [40]:
plan = 11
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.455856,0.544144
BWH,0.550536,0.449464
DMH,0.406962,0.593038
FHS,0.483136,0.516864
HTH,0.445006,0.554994


In [41]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa11 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa11 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa11

0.24994790105289832


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 12

### Simulated Annealing (SA)

In [42]:
plan = 12
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.484108,0.515892
BWH,0.574245,0.425755
DMH,0.225476,0.774524
FHS,0.400791,0.599209
HTH,0.716072,0.283928


In [43]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa12 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa12 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa12

0.30329047505458456


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 13

### Simulated Annealing (SA)

In [44]:
plan = 13
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.48088,0.51912
BWH,0.564295,0.435705
DMH,0.475831,0.524169
FHS,0.446465,0.553535
HTH,0.690741,0.309259


In [45]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa13 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa13 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa13

0.2567345456095197


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 14

### Simulated Annealing (SA)

In [46]:
plan = 14
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.495238,0.504762
BWH,0.551929,0.448071
DMH,0.538927,0.461073
FHS,0.467446,0.532554
HTH,0.744133,0.255867


In [47]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa14 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa14 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa14

0.25524492070582094


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 15

### Simulated Annealing (SA)

In [48]:
plan = 15
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.506162,0.493838
BWH,0.550676,0.449324
DMH,0.327331,0.672669
FHS,0.397494,0.602506
HTH,0.626179,0.373821


In [49]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa15 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa15 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa15

0.2588190768319244


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 16

### Simulated Annealing (SA)

In [50]:
plan = 16
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.528719,0.471281
BWH,0.509721,0.490279
DMH,0.267108,0.732892
FHS,0.47294,0.52706
HTH,0.461202,0.538798


In [51]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa16 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa16 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa16

0.25484400802560914


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 17

### Simulated Annealing (SA)

In [52]:
plan = 17
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.533419,0.466581
BWH,0.497875,0.502125
DMH,0.430086,0.569914
FHS,0.459897,0.540103
HTH,0.543758,0.456242


In [53]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa17 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa17 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa17

0.22719579723185795


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 18

### Simulated Annealing (SA)

In [54]:
plan = 18
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.546032,0.453968
BWH,0.41008,0.58992
DMH,0.442053,0.557947
FHS,0.466807,0.533193
HTH,0.722752,0.277248


In [55]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa18 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa18 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa18

0.27358013937109127


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 19

### Simulated Annealing (SA)

In [56]:
plan = 19
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.552097,0.447903
BWH,0.471631,0.528369
DMH,0.214898,0.785102
FHS,0.488421,0.511579
HTH,0.744166,0.255834


In [57]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa19 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa19 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa19

0.27734193278154584


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 20

### Simulated Annealing (SA)

In [58]:
plan = 20
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.503687,0.496313
BWH,0.577698,0.422302
DMH,0.327273,0.672727
FHS,0.458395,0.541605
HTH,0.695528,0.304472


In [59]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa20 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa20 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa20

0.2592578801966154


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 21

### Simulated Annealing (SA)

In [60]:
plan = 21
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.555556,0.444444
BWH,0.575389,0.424611
DMH,0.27138,0.72862
FHS,0.47402,0.52598
HTH,0.582418,0.417582


In [61]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa21 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa21 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa21

0.25632468568615374


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 22

### Simulated Annealing (SA)

In [62]:
plan = 22
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.327429,0.672571
BWH,0.536535,0.463465
DMH,0.388854,0.611146
FHS,0.472461,0.527539
HTH,0.774566,0.225434


In [63]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa22 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa22 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa22

0.27261742942400324


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 23

### Simulated Annealing (SA)

In [64]:
plan = 23
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.523989,0.476011
BWH,0.425253,0.574747
DMH,0.440878,0.559122
FHS,0.444264,0.555736
HTH,0.501481,0.498519


In [65]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa23 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa23 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa23

0.22520770155357456


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 24

### Simulated Annealing (SA)

In [66]:
plan = 24
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.587849,0.412151
BWH,0.458893,0.541107
DMH,0.492174,0.507826
FHS,0.463881,0.536119
HTH,0.519639,0.480361


In [67]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa24 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa24 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa24

0.25874396296843066


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Output Plan 25

### Simulated Annealing (SA)

In [68]:
plan = 25
sa_output_df = retrieve_output_data(algo, level, plan)
new_spas_sa = pd.merge(spas, sa_output_df, on='SPA')
new_spas_sa_grouped = group_data(new_spas_sa)
# vals= np.concatenate([new_spas_sa_grouped.loc[i,:].values for i in interested_districts])
# data_array[plan-1,:] = vals

new_spas_sa_grouped.head()
#To change everything down to SA or TS

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1
BRH,0.474878,0.525122
BWH,0.407029,0.592971
DMH,0.458732,0.541268
FHS,0.466269,0.533731
HTH,0.746599,0.253401


In [69]:
measurements_df, ind_diss_sa1 = get_measurements(new_spas_sa_grouped)
segregated_abs_diff_sa25 = measurements_df[measurements_df['ABS_DIFF'] > 0.7]
segregated_entropy_sa25 = measurements_df[measurements_df['ENTROPY_IND'] < 0.4]
ind_diss.append(ind_diss_sa1)
print(ind_diss_sa1)
segregated_entropy_sa25

0.2546929352173352


Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
NEW_HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


## Conclusions for New Data after Algorithms


Overall, using the dissimilarity index (~ 0.2), there does not appear to be any segregation within the districts.

Individually, using the absolute differences (0.7 cut-off) and entropy index (0.4 cut-off), there seems to be no evidence of segregation in some districts. It may be that the algorithm corrects the outlier PVH from the original data.

In [70]:
print(segregated_abs_diff_sa1)
print(segregated_abs_diff_sa2)
print(segregated_abs_diff_sa3)
print(segregated_abs_diff_sa4)
print(segregated_abs_diff_sa5)
print(segregated_abs_diff_sa6)
print(segregated_abs_diff_sa7)
print(segregated_abs_diff_sa8)
print(segregated_abs_diff_sa9)
print(segregated_abs_diff_sa10)
print(segregated_abs_diff_sa11)
print(segregated_abs_diff_sa12)
print(segregated_abs_diff_sa13)
print(segregated_abs_diff_sa14)
print(segregated_abs_diff_sa15)
print(segregated_abs_diff_sa16)
print(segregated_abs_diff_sa17)
print(segregated_abs_diff_sa18)
print(segregated_abs_diff_sa19)
print(segregated_abs_diff_sa20)
print(segregated_abs_diff_sa21)
print(segregated_abs_diff_sa22)
print(segregated_abs_diff_sa23)
print(segregated_abs_diff_sa24)
print(segregated_abs_diff_sa25)

Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
               HIGH_POP_W  HIGH_POP_NW  ABS_DIFF  ENTROPY_IND
NEW_HIGH_CODE                                                
PVH              0.143862     0.856138  0.712276     0.411912
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIF

In [71]:
print(segregated_entropy_sa1)
print(segregated_entropy_sa2)
print(segregated_entropy_sa3)
print(segregated_entropy_sa4)
print(segregated_entropy_sa5)
print(segregated_entropy_sa6)
print(segregated_entropy_sa7)
print(segregated_entropy_sa8)
print(segregated_entropy_sa9)
print(segregated_entropy_sa10)
print(segregated_entropy_sa11)
print(segregated_entropy_sa12)
print(segregated_entropy_sa13)
print(segregated_entropy_sa14)
print(segregated_entropy_sa15)
print(segregated_entropy_sa16)
print(segregated_entropy_sa17)
print(segregated_entropy_sa18)
print(segregated_entropy_sa19)
print(segregated_entropy_sa20)
print(segregated_entropy_sa21)
print(segregated_entropy_sa22)
print(segregated_entropy_sa23)
print(segregated_entropy_sa24)
print(segregated_entropy_sa25)

Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
Index: []
Empty DataFrame
Columns: [HIGH_POP_W, HIGH_POP_NW, ABS_DIFF, ENTROPY_IND]
In

In [72]:
# legend_handles = [Patch(facecolor='black', label='HAM'),
#                   Patch(facecolor='green', label='LIN'),
#                   Patch(facecolor='orange', label='SUL'),
#                    Patch(facecolor='red', label='GUI'),
#                    Patch(facecolor='blue', label='WAT'),
#                    Patch(facecolor='magenta', label='FDE'),
#                    Patch(facecolor='cyan', label='FHR'),
#                    Patch(facecolor='yellow', label='EVE'),
#                    Patch(facecolor='tab:olive', label='KWC')
#                 ]

# ax = spas.boundary.plot(figsize=(15,14))
# spas[spas['ELEM_CODE'] == 'HAM'].plot(ax=ax, color='black')
# spas[spas['ELEM_CODE'] == 'LIN'].plot(ax=ax, color='green')
# spas[spas['ELEM_CODE'] == 'SUL'].plot(ax=ax, color='orange')

# spas[spas['ELEM_CODE'] == 'GUI'].plot(ax=ax, color='red')
# spas[spas['ELEM_CODE'] == 'WAT'].plot(ax=ax, color='blue')
# spas[spas['ELEM_CODE'] == 'FDE'].plot(ax=ax, color='magenta')

# spas[spas['ELEM_CODE'] == 'FHR'].plot(ax=ax, color='cyan')
# spas[spas['ELEM_CODE'] == 'EVE'].plot(ax=ax, color='yellow')
# spas[spas['ELEM_CODE'] == 'KWC'].plot(ax=ax, color='tab:olive')

# # ax.legend
# ax.legend(handles=legend_handles)

In [73]:
# from numpy import concatenate
# import pandas as pd

# col_level_1 = np.concatenate([[i]*2 for i in interested_districts])
# col_level_2 = ['W', 'NW'] * len(interested_districts)
# cols_combined = zip(col_level_1, col_level_2)
# col_index = pd.MultiIndex.from_tuples(cols_combined, names=["District", "Group"])
# row_index = pd.Index(range(1,26))
# combined_df = pd.DataFrame(data=data_array, index = row_index, columns = col_index)
# combined_df 

In [74]:
# data_raw = spas_grouped.loc[interested_districts, ['ELEM_POP_W','ELEM_POP_NW']]
# data_original = data_raw.values.reshape((1,len(interested_districts)*2))
# original_df =  pd.DataFrame(data=data_original, columns = col_index)
# # print(data_raw.values)
# original_df

In [75]:
# white_column=[('GUI','W'), ('SUL','W'), ('LIN','W'),  ('FHR','W'),  ('HAM','W'),  ('KWC','W'), ('WAT','W'), ('EVE','W'), ('FDE', 'W')]
# ax, props = combined_df.boxplot(column=white_column, figsize=(15,14), return_type='both', patch_artist=True)
# ax.set_title('White Percentages')
# ax.set_xlabel('Districts')
# ax.set_ylabel('Percentages')

In [76]:
# nonwhite_column=[('GUI','NW'), ('SUL','NW'), ('LIN','NW'),  ('FHR','NW'),  ('HAM','NW'),  ('KWC','NW'), ('WAT','NW'), ('EVE','NW'), ('FDE','NW')]
# ax, props = combined_df.boxplot(column=nonwhite_column, figsize=(15,14), return_type='both', patch_artist=True)
# ax.set_title('Non-White Percentages')
# ax.set_xlabel('Districts')
# ax.set_ylabel('Percentages')
# for box in props['boxes']:
#     box.set_facecolor('orange')

In [77]:
# import matplotlib.pyplot as plt

# ax, props = combined_df.boxplot(figsize=(15,14), return_type='both', patch_artist=True, manage_ticks=False)
# ax.set_title('Whites vs Non-whites Percentages')
# ax.set_xlabel('Districts')
# ax.set_ylabel('Percentages')
# num_boxes = len(props['boxes'])
# for i in range(1, num_boxes, 2):
#     props['boxes'][i].set_facecolor('orange')
    
# legend_handles = [Patch(facecolor='blue', label='White Pop'),
#                 Patch(facecolor='orange', label='Non-White Pop')]
# ax.legend(handles=legend_handles)

# plt.tick_params(labelbottom=False)

In [78]:
# ax = combined_df['GUI'].plot(marker='o', subplots=True)
# pic_folder = 'Combined_linegraphs_andboxplots'

# ax = combined_df['GUI'].plot(marker='o')
# ax.set_title("GUI")
# x = np.arange(1,26)
# yw = [spas_grouped.loc['GUI', 'ELEM_POP_W']] * 25
# ynw = [spas_grouped.loc['GUI', 'ELEM_POP_NW']] * 25
# ax.plot(x,yw,color='k', linewidth='3')
# ax.plot(x,ynw,color='c', linewidth='3')
# # ax.legend()
# ax.get_figure().savefig('GUI_compare_{}'.format(algo))

In [79]:
# ax =  combined_df['GUI'].boxplot(figsize=(8,6), patch_artist=True)
# ax.set_title('GUI')
# ax.set_xlabel('Groups')
# ax.set_ylabel('Percentages')


# ax,bp_key = original_df['GUI'].boxplot(ax=ax, return_type='both', patch_artist=True)
# for box in bp_key['boxes']:
#     box.set_color('red')
#     box.set_linewidth(3)
    
# ax.get_figure().savefig('GUI_combined_{}'.format(algo))

In [80]:
# ax = combined_df['SUL'].plot(marker='o')
# ax.set_title("SUL")
# yw = [spas_grouped.loc['SUL', 'ELEM_POP_W']] * 25
# ynw = [spas_grouped.loc['SUL', 'ELEM_POP_NW']] * 25
# ax.plot(x,yw,color='k', linewidth='3')
# ax.plot(x,ynw,color='c', linewidth='3')
# ax.get_figure().savefig('SUL_compare_{}'.format(algo))

In [81]:
# ax =  combined_df['SUL'].boxplot(figsize=(8,6), patch_artist=True)
# ax.set_title('SUL')
# ax.set_xlabel('Groups')
# ax.set_ylabel('Percentages')

# ax,bp_key = original_df['SUL'].boxplot(ax=ax, return_type='both', patch_artist=True)
# for box in bp_key['boxes']:
#     box.set_color('red')
#     box.set_linewidth(3)
    
# ax.get_figure().savefig('SUL_combined_{}'.format(algo))

In [82]:
# ax = combined_df['LIN'].plot(marker='o')
# ax.set_title("LIN")
# yw = [spas_grouped.loc['LIN', 'ELEM_POP_W']] * 25
# ynw = [spas_grouped.loc['LIN', 'ELEM_POP_NW']] * 25
# ax.plot(x,yw,color='k', linewidth='3')
# ax.plot(x,ynw,color='c', linewidth='3')
# ax.get_figure().savefig('LIN_compare_{}'.format(algo))

In [83]:
# ax =  combined_df['LIN'].boxplot(figsize=(8,6), patch_artist=True)
# ax.set_title('LIN')
# ax.set_xlabel('Groups')
# ax.set_ylabel('Percentages')

# ax,bp_key = original_df['LIN'].boxplot(ax=ax, return_type='both', patch_artist=True)
# for box in bp_key['boxes']:
#     box.set_color('red')
#     box.set_linewidth(3)
    
    
# ax.get_figure().savefig('LIN_combined_{}'.format(algo))

In [84]:
# ax = combined_df['FHR'].plot(marker='o')
# ax.set_title("FHR")
# yw = [spas_grouped.loc['FHR', 'ELEM_POP_W']] * 25
# ynw = [spas_grouped.loc['FHR', 'ELEM_POP_NW']] * 25
# ax.plot(x,yw,color='k', linewidth='3')
# ax.plot(x,ynw,color='c', linewidth='3')
# ax.get_figure().savefig('FHR_compare_{}'.format(algo))

In [85]:
# ax =  combined_df['FHR'].boxplot(figsize=(8,6), patch_artist=True)
# ax.set_title('FHR')
# ax.set_xlabel('Groups')
# ax.set_ylabel('Percentages')

# ax,bp_key = original_df['FHR'].boxplot(ax=ax, return_type='both', patch_artist=True)
# for box in bp_key['boxes']:
#     box.set_color('red')
#     box.set_linewidth(3)
    
    
# ax.get_figure().savefig('FHR_combined_{}'.format(algo))

In [86]:
# ax = combined_df['HAM'].plot(marker='o')
# ax.set_title("HAM")
# yw = [spas_grouped.loc['HAM', 'ELEM_POP_W']] * 25
# ynw = [spas_grouped.loc['HAM', 'ELEM_POP_NW']] * 25
# ax.plot(x,yw,color='k', linewidth='3')
# ax.plot(x,ynw,color='c', linewidth='3')

# ax.get_figure().savefig('HAM_compare_{}'.format(algo))

In [87]:
# ax =  combined_df['HAM'].boxplot(figsize=(8,6), patch_artist=True)
# ax.set_title('HAM')
# ax.set_xlabel('Groups')
# ax.set_ylabel('Percentages')

# ax,bp_key = original_df['HAM'].boxplot(ax=ax, return_type='both', patch_artist=True)
# for box in bp_key['boxes']:
#     box.set_color('red')
#     box.set_linewidth(3)
    
# ax.get_figure().savefig('HAM_combined_{}'.format(algo))

In [88]:
# ax = combined_df['KWC'].plot(marker='o')
# ax.set_title("KWC")
# yw = [spas_grouped.loc['KWC', 'ELEM_POP_W']] * 25
# ynw = [spas_grouped.loc['KWC', 'ELEM_POP_NW']] * 25
# ax.plot(x,yw,color='k', linewidth='3')
# ax.plot(x,ynw,color='c', linewidth='3')
# ax.get_figure().savefig('KWC_compare_{}'.format(algo))

In [89]:
# ax =  combined_df['KWC'].boxplot(figsize=(8,6), patch_artist=True)
# ax.set_title('KWC')
# ax.set_xlabel('Groups')
# ax.set_ylabel('Percentages')

# ax,bp_key = original_df['KWC'].boxplot(ax=ax, return_type='both', patch_artist=True)
# for box in bp_key['boxes']:
#     box.set_color('red')
#     box.set_linewidth(3)
    
# ax.get_figure().savefig('KWC_combined_{}'.format(algo))

In [90]:
# ax = combined_df['WAT'].plot(marker='o')
# ax.set_title("WAT")
# yw = [spas_grouped.loc['WAT', 'ELEM_POP_W']] * 25
# ynw = [spas_grouped.loc['WAT', 'ELEM_POP_NW']] * 25
# ax.plot(x,yw,color='k', linewidth='3')
# ax.plot(x,ynw,color='c', linewidth='3')
# ax.get_figure().savefig('WAT_compare_{}'.format(algo))

In [91]:
# ax =  combined_df['WAT'].boxplot(figsize=(8,6), patch_artist=True)
# ax.set_title('WAT')
# ax.set_xlabel('Groups')
# ax.set_ylabel('Percentages')

# ax,bp_key = original_df['WAT'].boxplot(ax=ax, return_type='both', patch_artist=True)
# for box in bp_key['boxes']:
#     box.set_color('red')
#     box.set_linewidth(3)
    
# ax.get_figure().savefig('WAT_combined_{}'.format(algo))

In [92]:
# ax = combined_df['EVE'].plot(marker='o')
# ax.set_title("EVE")
# yw = [spas_grouped.loc['EVE', 'ELEM_POP_W']] * 25
# ynw = [spas_grouped.loc['EVE', 'ELEM_POP_NW']] * 25
# ax.plot(x,yw,color='k', linewidth='3')
# ax.plot(x,ynw,color='c', linewidth='3')

# ax.get_figure().savefig('EVE_compare_{}'.format(algo))

In [93]:
# ax =  combined_df['EVE'].boxplot(figsize=(8,6), patch_artist=True)
# ax.set_title('EVE')
# ax.set_xlabel('Groups')
# ax.set_ylabel('Percentages')

# ax,bp_key = original_df['EVE'].boxplot(ax=ax, return_type='both', patch_artist=True)
# for box in bp_key['boxes']:
#     box.set_color('red')
#     box.set_linewidth(3)
    
# ax.get_figure().savefig('EVE_combined_{}'.format(algo))

In [94]:
# ax = combined_df['FDE'].plot(marker='o')
# ax.set_title("FDE")
# yw = [spas_grouped.loc['FDE', 'ELEM_POP_W']] * 25
# ynw = [spas_grouped.loc['FDE', 'ELEM_POP_NW']] * 25
# ax.plot(x,yw,color='k', linewidth='3')
# ax.plot(x,ynw,color='c', linewidth='3')

# ax.get_figure().savefig('FDE_compare_{}'.format(algo))

# this is for the original data, i think
# I can see that the original data seems to be optimized here, since the gap is closer than the output plans
# The original is generally good though, in most cases - just for this district and SA

In [95]:
# ax =  combined_df['FDE'].boxplot(figsize=(8,6), patch_artist=True)
# ax.set_title('FDE')
# ax.set_xlabel('Groups')
# ax.set_ylabel('Percentages')

# ax,bp_key = original_df['FDE'].boxplot(ax=ax, return_type='both', patch_artist=True)
# for box in bp_key['boxes']:
#     box.set_color('red')
#     box.set_linewidth(3)
    
# ax.get_figure().savefig('FDE_combined_{}'.format(algo))

In [96]:
# ax = combined_df['SUG'].plot(marker='o')
# ax.set_title("SUG")
# yw = [spas_grouped.loc['SUG', 'ELEM_POP_W']] * 25
# ynw = [spas_grouped.loc['SUG', 'ELEM_POP_NW']] * 25
# ax.plot(x,yw,color='k', linewidth='3')
# ax.plot(x,ynw,color='c', linewidth='3')
# ax.get_figure().savefig('SUG_compare_{}'.format(algo))

In [97]:
# ax =  combined_df['SUG'].boxplot(figsize=(8,6), patch_artist=True)
# ax.set_title('SUG')
# ax.set_xlabel('Groups')
# ax.set_ylabel('Percentages')

# ax,bp_key = original_df['SUG'].boxplot(ax=ax, return_type='both', patch_artist=True)
# for box in bp_key['boxes']:
#     box.set_color('red')
#     box.set_linewidth(3)
    
# ax.get_figure().savefig('SUG_combined_{}'.format(algo))

In [98]:
# combined_df['FDE'].hist()

In [99]:
segregated_entropy
#this is for the original data

Unnamed: 0_level_0,HIGH_POP_W,HIGH_POP_NW,ABS_DIFF,ENTROPY_IND
HIGH_CODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [100]:
# ind_diss


In [101]:
# plt.hist(ind_diss, bins=[y/100 for y in range(28,40)])
# plt.axis([0, 25, 0.25, 0.50])
# plt.show()

#challenge with plotting histogram. Any help will be much appreciated