# Normalization Logic

_Used as a temporary stop-gap to provide normalization for Sun Cloud scores._


To run this notebook, install the ArcGIS API for Python. To run it via Docker instead, execute the following from the Sun Cloud repo directory:

```bash
docker pull esridocker/arcgis-api-python-notebook

docker run -it --rm --name arcgis -p 8889:8888 -e JUPYTER_ENABLE_LAB=yes -v $(pwd):/home/jovyan esridocker/arcgis-api-python-notebook 
```

Then copy the URL printed by the container into your browser, changing port 8888 to 8889 (the host port mapped by the `docker run` command above).



In [None]:
from arcgis.gis import GIS
import getpass

import datetime as dt
import statistics
import numpy as np
import os

from arcgis.features import FeatureLayer
import matplotlib.pyplot as plt

import pandas as pd

In [None]:
# Portal connection settings. The password is never stored in the notebook;
# it is requested interactively each time this cell runs.
PORTAL_URL = "https://azgeo.maps.arcgis.com"
PORTAL_USERNAME = "megge_azgeo"

gis = GIS(
    url=PORTAL_URL,
    username=PORTAL_USERNAME,
    password=getpass.getpass("Enter password:"),
)

In [None]:
%matplotlib inline

In [None]:
def histo(x, bins = 20):
    """
    Plot the distribution of a set of values as a histogram

    Inputs:
    x -- a list of data points
    bins -- the number of bins for the histogram
    """
    figure, axis = plt.subplots(nrows=1, sharey=True)
    axis.hist(x, bins=bins)
    plt.show()


def normalize(x, reverse = False, log = False, clip_outliers = 3):
    """
    Normalize a series of values into the 0 - 1 range

    Inputs:
    x -- a Pandas Series or numpy array_like object
    reverse -- invert the rank order (largest input maps to 0, smallest to 1)
    log = False -- log values before normalizing; when the minimum is <= 0 the
        values are first shifted so that the minimum becomes 1
    clip_outliers = 3 -- clip outliers more than x standard deviations from the
        mean; 0 or None disables clipping

    Returns the normalized values rounded to 4 decimal places. Missing values
    stay missing. When every value is identical (there is no spread to
    rescale) each non-missing value maps to 0 instead of dividing by zero.
    """
    if log:
        # Use x.min() rather than the builtin min(): the builtin compares
        # element by element and its answer depends on where NaN sits in the
        # data, which could skip the shift and feed negatives to np.log.
        lowest = x.min()
        if pd.notna(lowest) and lowest <= 0:
            x = x + (abs(lowest) + 1)
        x = np.log(x)

    if clip_outliers and clip_outliers > 0:
        sd = np.std(x)
        center = np.mean(x)
        upper = center + clip_outliers * sd
        lower = center - clip_outliers * sd
        x = np.maximum(np.minimum(x, upper), lower)

    min_val = x.min()
    max_val = x.max()
    span = max_val - min_val

    # No spread (constant input) or nothing but missing values: rescaling
    # would divide by zero, so return the zero/NaN offsets directly.
    if pd.isna(span) or span == 0:
        return (x - min_val).round(4)

    if reverse:
        result = (max_val - x) / span
    else:
        result = (x - min_val) / span

    return result.round(4)

In [None]:
# Search the portal for feature services with the Sun Cloud scores title
query = 'title: "Sun Cloud Performance Scores" AND type: "Feature Service"'
search_results = gis.content.search(
    query=query,
    max_items=10,
)

def readable_date(portal_stamp):
    """
    Format a millisecond epoch timestamp as human-readable local time

    Inputs:
    portal_stamp -- timestamp in milliseconds since the epoch
    """
    seconds = portal_stamp / 1000
    moment = dt.datetime.fromtimestamp(seconds)
    return moment.strftime('%B %d %Y at %I:%M.%S %p')

# One aligned line per search hit: title, item type, last-modified time
for content in search_results:
    modified = readable_date(content.modified)
    print(f" {content.title:<40} {content.type:25} {modified:40}")


In [None]:
# Work with the first search hit. This assumes the search above returned at
# least one item and that the first one is the intended service -- check the
# listing printed by the previous cell before continuing.
item = search_results[0]
# Bare expression so the notebook displays the selected item
item

## Criteria Normalization Logic

In [None]:
# Show the attribute field names defined on the first layer of the item
[
    field_info['name']
    for field_info in item.layers[0].properties['fields']
]

In [None]:
# Query the first layer of the item (no filter) and load it into a DataFrame
layer = item.layers[0]
layer_fset = layer.query()
df = layer_fset.sdf

# Criteria to normalize, in the order their *_norm columns are written
criteria = [
    'injury_rate',
    'non_motorized_injuries',
    'excess_expected_crashes',
    'pavement_pct_poor',
    'bridge_structural_rating',
    'bridge_deck_geometry',
    'lottr',
    'avg_delay',
    'vc_ratio',
    'vmt_change',
    'transit_overlap',
    'transit_ridership',
    'employment_change',
    'critical_freight',
    'households_no_broadband',
]

# Criteria normalized with reverse=True (rank order inverted)
inverted_criteria = {'bridge_structural_rating', 'bridge_deck_geometry'}

# Criteria that are cleaned before normalizing: transit overlap is capped at
# 100 and missing transit ridership is treated as 0.
cleaned_inputs = {
    'transit_overlap': df['transit_overlap'].clip(upper=100),
    'transit_ridership': df['transit_ridership'].fillna(0),
}

for criterion in criteria:
    raw_values = cleaned_inputs.get(criterion, df[criterion])
    df[criterion + '_norm'] = normalize(raw_values, reverse = criterion in inverted_criteria)

# Keep only the key and the freshly computed columns for the update payload
df = df[['OBJECTID'] + [criterion + '_norm' for criterion in criteria]]

update_fset = df.spatial.to_featureset()

layer.edit_features(updates = update_fset)

In [None]:
# calculate composite scores from the normalized criteria

layer_fset = layer.query()

df = layer_fset.sdf


def _weighted_sum(frame, weights):
    """Sum ``frame[column].fillna(0) * weight`` over the ``weights`` mapping, row by row."""
    return sum(frame[column].fillna(0) * weight for column, weight in weights.items())


# Weight applied to each normalized criterion in the overall score
OVERALL_WEIGHTS = {
    'injury_rate_norm': 10,
    'non_motorized_injuries_norm': 10,
    'excess_expected_crashes_norm': 12,
    'pavement_pct_poor_norm': 8,
    'bridge_structural_rating_norm': 4,
    'bridge_deck_geometry_norm': 4,
    'lottr_norm': 6,
    'avg_delay_norm': 4,
    'vc_ratio_norm': 4,
    'vmt_change_norm': 6,
    'transit_overlap_norm': 4,
    'transit_ridership_norm': 4,
    'employment_change_norm': 8,
    'critical_freight_norm': 8,
    'households_no_broadband_norm': 8,
}

# Weights for the per-category scores
SAFETY_WEIGHTS = {
    'injury_rate_norm': 30,
    'non_motorized_injuries_norm': 30,
    'excess_expected_crashes_norm': 40,
}

ASSET_WEIGHTS = {
    'pavement_pct_poor_norm': 50,
    'bridge_structural_rating_norm': 25,
    'bridge_deck_geometry_norm': 25,
}

MOBILITY_WEIGHTS = {
    'lottr_norm': 20,
    'avg_delay_norm': 15,
    'vc_ratio_norm': 15,
    'vmt_change_norm': 20,
    'transit_overlap_norm': 15,
    'transit_ridership_norm': 15,
}

ECONOMY_WEIGHTS = {
    'employment_change_norm': 33,
    'critical_freight_norm': 33,
    'households_no_broadband_norm': 34,
}

# Every weight set totals 100, so each score shares one scale; fail loudly if
# an edit to the weights breaks that.
assert all(
    sum(weight_set.values()) == 100
    for weight_set in (OVERALL_WEIGHTS, SAFETY_WEIGHTS, ASSET_WEIGHTS,
                       MOBILITY_WEIGHTS, ECONOMY_WEIGHTS)
)

df['composite_score_unweighted'] = _weighted_sum(df, OVERALL_WEIGHTS)

df['composite_score_safety'] = _weighted_sum(df, SAFETY_WEIGHTS)

df['composite_score_assets'] = _weighted_sum(df, ASSET_WEIGHTS)
# for segments without bridges, reassign the bridge weights to pavement so
# pavement carries the whole asset weight. Multiplying by the total weight
# keeps these rows on the same scale as every other asset score; assigning
# the bare 0-1 normalized value would leave them 100x too small.
no_bridge = df['bridge_deck_geometry_norm'].isna()
df.loc[no_bridge, 'composite_score_assets'] = (
    df.loc[no_bridge, 'pavement_pct_poor_norm'].fillna(0) * sum(ASSET_WEIGHTS.values())
)

df['composite_score_mobility'] = _weighted_sum(df, MOBILITY_WEIGHTS)

df['composite_score_economy'] = _weighted_sum(df, ECONOMY_WEIGHTS)

# The final score boosts the overall score by the percent_disadvantaged
# value (missing treated as 0): score * (1 + percent_disadvantaged).
# NOTE(review): this assumes percent_disadvantaged is stored as a 0-1
# fraction rather than 0-100 -- confirm against the layer.
df['composite_score'] = (df['composite_score_unweighted'] + df['composite_score_unweighted'] * df['percent_disadvantaged'].fillna(0)).round(2)
df['composite_score_unweighted'] = df['composite_score_unweighted'].round(2)

# apply updates
df = df[['OBJECTID',
         'composite_score_unweighted', 'composite_score_assets',
         'composite_score_mobility', 'composite_score_economy',
         'composite_score_safety', 'composite_score']]

update_fset = df.spatial.to_featureset()

layer.edit_features(updates = update_fset)
