In [70]:
import pandas as pd
from datetime import date, datetime, timedelta
import pickle
import numpy as np
from math import sqrt
import copy
import matplotlib.pyplot as plt
%matplotlib inline
import math

In [71]:
NUM_HOTSPOTS = 10

In [72]:
def open_file():
    try:
        data_file = pd.read_csv(DEFAULT_FILE_PATH + CSV_FILE_EXTENSION)
    except:
        print("File not found")
        return pd.DataFrame()
    return data_file

In [73]:
# Define file variables
DEFAULT_FILE_PATH = "CrimeGUI/data"
CSV_FILE_EXTENSION = ".csv"
DATA_FILE = pd.DataFrame()
FILTERED_DATA = pd.DataFrame()
DATA_FILE = open_file()
FIRST_DATE = datetime.strptime(DATA_FILE['Date'].iloc[0], '%d/%m/%Y')
LAST_DATE = datetime.strptime(DATA_FILE['Date'].iloc[len(DATA_FILE) - 1], '%d/%m/%Y')
MODEL_FEATURE_SEPARATOR = "_"
MODEL_PATH = "CrimeGUI/Models/"
COMMA_SPACE = ", "
INCIDENT_COL_KEY = "Todays Reports"
NEIGHBOURHOOD_COL_KEY = "Neighborhood"
DATE_COL_KEY = "Date"

In [74]:
# Define feature selection variables
F_REGRESSION_NAME = "F-Regression"
F_REGRESSION_FILE_TAG = "f_regression"
CHI2_NAME = "Chi-Squared"
CHI2_FILE_TAG = "chi2"
ADABOOST_NAME = "AdaBoost"
ADABOOST_FILE_TAG = "adaboost"
EQUAL_DATA_NAME = "Equal Selection"
EQUAL_DATA_FILE_TAG = "equal_crime_and_business"
ALL_BUS_NAME = "All Business"
ALL_BUS_FILE_TAG = "all_business"
FEATURE_SELECTION = [F_REGRESSION_NAME, CHI2_NAME, ADABOOST_NAME, EQUAL_DATA_NAME, ALL_BUS_NAME]
FEATURES = {
    F_REGRESSION_NAME : ['Reports 1 day ago', 'Reports 2 days ago', 'Reports 3 days ago',
                       'Reports 4 days ago', 'Reports 5 days ago', 'Reports 6 days ago',
                      'Reports 7 days ago','Reports 14 days ago','Reports 30 days ago','Reports 365 days ago'],
    CHI2_NAME : ['South of Market', 'Mission', 'Tenderloin', 'Number of businesses', 
               'Downtown / Union Square', 'Civic Center', 'Reports 365 days ago',
               'Reports 1 day ago','Reports 2 days ago','Reports 14 days ago'],
    ADABOOST_NAME : ['Reports 365 days ago', 'Reports 1 day ago', 'Reports 14 days ago', 'Reports 3 days ago', 
               'Reports 2 days ago', 'Reports 7 days ago', 'Number of businesses',
               'Reports 4 days ago','Reports 5 days ago','Closures 365 days ago'],
    EQUAL_DATA_NAME : ['Number of businesses', 'Last 28 days closures', 'Last 7 days openings',
                          'Last 14 days closures', 'Last 7 days closures','Reports 1 day ago',
                      'Reports 2 days ago', 'Reports 4 days ago', 'Reports 30 days ago', 'Reports 7 days ago'],
    ALL_BUS_NAME : ['Number of businesses', 'Last 28 days closures', 'Last 7 days openings',
                          'Last 14 days closures', 'Last 7 days closures','Number of openings',
                   'Openings 4 days ago','Openings 1 day ago', 'Openings 7 days ago', 'Openings 2 days ago']
    }
FEATURE_FILE_TAGS = {
    F_REGRESSION_NAME : F_REGRESSION_FILE_TAG,
    CHI2_NAME : CHI2_FILE_TAG,
    ADABOOST_NAME : ADABOOST_FILE_TAG,
    EQUAL_DATA_NAME : EQUAL_DATA_FILE_TAG,
    ALL_BUS_NAME : ALL_BUS_FILE_TAG
    }
FEATURE_NAMES_BY_FILE_TAG = {
    F_REGRESSION_FILE_TAG : F_REGRESSION_NAME,
    CHI2_FILE_TAG : CHI2_NAME,
    ADABOOST_FILE_TAG : ADABOOST_NAME,
    EQUAL_DATA_FILE_TAG : EQUAL_DATA_NAME,
    ALL_BUS_FILE_TAG : ALL_BUS_NAME
}

In [75]:
# Define model variables
ANN_NAME = "Multi-Layer\nPerceptron"
ANN_FILE_TAG = "multi_layer_perceptron"
DECISION_TREE_NAME = "Decision Tree"
DECISION_TREE_FILE_TAG = "decision_tree"
ELASTIC_NET_NAME = "Elastic Net"
ELASTIC_NET_FILE_TAG = "elastic_net"
LASSO_NAME = "Lasso"
LASSO_FILE_TAG = "lasso"
LINERAR_REGRESSION_NAME = "Linear \nRegression"
LINERAR_REGRESSION_FILE_TAG = "linear_regression"
RANDOM_FOREST_NAME = "Random \nForest"
RANDOM_FOREST_FILE_TAG = "random_forest"
RIDGE_REGRESSION_NAME = "Ridge \nRegression"
RIDGE_REGRESSION_FILE_TAG = "ridge_regression"
SVM_NAME = "SVM"
SVM_FILE_TAG = "svm"
MODELS = [ANN_NAME,
          DECISION_TREE_NAME,
          ELASTIC_NET_NAME,
          LASSO_NAME,
          LINERAR_REGRESSION_NAME,
          RANDOM_FOREST_NAME,
          RIDGE_REGRESSION_NAME,
          SVM_NAME]
MODEL_FILE_TAGS = {
    ANN_NAME : ANN_FILE_TAG,
    DECISION_TREE_NAME : DECISION_TREE_FILE_TAG,
    ELASTIC_NET_NAME : ELASTIC_NET_FILE_TAG,
    LASSO_NAME : LASSO_FILE_TAG,
    LINERAR_REGRESSION_NAME: LINERAR_REGRESSION_FILE_TAG,
    RANDOM_FOREST_NAME: RANDOM_FOREST_FILE_TAG,
    RIDGE_REGRESSION_NAME: RIDGE_REGRESSION_FILE_TAG,
    SVM_NAME : SVM_FILE_TAG
    }
MODEL_NAMES_BY_FILE_TAG = {
    ANN_FILE_TAG : ANN_NAME,
    DECISION_TREE_FILE_TAG : DECISION_TREE_NAME,
    ELASTIC_NET_FILE_TAG : ELASTIC_NET_NAME,
    LASSO_FILE_TAG : LASSO_NAME,
    LINERAR_REGRESSION_FILE_TAG : LINERAR_REGRESSION_NAME,
    RANDOM_FOREST_FILE_TAG: RANDOM_FOREST_NAME,
    RIDGE_REGRESSION_FILE_TAG : RIDGE_REGRESSION_NAME,
    SVM_FILE_TAG : SVM_NAME
}

In [76]:
PARENT_NEIGHBOURHOODS = {
    "Central Waterfront" : ["Dogpatch"],
    "Eureka Valley" : ["Dolores Heights","Castro"],
    "Buena Vista" : ["Ashbury Heights"],
    "Cole Valley" : ["Parnassus Heights"],
    "Bayview" : ["Apparel City", "Produce Market"],
    "Russian Hill" : ["Aquatic Park / Ft. Mason"],
    "North Beach" : ["Bret Harte"],
    "Western Addition" : ["Cathedral Hill", "Japantown"],
    "Downtown / Union Square" : ["Fairmount", "Chinatown", "Lower Nob Hill", "Polk Gulch"],
    "Mission Terrace" : ["Cayuga"],
    "Northern Waterfront" : ["Fishermans Wharf"],
    "Bernal Heights" : ["Holly Park", "Peralta Heights", "St. Marys Park"],
    "Hunters Point" : ["India Basin"],
    "Forest Hill" : ["Laguna Honda"],
    "Hayes Valley" : ["Lower Haight"],
    "Portola" : ["McLaren Park", "University Mound"],
    "South of Market" : ["Mint Hill"],
    "Stonestown" : ["Parkmerced"],
    "Presidio Heights" : ["Presidio Terrace"],
    "South Beach" : ["Rincon Hill"],
    "Potrero Hill" : ["Showplace Square"],
    "Visitacion Valley" : ["Sunnydale"],
    "Lincoln Park / Ft. Miley" : ["Sutro Heights"],
    "Cow Hollow" : ["Union Street"]
    }

In [77]:
def test_open_file(expected_length,expected_columns):
    df = open_file()
    assert len(df) == expected_length, "Data frame length not as expected."
    actual_columns = df.columns
    for i in range(len(expected_columns)):
        assert expected_columns[i] == actual_columns[i], "Expected column " + expected_columns[i] + " but got " + actual_columns[i]
    print("All tests completed successfully")

In [78]:
def get_all_columns():
    return ['Reports 1 day ago','Reports 2 days ago','Reports 3 days ago','Reports 4 days ago','Reports 5 days ago',
                    'Reports 6 days ago','Reports 7 days ago','Reports 14 days ago','Reports 30 days ago',
                    'Reports 365 days ago','Last 7 days reports','Last 14 days reports','Last 28 days reports',
                    'Number of businesses','Businesses 1 day ago','Businesses 2 days ago','Businesses 3 days ago',
                    'Businesses 4 days ago','Businesses 5 days ago','Businesses 6 days ago','Businesses 7 days ago',
                    'Businesses 14 days ago','Businesses 30 days ago','Businesses 365 days ago','Number of closures',
                    'Closures 1 day ago','Closures 2 days ago','Closures 3 days ago','Closures 4 days ago',
                    'Closures 5 days ago','Closures 6 days ago','Closures 7 days ago','Closures 14 days ago',
                    'Closures 30 days ago','Closures 365 days ago','Last 7 days closures','Last 14 days closures',
                    'Last 28 days closures','Number of openings','Openings 1 day ago','Openings 2 days ago',
                    'Openings 3 days ago','Openings 4 days ago','Openings 5 days ago','Openings 6 days ago',
                    'Openings 7 days ago','Openings 14 days ago','Openings 30 days ago','Openings 365 days ago',
                    'Last 7 days openings','Last 14 days openings','Last 28 days openings','Alamo Square','Anza Vista',
                    'Apparel City','Aquatic Park / Ft. Mason','Balboa Terrace','Bayview','Bernal Heights','Bret Harte',
                    'Buena Vista','Candlestick Point SRA','Castro','Cathedral Hill','Cayuga','Central Waterfront','Chinatown',
                    'Civic Center','Clarendon Heights','Cole Valley','Corona Heights','Cow Hollow','Crocker Amazon',
                    'Diamond Heights','Dogpatch','Dolores Heights','Downtown / Union Square','Duboce Triangle','Eureka Valley',
                    'Excelsior','Fairmount','Financial District','Fishermans Wharf','Forest Hill','Forest Knolls','Glen Park',
                    'Golden Gate Heights','Golden Gate Park','Haight Ashbury','Hayes Valley','Holly Park','Hunters Point',
                    'India Basin','Ingleside','Ingleside Terraces','Inner Richmond','Inner Sunset','Japantown','Laguna Honda',
                    'Lake Street','Lakeshore','Laurel Heights / Jordan Park','Lincoln Park / Ft. Miley','Little Hollywood',
                    'Lone Mountain','Lower Haight','Lower Nob Hill','Lower Pacific Heights','Marina','McLaren Park',
                    'Merced Heights','Merced Manor','Midtown Terrace','Mint Hill','Miraloma Park','Mission','Mission Bay',
                    'Mission Dolores','Mission Terrace','Monterey Heights','Mt. Davidson Manor','Nob Hill','Noe Valley',
                    'North Beach','Northern Waterfront','Oceanview','Outer Mission','Outer Richmond','Outer Sunset',
                    'Pacific Heights','Panhandle','Parkmerced','Parkside','Parnassus Heights','Peralta Heights',
                    'Polk Gulch','Portola','Potrero Hill','Presidio Heights','Presidio National Park','Presidio Terrace',
                    'Produce Market','Rincon Hill','Russian Hill','Seacliff','Sherwood Forest','Showplace Square',
                    'Silver Terrace','South Beach','South of Market','St. Francis Wood','St. Marys Park','Stonestown',
                    'Sunnydale','Sunnyside','Sutro Heights','Telegraph Hill','Tenderloin','Treasure Island','Union Street',
                    'University Mound','Upper Market','Visitacion Valley','West Portal','Western Addition',
                    'Westwood Highlands','Westwood Park','Yerba Buena Island','Friday','Saturday','Sunday','Thursday',
                    'Tuesday','Wednesday','Todays Reports']

In [79]:
expected_columns = get_all_columns()
DEFAULT_FILE_PATH = "tuning_test_data"
expected_length = 819
test_open_file(expected_length,expected_columns)

All tests completed successfully


In [80]:
def get_hotspots(data, model_key, features_key,calendar_date):
    x_data = data.loc[data[DATE_COL_KEY].str.contains(calendar_date)]
    neighbourhoods_data = pd.DataFrame(x_data[NEIGHBOURHOOD_COL_KEY])
    neighbourhoods_data.reset_index(drop=True, inplace=True)
    y_data = pd.DataFrame(x_data[INCIDENT_COL_KEY])
    y_data.reset_index(drop=True, inplace=True)
    features_selected = FEATURES[features_key]
    x_data = x_data[features_selected]
    return load_model(x_data, y_data, neighbourhoods_data, model_key, features_key)

In [81]:
def load_model(x_data, y_data, neighbourhoods_data, model_key, features_key):
    model_tag = MODEL_FILE_TAGS[model_key]
    feature_tag = FEATURE_FILE_TAGS[features_key]
    file_path = MODEL_PATH + model_tag + MODEL_FEATURE_SEPARATOR + feature_tag
    with open(file_path, 'rb') as f:
        model = pickle.load(f)
        return make_prediction(model, x_data, y_data, neighbourhoods_data)

In [82]:
def make_prediction(model, x_data, y_data, neighbourhoods_data):
    y_predict = model.predict(x_data)
    y_actual,y_predict,neighbourhoods_data = merge_sub_neighbourhoods(y_data,y_predict,neighbourhoods_data)
    total_predictions = len(y_predict)
    neighbourhoods_data.reset_index(drop=True, inplace=True)
    prediction_neighbourhoods = neighbourhoods_data[NEIGHBOURHOOD_COL_KEY].to_numpy()
    actual_neighbourhoods = prediction_neighbourhoods.copy()
    indexes = y_actual.argsort()
    y_actual = np.flip(y_actual[indexes])
    actual_neighbourhoods = np.flip(actual_neighbourhoods[indexes])
    indexes = y_predict.argsort()
    y_predict = np.flip(y_predict[indexes])
    prediction_neighbourhoods = np.flip(prediction_neighbourhoods[indexes])
    return y_actual, actual_neighbourhoods, y_predict, prediction_neighbourhoods

In [83]:
def get_non_negative_value(value):
    if value < 0:
        return 0
    else:
        return value

In [84]:
def merge_sub_neighbourhoods(y_data,y_predict,neighbourhoods_data):
    y_data = pd.DataFrame(y_data).to_numpy().flatten()
    indexes_to_remove = []
    for parent_key in PARENT_NEIGHBOURHOODS:
        parent_index = neighbourhoods_data.index[neighbourhoods_data[NEIGHBOURHOOD_COL_KEY] == parent_key].tolist()[0]
        y_predict_parent_value = get_non_negative_value(y_predict[parent_index])
        y_data_parent_value = get_non_negative_value(y_data[parent_index])
        for sub_neighbourhood in PARENT_NEIGHBOURHOODS[parent_key]:
            sub_neighbourhood_index = neighbourhoods_data.index[neighbourhoods_data[NEIGHBOURHOOD_COL_KEY] == sub_neighbourhood].tolist()[0]
            indexes_to_remove.append(sub_neighbourhood_index)
            y_predict_sub_neighbourhood_value = get_non_negative_value(y_predict[sub_neighbourhood_index])
            y_predict_parent_value = y_predict_parent_value + y_predict_sub_neighbourhood_value
            y_data_sub_neighbourhood_value = get_non_negative_value(y_data[sub_neighbourhood_index])
            y_data_parent_value = y_data_parent_value + y_data_sub_neighbourhood_value
        y_predict[parent_index] = y_predict_parent_value
        y_data[parent_index] = y_data_parent_value
    neighbourhoods_data = neighbourhoods_data.drop(neighbourhoods_data.index[indexes_to_remove])
    indexes_to_remove.sort(reverse=True)
    for index in indexes_to_remove:
        y_predict = np.delete(y_predict,index)
        y_data = np.delete(y_data,index)
    return y_data,y_predict,neighbourhoods_data 

In [85]:
def test_get_hotspots(test_model_name,test_feature_name,calendar_date, expected_y_actual, expected_actual_neighbourhoods,
                     expected_y_predict, expected_prediction_neighbourhoods):
    data = open_file()
    y_actual, actual_neighbourhoods, y_predict, prediction_neighbourhoods = get_hotspots(data, 
                                                                                     test_model_name, 
                                                                                     test_feature_name,
                                                                                     calendar_date)
    for i in range(0,len(y_actual)):
        assert y_actual[i] == expected_y_actual[i], "Y actual not as expected."
    for i in range(0,len(expected_y_actual)):
        assert expected_y_actual[i] == y_actual[i], "Y actual not as expected."
        
    for i in range(0,len(y_predict)):
        assert y_predict[i] == expected_y_predict[i], "Y actual not as expected."
    for i in range(0,len(expected_y_predict)):
        assert expected_y_predict[i] == y_predict[i], "Y actual not as expected."   
    for i in range(0,len(actual_neighbourhoods)):
        assert actual_neighbourhoods[i] == expected_actual_neighbourhoods[i], "Actual neighbourhoods not as expected."
    for i in range(0,len(expected_actual_neighbourhoods)):
        assert expected_actual_neighbourhoods[i] == actual_neighbourhoods[i], "Actual neighbourhoods not as expected."    
    for i in range(0,len(prediction_neighbourhoods)):
        assert prediction_neighbourhoods[i] == expected_prediction_neighbourhoods[i], "Predicted neighbourhoods not as expected."  
    for i in range(0,len(expected_prediction_neighbourhoods)):
        assert expected_prediction_neighbourhoods[i] == prediction_neighbourhoods[i], "Predicted neighbourhoods not as expected."
    print("Tests completed successfully")

In [86]:
DEFAULT_FILE_PATH = "tuning_test_data"
test_model_name = "Tuning Template Test"
test_model_file_tag = "tuning_template_test_model"
test_feature_file_tag = "arbitrary_name"
test_feature_name = "Arbitrary Feature Name"
test_feature_features = ['Reports 1 day ago', 'Reports 2 days ago', 'Reports 3 days ago',
                       'Reports 4 days ago', 'Reports 5 days ago', 'Reports 6 days ago',
                      'Reports 7 days ago','Reports 14 days ago','Reports 30 days ago','Reports 365 days ago']
FEATURES[test_feature_name] = test_feature_features
FEATURE_FILE_TAGS[test_feature_name] = test_feature_file_tag
FEATURE_NAMES_BY_FILE_TAG[test_feature_file_tag] = test_feature_name
MODEL_FILE_TAGS[test_model_name] = test_model_file_tag
calendar_date = "05/01/2021"

In [87]:
expected_y_actual = [5, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
                     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
expected_actual_neighbourhoods = ['Downtown / Union Square', 'Bernal Heights', 'Western Addition', 'Portola',
 'Bayview', 'Eureka Valley', 'South Beach', 'Lincoln Park / Ft. Miley',
 'Hunters Point', 'Potrero Hill', 'Presidio Heights', 'Cow Hollow',
 'Russian Hill', 'Cole Valley', 'Mission Terrace', 'Hayes Valley',
 'Stonestown', 'Central Waterfront', 'Northern Waterfront', 'Buena Vista',
 'Visitacion Valley', 'North Beach', 'Forest Hill', 'South of Market',
 'Haight Ashbury', 'Golden Gate Park', 'Yerba Buena Island',
 'Golden Gate Heights', 'Ingleside Terraces', 'Inner Richmond',
 'Inner Sunset', 'Lake Street', 'Lakeshore', 'Ingleside', 'Diamond Heights',
 'Glen Park', 'Forest Knolls', 'Financial District', 'Excelsior',
 'Duboce Triangle', 'Crocker Amazon', 'Corona Heights', 'Clarendon Heights',
 'Civic Center', 'Candlestick Point SRA', 'Balboa Terrace', 'Anza Vista',
 'Laurel Heights / Jordan Park', 'Merced Manor', 'Little Hollywood',
 'St. Francis Wood', 'Panhandle', 'Parkside', 'Presidio National Park',
 'Seacliff', 'Sherwood Forest', 'Silver Terrace', 'Sunnyside', 'Outer Sunset',
 'Telegraph Hill', 'Tenderloin', 'Treasure Island', 'Upper Market',
 'West Portal', 'Westwood Highlands', 'Pacific Heights', 'Outer Richmond',
 'Lone Mountain', 'Mission', 'Lower Pacific Heights', 'Marina',
 'Merced Heights', 'Westwood Park', 'Midtown Terrace', 'Miraloma Park',
 'Mission Bay', 'Outer Mission', 'Mission Dolores', 'Monterey Heights',
 'Mt. Davidson Manor', 'Nob Hill', 'Noe Valley', 'Oceanview', 'Alamo Square']
expected_y_predict = expected_y_actual
expected_prediction_neighbourhoods = expected_actual_neighbourhoods

In [88]:
test_get_hotspots(test_model_name,test_feature_name,calendar_date, expected_y_actual, expected_actual_neighbourhoods,
                     expected_y_predict, expected_prediction_neighbourhoods)

Tests completed successfully


In [103]:
def calculate_accuracy(y_actual, actual_neighbourhoods, y_predict, prediction_neighbourhoods,num_hotspots):
    print(y_actual)
    print(type(y_actual))
    print(actual_neighbourhoods)
    print(type(actual_neighbourhoods))
    print(actual_neighbourhoods.shape)
    print(y_predict)
    print(type(y_predict))
    print(prediction_neighbourhoods)
    print(type(prediction_neighbourhoods))
    print(num_hotspots)
    print(type(num_hotspots))
    total_predictions = len(y_predict)
    predicted_hotspots, actual_hotspots = determine_hotspots(y_actual, 
                                                             actual_neighbourhoods, 
                                                             prediction_neighbourhoods,
                                                             num_hotspots)
    num_predictions = len(predicted_hotspots)
    classification_scores = calculate_standard_scores(num_predictions,
                                                      predicted_hotspots,
                                                      actual_hotspots,
                                                      total_predictions)
    misclassification_scores = get_missed_incidents(predicted_hotspots, 
                                                    actual_hotspots, 
                                                    y_actual, 
                                                    y_predict,
                                                    num_predictions,
                                                    actual_neighbourhoods)
    print(str(classification_scores + misclassification_scores))
    return classification_scores + misclassification_scores

In [90]:
def get_missed_incidents(predicted_hotspots, actual_hotspots, y_actual, y_predict,num_predictions,actual_neighbourhoods):
    incidents_correct = 0
    incidents_missed = 0
    additional_incidents_caught = 0
    for neighbourhood in predicted_hotspots:
        index = np.where(actual_neighbourhoods == neighbourhood)[0]
        value = y_actual[index]
        if neighbourhood in actual_hotspots:
            incidents_correct += value
        else:
            additional_incidents_caught += value
    i = 0
    i_limit = 0
    lowest_actual = y_actual[num_predictions-1]
    while i < num_predictions:
        if y_actual[i] != lowest_actual:
            neighbourhood = actual_hotspots[i]
            if neighbourhood not in predicted_hotspots:
                index = np.where(actual_neighbourhoods == neighbourhood)[0]
                value = y_actual[index]
                incidents_missed += value
            i += 1
        else:
            i_limit = i
            i = num_predictions
    i = i_limit
    remaining_hotspots_to_find = num_predictions - i
    hotspots_found = 0
    while i < len(actual_hotspots):
        neighbourhood = actual_hotspots[i]
        if neighbourhood in predicted_hotspots:
            hotspots_found += 1
        i += 1
    num_lowest_value_hotspots_missing = remaining_hotspots_to_find - hotspots_found
    lowest_values_missed = lowest_actual * num_lowest_value_hotspots_missing
    incidents_missed += lowest_values_missed
    total_actual_hotspots = incidents_correct + incidents_missed
    net_missed = incidents_missed - additional_incidents_caught
    misclassification_severity = net_missed / total_actual_hotspots
    return [misclassification_severity]

In [91]:
def determine_hotspots(y_actual, actual_neighbourhoods, prediction_neighbourhoods,num_hotspots):
    predicted_hotspots = prediction_neighbourhoods[:num_hotspots]
    actual_hotspots = actual_neighbourhoods[:num_hotspots]
    lowest_hotspot_value = y_actual[num_hotspots-1]
    i = num_hotspots
    while i < len(actual_neighbourhoods):
        if y_actual[i] == lowest_hotspot_value:
            np.append(actual_hotspots,actual_neighbourhoods[i])
            i += 1
        else:
            i = len(actual_neighbourhoods)
    return predicted_hotspots, actual_hotspots

In [92]:
def calculate_standard_scores(num_predictions,predicted_hotspots,actual_hotspots,total_predictions):
    #true positives
    tp = 0
    #false positives
    fp = 0
    for i in range(num_predictions):
        if predicted_hotspots[i] in actual_hotspots:
            tp += 1
        else:
            fp +=1
    #true negatives
    tn = total_predictions-num_predictions-fp
    #false negatives
    fn = fp
    #sensitivity/recall
    sensitivity = 0
    if tp + fn != 0:
        sensitivity = tp / (tp + fn)
    #specificity
    specificity = 0
    if tn + fp !=0:
        specificity = tn / (tn + fp)
    #precision
    precision = 0
    if tp + fp !=0:
        precision = tp / (tp + fp)
    #f1 - incorporates both sensitivity/recall and precision
    f1 = 0
    if precision + sensitivity != 0:
        f1 = 2 * (precision * sensitivity) / (precision + sensitivity)
    #matthews correlation coefficient
    mcc = 0
    if sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn+fn)) != 0:
        mcc = ((tp * tn) - (fp * fn)) / (sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn+fn)))
    return [sensitivity, specificity, precision, f1, mcc]

In [93]:
def get_results(num_hotspots):
    all_scores = {}
    for score_key in SCORE_KEYS:
        all_scores[score_key] = []
    algorithm_names = []
    for model_key in MODELS:
        for feature_key in FEATURES:
            scores = score_algorithm(model_key, feature_key,num_hotspots)
            for score_key in SCORE_KEYS:
                all_scores[score_key].append(scores[score_key])
            algorithm_names.append(model_key + "\n" + feature_key)
    return all_scores, algorithm_names

In [94]:
def score_algorithm(model_key, feature_key,num_hotspots):
    start_date = FIRST_DATE
    all_scores = {}
    for score_key in SCORE_KEYS:
        all_scores[score_key] = []
    while start_date <= LAST_DATE:
        y_actual, actual_neighbourhoods, y_predict, prediction_neighbourhoods = get_hotspots(DATA_FILE, 
                                                                                model_key, 
                                                                                feature_key, 
                                                                                str(start_date.strftime("%d/%m/%Y")))
        results = calculate_accuracy(y_actual, actual_neighbourhoods, y_predict, prediction_neighbourhoods,num_hotspots)
        for i in range(len(SCORE_KEYS)):
            all_scores[SCORE_KEYS[i]].append(results[i])
        start_date += timedelta(days=1)
    averages = {}
    for score_key in SCORE_KEYS:
        averages[score_key] = sum(all_scores[score_key]) / len (all_scores[score_key])
    return averages

In [95]:
def best_x_scores(score_metric,x,algorithms,results,num_hotspots):
    all_labels = algorithms.copy()
    x_labels = []
    best_scores = []
    scores = results[score_metric].copy()
    num_scores = len(scores)
    x = validate_x(num_scores,x)
    i = 0
    while i < x:
        if HIGHEST_IS_BEST[score_metric]:
            index = np.argmax(scores)
        else:
            index = np.argmin(scores)
        x_labels.append(all_labels[index])
        best_scores.append(scores[index])
        all_labels = np.delete(all_labels,index)
        scores = np.delete(scores,index)
        i+=1
    y_select = np.arange(len(best_scores))
    plt.figure(figsize = (15,15))
    plt.xticks(y_select,x_labels)
    #plt.xticks(rotation=45)
    plt.title("Top " + str(x) + " " + score_metric + " scores\nwhen predicting the top " 
              + str(num_hotspots) + " Crime Hotspots")
    plt.xlabel("Algorithm")
    plt.ylabel(score_metric + " score")
    high_score = max(best_scores)
    low_score = min(best_scores)
    min_y = low_score - 0.5 * (high_score - low_score)
    max_y = high_score + 0.5 * (high_score - low_score)
    plt.ylim(bottom = min_y, top = max_y)
    bar = plt.bar(y_select, best_scores)
    return bar

In [96]:
def get_labels():
    labels = []
    for algorithm in algorithms:
        for sel_method in sel_methods:
            labels.append(algorithm_display_names[algorithms.index(algorithm)] + "\nusing\n" + 
                          feature_select_display_names[np.nonzero(sel_methods == sel_method)[0][0]] + "\ndataset")
    return labels

In [97]:
def validate_x(num_scores,x):
    soft_cap = 15
    if x > num_scores or x < 1:
        if num_scores > soft_cap:
            return soft_cap
        else:
            return num_scores
    return x

In [98]:
def evaluate_models(num_hotspots_range):
    bars = []
    for i in num_hotspots_range:
        print(i)
        num_hotspots = i
        results, algorithms = get_results(num_hotspots)
        for metric in SCORE_KEYS:
            top_x = 10
            bars.append(best_x_scores(metric,top_x,algorithms,results,num_hotspots))
    return bars

In [99]:
SENSITIVITY_KEY = "Sensitivity"
SPECIFICITY_KEY = "Specificity"
PRECISION_KEY = "Precision"
F1_KEY = "F1"
MCC_KEY = "MCC"
SEVERITY_KEY = "Lowest Misclassification Severity"
SCORE_KEYS = [
    SENSITIVITY_KEY,
    SPECIFICITY_KEY,
    PRECISION_KEY,
    F1_KEY,
    MCC_KEY,
    SEVERITY_KEY
]
HIGHEST_IS_BEST = {
    SENSITIVITY_KEY: True,
    SPECIFICITY_KEY: True,
    PRECISION_KEY: True,
    F1_KEY:True,
    MCC_KEY: True,
    SEVERITY_KEY: False
}

In [100]:
num_hotspots_range = [2]

In [101]:
DEFAULT_FILE_PATH = "CrimeGUI/data"
CSV_FILE_EXTENSION = ".csv"
DATA_FILE = pd.DataFrame()
FILTERED_DATA = pd.DataFrame()
DATA_FILE = open_file()
FIRST_DATE = datetime.strptime(DATA_FILE['Date'].iloc[0], '%d/%m/%Y')
LAST_DATE = datetime.strptime(DATA_FILE['Date'].iloc[len(DATA_FILE) - 1], '%d/%m/%Y')
MODEL_FEATURE_SEPARATOR = "_"
MODEL_PATH = "CrimeGUI/Models/"
COMMA_SPACE = ", "
INCIDENT_COL_KEY = "Todays Reports"
NEIGHBOURHOOD_COL_KEY = "Neighborhood"
DATE_COL_KEY = "Date"
F_REGRESSION_NAME = "F-Regression"
F_REGRESSION_FILE_TAG = "f_regression"
CHI2_NAME = "Chi-Squared"
CHI2_FILE_TAG = "chi2"
ADABOOST_NAME = "AdaBoost"
ADABOOST_FILE_TAG = "adaboost"
EQUAL_DATA_NAME = "Equal Selection"
EQUAL_DATA_FILE_TAG = "equal_crime_and_business"
ALL_BUS_NAME = "All Business"
ALL_BUS_FILE_TAG = "all_business"
FEATURE_SELECTION = [F_REGRESSION_NAME, CHI2_NAME, ADABOOST_NAME, EQUAL_DATA_NAME, ALL_BUS_NAME]
FEATURES = {
    F_REGRESSION_NAME : ['Reports 1 day ago', 'Reports 2 days ago', 'Reports 3 days ago',
                       'Reports 4 days ago', 'Reports 5 days ago', 'Reports 6 days ago',
                      'Reports 7 days ago','Reports 14 days ago','Reports 30 days ago','Reports 365 days ago'],
    CHI2_NAME : ['South of Market', 'Mission', 'Tenderloin', 'Number of businesses', 
               'Downtown / Union Square', 'Civic Center', 'Reports 365 days ago',
               'Reports 1 day ago','Reports 2 days ago','Reports 14 days ago']
    }
FEATURE_FILE_TAGS = {
    F_REGRESSION_NAME : F_REGRESSION_FILE_TAG,
    CHI2_NAME : CHI2_FILE_TAG,
    ADABOOST_NAME : ADABOOST_FILE_TAG,
    EQUAL_DATA_NAME : EQUAL_DATA_FILE_TAG,
    ALL_BUS_NAME : ALL_BUS_FILE_TAG
    }
FEATURE_NAMES_BY_FILE_TAG = {
    F_REGRESSION_FILE_TAG : F_REGRESSION_NAME,
    CHI2_FILE_TAG : CHI2_NAME,
    ADABOOST_FILE_TAG : ADABOOST_NAME,
    EQUAL_DATA_FILE_TAG : EQUAL_DATA_NAME,
    ALL_BUS_FILE_TAG : ALL_BUS_NAME
}
ANN_NAME = "Multi-Layer\nPerceptron"
ANN_FILE_TAG = "multi_layer_perceptron"
DECISION_TREE_NAME = "Decision Tree"
DECISION_TREE_FILE_TAG = "decision_tree"
ELASTIC_NET_NAME = "Elastic Net"
ELASTIC_NET_FILE_TAG = "elastic_net"
LASSO_NAME = "Lasso"
LASSO_FILE_TAG = "lasso"
LINERAR_REGRESSION_NAME = "Linear \nRegression"
LINERAR_REGRESSION_FILE_TAG = "linear_regression"
RANDOM_FOREST_NAME = "Random \nForest"
RANDOM_FOREST_FILE_TAG = "random_forest"
RIDGE_REGRESSION_NAME = "Ridge \nRegression"
RIDGE_REGRESSION_FILE_TAG = "ridge_regression"
SVM_NAME = "SVM"
SVM_FILE_TAG = "svm"
MODELS = [LASSO_NAME,
          LINERAR_REGRESSION_NAME]
MODEL_FILE_TAGS = {
    ANN_NAME : ANN_FILE_TAG,
    DECISION_TREE_NAME : DECISION_TREE_FILE_TAG,
    ELASTIC_NET_NAME : ELASTIC_NET_FILE_TAG,
    LASSO_NAME : LASSO_FILE_TAG,
    LINERAR_REGRESSION_NAME: LINERAR_REGRESSION_FILE_TAG,
    RANDOM_FOREST_NAME: RANDOM_FOREST_FILE_TAG,
    RIDGE_REGRESSION_NAME: RIDGE_REGRESSION_FILE_TAG,
    SVM_NAME : SVM_FILE_TAG
    }
MODEL_NAMES_BY_FILE_TAG = {
    ANN_FILE_TAG : ANN_NAME,
    DECISION_TREE_FILE_TAG : DECISION_TREE_NAME,
    ELASTIC_NET_FILE_TAG : ELASTIC_NET_NAME,
    LASSO_FILE_TAG : LASSO_NAME,
    LINERAR_REGRESSION_FILE_TAG : LINERAR_REGRESSION_NAME,
    RANDOM_FOREST_FILE_TAG: RANDOM_FOREST_NAME,
    RIDGE_REGRESSION_FILE_TAG : RIDGE_REGRESSION_NAME,
    SVM_FILE_TAG : SVM_NAME
}

In [104]:
bars = evaluate_models(num_hotspots_range)

2
[24 19 16 16 11 11  9  9  9  7  7  6  6  6  6  6  5  5  5  4  4  3  3  3
  3  3  3  3  3  3  3  3  3  2  2  2  2  2  2  2  2  2  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0]
<class 'numpy.ndarray'>
['Downtown / Union Square' 'South of Market' 'Tenderloin' 'Bayview'
 'Bernal Heights' 'Northern Waterfront' 'Financial District'
 'Outer Sunset' 'Mission' 'Outer Richmond' 'Civic Center' 'Portola'
 'Marina' 'Noe Valley' 'Eureka Valley' 'Western Addition'
 'Laurel Heights / Jordan Park' 'Russian Hill' 'Mission Dolores'
 'Golden Gate Heights' 'Potrero Hill' 'Lower Pacific Heights'
 'Mission Bay' 'Hunters Point' 'Mission Terrace' 'Excelsior'
 'Clarendon Heights' 'Merced Heights' 'Central Waterfront' 'Cow Hollow'
 'North Beach' 'Stonestown' 'Pacific Heights' 'Presidio Heights'
 'Treasure Island' 'Silver Terrace' 'Inner Richmond' 'Visitacion Valley'
 'Panhandle' 'Outer Mission' 'Nob Hill' 'Crocker Amazon' 'Haight

<class 'numpy.ndarray'>
(84,)
[45.89220789 36.35847925 29.10905726 28.8504667  19.24514023 18.06653846
 16.99955048 16.95119311 14.30838688 13.39302354 12.88330837 12.13896474
 11.94811394 11.3294709  11.092971   10.77159217  9.55685754  8.87614433
  8.50964945  8.06384341  7.505382    7.13903749  6.67080036  6.6452923
  6.43448288  6.2582795   5.74788469  5.26227496  5.05950797  4.81847581
  4.81795273  4.71564291  4.63642545  4.48438238  4.45151663  4.4420377
  4.28253535  4.22922901  4.16236565  4.13730952  3.60460448  3.56856944
  3.48124205  3.04320639  2.96626157  2.94855495  2.86317738  2.65128404
  2.49238859  2.45217373  2.45092966  2.44299312  2.3189625   2.2954171
  2.27126179  2.22650137  1.82725618  1.75118687  1.60490888  1.53230893
  1.51075831  1.40886608  1.39003638  1.26293588  1.05904388  0.99575045
  0.95820698  0.90028137  0.88798287  0.84108032  0.66200909  0.59091066
  0.49013302  0.47457327  0.4470441   0.32484259  0.27914332  0.27564835
  0.27564835  0.24801141

  0.27181864  0.24811917  0.23578678  0.07672342  0.07672342  0.07672342]
<class 'numpy.ndarray'>
['South of Market' 'Tenderloin' 'Downtown / Union Square' 'Mission'
 'Bayview' 'Potrero Hill' 'Western Addition' 'Civic Center'
 'Northern Waterfront' 'Bernal Heights' 'Portola' 'Marina'
 'Pacific Heights' 'North Beach' 'Inner Richmond' 'Outer Sunset'
 'Russian Hill' 'Visitacion Valley' 'Outer Richmond' 'Financial District'
 'Eureka Valley' 'Noe Valley' 'Hunters Point' 'Cow Hollow' 'Mission Bay'
 'Stonestown' 'Mission Dolores' 'Mission Terrace' 'Excelsior'
 'Lower Pacific Heights' 'Parkside' 'Hayes Valley' 'Inner Sunset'
 'South Beach' 'Silver Terrace' 'Duboce Triangle' 'Nob Hill'
 'Presidio Heights' 'Central Waterfront' 'Clarendon Heights'
 'Golden Gate Park' 'Haight Ashbury' 'Alamo Square' 'Panhandle'
 'Crocker Amazon' 'Cole Valley' 'Lone Mountain' 'Golden Gate Heights'
 'Buena Vista' 'Upper Market' 'Treasure Island' 'Outer Mission'
 'Diamond Heights' 'Oceanview' 'Laurel Heights / Jordan

  0.27181864  0.24811917  0.07672342  0.07672342  0.07672342  0.07672342]
<class 'numpy.ndarray'>
['South of Market' 'Downtown / Union Square' 'Mission' 'Tenderloin'
 'Bayview' 'Potrero Hill' 'Civic Center' 'Western Addition' 'Portola'
 'Bernal Heights' 'Northern Waterfront' 'Financial District'
 'Eureka Valley' 'Outer Sunset' 'Outer Richmond' 'Marina'
 'Pacific Heights' 'North Beach' 'Mission Bay' 'Russian Hill'
 'Hunters Point' 'Mission Terrace' 'Inner Richmond' 'Inner Sunset'
 'Excelsior' 'Cow Hollow' 'Lower Pacific Heights' 'Stonestown'
 'Central Waterfront' 'Visitacion Valley' 'Silver Terrace'
 'Laurel Heights / Jordan Park' 'South Beach' 'Duboce Triangle'
 'Noe Valley' 'Hayes Valley' 'Mission Dolores' 'Presidio Heights'
 'Nob Hill' 'Panhandle' 'Treasure Island' 'Cole Valley' 'Crocker Amazon'
 'Upper Market' 'Outer Mission' 'Golden Gate Park' 'Lone Mountain'
 'Diamond Heights' 'Haight Ashbury' 'Buena Vista' 'Merced Heights'
 'Sunnyside' 'Golden Gate Heights' 'Ingleside' 'Alamo Squ

  0.204271    0.07672342  0.07672342  0.07672342  0.07672342  0.07672342]
<class 'numpy.ndarray'>
['South of Market' 'Downtown / Union Square' 'Mission' 'Tenderloin'
 'Potrero Hill' 'Bayview' 'Western Addition' 'Civic Center' 'Outer Sunset'
 'Cow Hollow' 'Bernal Heights' 'Portola' 'Marina' 'Russian Hill'
 'Outer Richmond' 'Inner Richmond' 'Northern Waterfront' 'Hunters Point'
 'Pacific Heights' 'Visitacion Valley' 'Inner Sunset' 'Excelsior'
 'North Beach' 'Eureka Valley' 'Mission Bay' 'Hayes Valley'
 'Financial District' 'Silver Terrace' 'Golden Gate Heights'
 'Central Waterfront' 'Stonestown' 'Mission Terrace' 'Buena Vista'
 'Noe Valley' 'Nob Hill' 'Lower Pacific Heights' 'Golden Gate Park'
 'Mission Dolores' 'Duboce Triangle' 'Parkside' 'South Beach' 'Anza Vista'
 'Lone Mountain' 'Treasure Island' 'Crocker Amazon'
 'Laurel Heights / Jordan Park' 'Upper Market' 'Presidio Heights'
 'Sunnyside' 'Telegraph Hill' 'Glen Park' 'Cole Valley' 'Alamo Square'
 'Mt. Davidson Manor' 'Outer Missio

<class 'numpy.ndarray'>
(84,)
[49.0629136  34.01100081 31.59920123 25.38554855 20.865559   19.75103614
 16.77487139 16.54564648 16.47657445 15.16494031 14.64818999 11.70033626
 11.07254295 11.01038151 10.65593892 10.60692359 10.51729582 10.44976945
 10.02769726  9.72626558  9.36800296  8.79908038  8.13676523  7.89255265
  6.47445178  6.32413226  6.12796726  6.07646283  5.98731505  5.83933787
  5.34501664  5.25096898  5.05647166  4.90784941  4.45563381  4.2753904
  3.87909831  3.87595977  3.50064774  3.49520865  3.46184793  3.42634899
  3.35067223  3.09666812  2.88947107  2.76410375  2.63810464  2.53487453
  2.52188904  2.29913694  2.23557981  2.2246485   2.08936027  1.96420843
  1.94131502  1.70493838  1.66405928  1.53147998  1.51407679  1.44508335
  1.35869451  1.29284384  1.2663716   1.18809388  1.04740575  1.01557959
  0.97956202  0.96684575  0.93588873  0.91641786  0.74587966  0.55842958
  0.49623833  0.47806825  0.43820667  0.41940716  0.41075891  0.24801141
  0.23936316  0.076723

[23 20 13 13 12 12 11 10 10 10  9  9  8  8  7  5  5  5  4  4  4  4  4  3
  3  3  3  3  3  3  3  3  3  3  2  2  2  2  2  2  2  2  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0]
<class 'numpy.ndarray'>
['South of Market' 'Tenderloin' 'Mission' 'Civic Center' 'Bayview'
 'Downtown / Union Square' 'Marina' 'Potrero Hill' 'Western Addition'
 'Hunters Point' 'North Beach' 'Outer Richmond' 'Outer Sunset' 'Ingleside'
 'Inner Richmond' 'Mission Terrace' 'Hayes Valley' 'Pacific Heights'
 'Outer Mission' 'Bernal Heights' 'Financial District' 'Parkside'
 'Inner Sunset' 'Northern Waterfront' 'Mission Bay' 'Mission Dolores'
 'Mt. Davidson Manor' 'Nob Hill' 'Haight Ashbury' 'Cow Hollow'
 'Eureka Valley' 'Central Waterfront' 'Lower Pacific Heights'
 'Golden Gate Park' 'Oceanview' 'Portola' 'Merced Heights'
 'Golden Gate Heights' 'Russian Hill' 'Seacliff' 'Visitacion Valley'
 'Excelsior' 'Merced Manor' 'Ingleside Terraces' 

(84,)
[39.0887053  38.79472763 35.46418437 32.87008997 21.70025632 21.093349
 16.81504884 14.80719822 14.32207413 12.81488538 12.27213864 11.55456228
 11.48305414 10.40476793 10.35408574  9.84293898  9.79515236  9.65326186
  9.25405191  9.24395111  9.14402567  9.14118886  9.02419996  7.81147519
  7.74414055  7.68187755  6.88053275  5.995657    5.71648057  5.51326383
  5.41381584  5.16036092  4.79273394  4.76027109  4.71577189  4.55445432
  4.0824206   3.91393438  3.89130981  3.73141383  3.69053026  3.16793495
  3.00669616  2.9241888   2.83599319  2.83165438  2.69383312  2.48997358
  2.35016986  2.33503922  2.33058263  2.20647007  2.1221051   2.03306903
  1.95810857  1.86946797  1.81950978  1.76835282  1.73949766  1.71122985
  1.62410265  1.44117416  1.3287943   1.30085954  1.25075581  1.22707696
  1.163917    1.0739138   0.93397916  0.92058418  0.87907833  0.69820179
  0.55336719  0.51118181  0.48663805  0.43471171  0.36333436  0.27914332
  0.23936316  0.07672342  0.07672342  0.0767234

[25 22 15 15 10  9  8  8  8  7  7  7  6  5  5  5  5  5  5  5  5  5  4  4
  4  4  3  3  3  3  3  3  3  2  2  2  2  2  2  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0]
<class 'numpy.ndarray'>
['South of Market' 'Tenderloin' 'Bayview' 'Downtown / Union Square'
 'Mission' 'Potrero Hill' 'Portola' 'Bernal Heights' 'Cow Hollow'
 'Civic Center' 'Excelsior' 'Russian Hill' 'Nob Hill' 'Marina'
 'Outer Sunset' 'Mission Terrace' 'North Beach' 'Northern Waterfront'
 'Eureka Valley' 'Inner Sunset' 'Western Addition' 'Hayes Valley'
 'Duboce Triangle' 'Mission Dolores' 'Pacific Heights' 'Clarendon Heights'
 'Financial District' 'Hunters Point' 'Cole Valley' 'Lone Mountain'
 'Alamo Square' 'South Beach' 'Visitacion Valley' 'Oceanview'
 'Haight Ashbury' 'West Portal' 'Upper Market' 'Buena Vista' 'Stonestown'
 'Silver Terrace' 'Golden Gate Heights' 'Golden Gate Park' 'Anza Vista'
 'Ingleside' 'Inner Richmond' 'Trea

<class 'numpy.ndarray'>
(84,)
[46.299696   34.74454817 34.53734168 33.65608975 20.44838528 18.48993199
 17.72837065 15.99141683 15.42932158 15.20257474 13.82127183 13.78082352
 11.52419745 11.17214019 11.13707665 10.30397774 10.14063737  9.73325402
  9.54938674  9.45140129  8.85372393  8.78829445  8.66974807  7.91506504
  7.45766463  7.05928521  6.25658899  5.86588375  5.81067939  5.6710953
  5.485291    5.37426682  5.00103362  4.88573962  4.82954313  4.52012058
  4.51413543  4.38288383  4.32170121  4.29576138  4.10941347  4.05743603
  3.92321706  3.83904789  3.69728221  3.55657432  3.17837458  3.16059298
  3.15044542  3.09821842  2.86823322  2.78144017  2.54008448  2.53995119
  2.43437165  2.3117823   2.18426216  2.08833345  2.00793365  1.9726297
  1.56795693  1.33349473  1.15730125  1.10619738  0.996141    0.9762126
  0.91586685  0.78446679  0.7643569   0.76084948  0.75616148  0.75462698
  0.43471171  0.39485013  0.28771312  0.27914332  0.27181864  0.27181864
  0.23936316  0.07672342

  0.27914332  0.24801141  0.24801141  0.23936316  0.07672342  0.07672342]
<class 'numpy.ndarray'>
['South of Market' 'Downtown / Union Square' 'Tenderloin' 'Mission'
 'Potrero Hill' 'Bayview' 'Western Addition' 'Portola' 'Civic Center'
 'Northern Waterfront' 'Marina' 'Inner Richmond' 'Bernal Heights'
 'Hunters Point' 'North Beach' 'Outer Sunset' 'Excelsior'
 'Mission Terrace' 'Russian Hill' 'Eureka Valley' 'Noe Valley'
 'Outer Richmond' 'Hayes Valley' 'Stonestown' 'South Beach' 'Mission Bay'
 'Pacific Heights' 'Cow Hollow' 'Inner Sunset' 'Visitacion Valley'
 'Mission Dolores' 'Financial District' 'Lower Pacific Heights'
 'Silver Terrace' 'Buena Vista' 'Duboce Triangle' 'Nob Hill' 'Parkside'
 'Panhandle' 'Haight Ashbury' 'Laurel Heights / Jordan Park'
 'Alamo Square' 'Central Waterfront' 'Crocker Amazon' 'Presidio Heights'
 'Upper Market' 'Oceanview' 'Golden Gate Park' 'Lone Mountain'
 'West Portal' 'Outer Mission' 'Sunnyside' 'Clarendon Heights' 'Lakeshore'
 'Telegraph Hill' 'Treasure 

  0.24811917  0.24811917  0.24801141  0.07672342  0.07672342  0.07672342]
<class 'numpy.ndarray'>
['South of Market' 'Downtown / Union Square' 'Tenderloin' 'Mission'
 'Bayview' 'Potrero Hill' 'Outer Sunset' 'Western Addition'
 'Bernal Heights' 'Inner Richmond' 'Marina' 'Civic Center'
 'Northern Waterfront' 'Cow Hollow' 'Excelsior' 'Hunters Point'
 'Inner Sunset' 'Portola' 'Russian Hill' 'Visitacion Valley'
 'Pacific Heights' 'Mission Terrace' 'Hayes Valley' 'Nob Hill'
 'Financial District' 'North Beach' 'Outer Richmond' 'Mission Bay'
 'Lone Mountain' 'Central Waterfront' 'Mission Dolores' 'Eureka Valley'
 'Silver Terrace' 'Duboce Triangle' 'Lower Pacific Heights' 'Buena Vista'
 'Laurel Heights / Jordan Park' 'Parkside' 'Stonestown' 'South Beach'
 'Outer Mission' 'Golden Gate Park' 'Anza Vista' 'Golden Gate Heights'
 'Noe Valley' 'Upper Market' 'Panhandle' 'Cole Valley' 'Haight Ashbury'
 'Sunnyside' 'Forest Hill' 'Clarendon Heights' 'Presidio Heights'
 'Glen Park' 'Oceanview' 'Diamond H

KeyboardInterrupt: 

In [None]:
num_hotspots_range = [4]