In [1]:
import json
import os
from collections import Counter

import calplot
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
from matplotlib.ticker import FixedLocator

### First Map: Gentrification Typologies + Crime Rate Growth

In [None]:
###-------------------------------------------------------------------------------------------------------------------
#          Initialize Data
###-------------------------------------------------------------------------------------------------------------------
# crime categories this analysis focuses on; rows of every other category are discarded
focuscrimes = {
    'WEAPON LAWS', 'PROSTITUTION', 'DRIVING UNDER THE INFLUENCE', 'ROBBERY', 'BURGLARY',
    'ASSAULT', 'DRUNKENNESS', 'DRUG/NARCOTIC', 'TRESPASS', 'LARCENY/THEFT', 'VANDALISM',
    'VEHICLE THEFT', 'STOLEN PROPERTY', 'DISORDERLY CONDUCT',
}

# load the data
crime_data = pd.read_csv("police_data.csv")

# filter out non-focused crimes; .copy() detaches the result from the raw frame so the
# 'Year' column added below is written to an independent frame rather than to a slice
# (this is what avoids pandas' SettingWithCopyWarning)
crime_data = crime_data[crime_data['Category'].isin(focuscrimes)].copy()

# extract the year from the date column
crime_data['Year'] = pd.to_datetime(crime_data['Date']).dt.year

# limit the data to the years 2003-2017 (between() includes both bounds)
crime_data = crime_data[crime_data['Year'].between(2003, 2017)]

In [None]:
###-------------------------------------------------------------------------------------------------------------------
#          Retrieving the linear Increase/Decrease in Crime Rates for each District during 2003-2017
###-------------------------------------------------------------------------------------------------------------------
from sklearn.linear_model import Ridge

# get district names; missing values are removed explicitly instead of assuming that the
# NaN entry happens to be the last element returned by unique()
districts = list(crime_data['PdDistrict'].dropna().unique())

# creating necessary grouped table: one row per year, one column per district,
# each cell holding the number of incidents
crime_data_year_district_cat = crime_data.groupby(["Year", "PdDistrict"]).size().unstack()

# the regressor (the year) is the same for every district, so it is built only once
years = np.array(crime_data_year_district_cat.index).reshape(-1, 1)

# iterating over each district to find the linear crime increase/decrease rate during 2003-2017:
# the fitted slope is the change in yearly incident count per year
# (Ridge is used with its default regularisation strength)
linear_weights = []
for district in districts:
    lr = Ridge()
    # years without any recorded incident in a district count as 0 incidents
    lr.fit(years, crime_data_year_district_cat[district].fillna(0))
    linear_weights.append([district, round(lr.coef_[0], 2)])

# build the result frame in one go instead of enlarging it row by row
crime_district_linear_weight = pd.DataFrame(linear_weights, columns=["DISTRICT", "Linear Crime Growth"])
print(crime_district_linear_weight)

In [None]:
###-------------------------------------------------------------------------------------------------------------------
#          Collecting the Gentrification Typologies of the Neighbourhoods inside each Police District
###-------------------------------------------------------------------------------------------------------------------
from shapely import wkt
from shapely.wkt import dumps, loads
from shapely.geometry import mapping, shape

# import gentrification geojson data (the context manager closes the file once it is parsed)
with open("sanfrancisco.geojson") as f:
    geo_json_data_sanfrancisco_gentrification = json.load(f)

# import crime geojson data
with open("sfpd.geojson") as f:
    geo_json_data_sfpd = json.load(f)

# import gentrification typology labels for each neighbourhood
gentrification_data = pd.read_csv("sanfrancisco.csv")

# build the polygon of every neighbourhood once, so it is not rebuilt for each district;
# the parsed GeoJSON geometry dict is handed to shape() directly
neighbourhood_shapes = [
    (feature_gen["properties"]["GEOID"], shape(feature_gen["geometry"]))
    for feature_gen in geo_json_data_sanfrancisco_gentrification["features"]
]

# typologies found in each district (a district has multiple neighbourhoods and thus multiple typologies)
typologies_per_district = {}

for feature_spfd in geo_json_data_sfpd["features"]:
    # retrieve name and geometric shape of the district
    district_spfd = feature_spfd["properties"]["DISTRICT"]
    district_shape_spfd = shape(feature_spfd["geometry"])

    # for each district go through all neighbourhoods in the gentrification dataset
    for geo_id, district_shape_gen in neighbourhood_shapes:
        # contains() only matches neighbourhoods lying completely inside the district,
        # so neighbourhoods crossing a district border are not assigned to any district
        if district_shape_spfd.contains(district_shape_gen):
            # first typology row whose GEOID matches this neighbourhood
            typology_of_subdistrict = gentrification_data.loc[gentrification_data['GEOID'] == geo_id, "Typology"].values[0]
            typologies_per_district.setdefault(district_spfd, []).append(typology_of_subdistrict)

def most_common(lst):
    """Return the element that occurs most often in ``lst``.

    When several elements share the highest count, the one that appears first
    in ``lst`` is returned, so the result does not change between runs.

    Args:
        lst: non-empty sequence of hashable elements.

    Returns:
        The most frequent element of ``lst``.

    Raises:
        ValueError: if ``lst`` is empty.
    """
    if not lst:
        raise ValueError("most_common() arg is an empty sequence")
    # Counter remembers first-seen order and most_common() keeps that order among
    # equal counts, which yields the deterministic tie-break; counting is a single pass
    return Counter(lst).most_common(1)[0][0]

# collapse each district's list of neighbourhood typologies to its single most frequent
# typology for the further analysis (the items are snapshotted so the dict can be
# rewritten while looping)
for district, district_typologies in list(typologies_per_district.items()):
    typologies_per_district[district] = most_common(district_typologies)
print(typologies_per_district)

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import geopandas as gpd

# transform typology dict into dataframe
# (the 'COEF' column holds the typology label of the district)
keysList = list(typologies_per_district.keys())
valuesList = list(typologies_per_district.values())
typologies_per_district_df = pd.DataFrame({'DISTRICT': keysList, 'COEF': valuesList})

# create one combined dataframe of the crime and gentrification data:
# district polygons + typology per district + linear crime growth per district
districts_gdf = gpd.GeoDataFrame.from_features(geo_json_data_sfpd)
districts_gdf = districts_gdf.merge(typologies_per_district_df, on="DISTRICT")
districts_gdf = districts_gdf.merge(crime_district_linear_weight, on="DISTRICT")

# centroid coordinates of every district polygon, stored as lat/lon columns
centroids = districts_gdf.geometry.centroid
gdf = districts_gdf.assign(lat=centroids.y, lon=centroids.x).set_index("DISTRICT", drop=False)

In [None]:
###-------------------------------------------------------------------------------------------------------------------
#          Preparing map plot
###-------------------------------------------------------------------------------------------------------------------
# The Mapbox access token is a credential, so it is read from the MAPBOX_ACCESS_TOKEN
# environment variable instead of being stored in the notebook. When the variable is not
# set, the map keeps the token-free "carto-positron" base style chosen in the choropleth call.
mapboxtoken = os.environ.get("MAPBOX_ACCESS_TOKEN")
# custom Mapbox style URL; defined here but not applied to the figure below
mapboxstyle="mapbox://styles/eddyvm/clfpy77he000t01mhur5nrerw"

# one hover label per district: name, linear crime growth and gentrification typology
hovertext = [
    "<b>" + district + "</b>, <br>Linear Weight: " + str(weight) + ",<br>Gentrification Status: " + typology
    for district, typology, weight in zip(gdf["DISTRICT"], gdf["COEF"].astype(str), gdf["Linear Crime Growth"])
]
# NOTE(review): these colors are matched to the rows of gdf purely by position, so the
# list has to be kept in the same order as gdf - verify whenever the input data changes
marker_colors = ["gold", "mediumpurple", "cornflowerblue", "rebeccapurple", "bisque", "bisque", "bisque", "bisque", "gold", "cornflowerblue"]

# legend entries: legend_colors[i] is the marker color shown for legend_names[i]
legend_colors = ["gold", "bisque", "rebeccapurple", "mediumpurple", "cornflowerblue"]
legend_names = ["Becoming Exclusive", "Stable Moderate/Mixed Income", "Advanced Gentrification", "Early/Ongoing Gentrification", "Low-Income/Susceptible to Displacement"]

###-------------------------------------------------------------------------------------------------------------------
#          Plotting linear crime growth and gentrification statistics using a choropleth map
###-------------------------------------------------------------------------------------------------------------------

# the color scale spans the smallest to the largest fitted crime growth
crime_growth = crime_district_linear_weight["Linear Crime Growth"]

# creating map with data
fig = px.choropleth_mapbox(
    gdf,
    geojson=geo_json_data_sfpd,
    locations='DISTRICT',
    color='Linear Crime Growth',
    color_continuous_scale="armyrose",
    range_color=(float(crime_growth.min()), float(crime_growth.max())),
    mapbox_style="carto-positron",
    zoom=11,
    center={"lat": 37.7749, "lon": -122.431297},
    opacity=0.5,
)
# removing hover effects of the general map
fig.update_traces(hovertemplate=None, hoverinfo='skip')

# adding marker positions to the district centroids for the gentrification labels
texttrace = go.Scattermapbox(
    lat=gdf.geometry.centroid.y,
    lon=gdf.geometry.centroid.x,
    mode='markers+text',
    marker=go.scattermapbox.Marker(
        size=13,
        color=marker_colors,
        opacity=0.9,
    ),
    showlegend=False,
    textposition='top left',
    textfont=dict(size=12),
    hoverinfo='text',
    hovertext=hovertext,
    hoverlabel=dict(
        bgcolor="white",
        font_size=13,
    ),
)
fig.add_trace(texttrace)

# updating map style: the Mapbox 'light' style is only requested when a token is available
mapbox_layout = dict(zoom=11)
if mapboxtoken:
    mapbox_layout.update(accesstoken=mapboxtoken, style='light')
fig.update_layout(
    mapbox=mapbox_layout,
    margin={"r": 0, "t": 0, "l": 0, "b": 0}
)

# adding misc legend: one trace without data points per typology, so that only its
# legend entry is shown
for legend_color, legend_name in zip(legend_colors, legend_names):
    fig.add_trace(
        go.Scatter(
            x=[None],
            y=[None],
            mode="markers",
            marker=dict(
                color=legend_color,
                size=13,
                opacity=0.9
            ),
            name=legend_name,
            legendgroup="up"
        )
    )
# position legend
fig.update_layout(legend=dict(
    title="Gentrification Status (2010-2018)",
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.01
))
fig.show()
